Class: MediaartsScraper::Page::PageBase

Inherits:
Object
  • Object
show all
Includes:
CommonTableParser, Helper
Defined in:
lib/mediaarts_scraper/page/page_base.rb

Constant Summary

Constants included from CommonTableParser

CommonTableParser::KEY_SEPARATOR

Class Method Summary collapse

Instance Method Summary collapse

Methods included from CommonTableParser

#parse_common_key_value_table, #parse_common_serial_rows_table

Methods included from Helper

#resolve_relative_url

Constructor Details

#initialize(url:) ⇒ PageBase

Returns a new instance of PageBase.



7
8
9
# File 'lib/mediaarts_scraper/page/page_base.rb', line 7

# Builds a page object for the given address. Only stores the value;
# nothing is fetched or parsed here.
#
# @param url [Object] the page address, kept as-is in @original_url
#   (presumably a URL string; verify against callers)
def initialize(url:)
  @original_url = url
end

Class Method Details

.data_class(data_class) ⇒ Object



15
16
17
18
19
# File 'lib/mediaarts_scraper/page/page_base.rb', line 15

# Class-level macro: defines an instance method named +data_class+ that
# returns the object passed in here. The instance-level +data+ method calls
# +.new+ on whatever that method returns.
#
# @param data_class [Object] the object the generated method will return
#   (presumably a Class; the macro itself does not check)
# @return [Symbol] the name of the generated method, as returned by define_method
def self.data_class(data_class)
  # The block closes over the argument, so each call of the macro captures
  # its own value.
  define_method(:data_class) { data_class }
end

.from_url(url) ⇒ Object



11
12
13
# File 'lib/mediaarts_scraper/page/page_base.rb', line 11

# Convenience constructor taking the address positionally and forwarding it
# to +new+ as the +url:+ keyword.
#
# @param url [Object] forwarded unchanged as the +url:+ keyword
# @return [Object] whatever +new+ returns for the receiving class
def self.from_url(url)
  new(url: url)
end

Instance Method Details

#data ⇒ Object



21
22
23
# File 'lib/mediaarts_scraper/page/page_base.rb', line 21

# Returns the page's data object, building it on first use by passing
# +raw_data+ to +data_class.new+. The result is cached in @data, so later
# calls return the same object (unless the cached value is nil or false, in
# which case it is rebuilt).
#
# @return [Object] the instance created by +data_class.new(raw_data)+
def data
  @data ||= begin
    wrapper = data_class
    wrapper.new(raw_data)
  end
end

#doc ⇒ Object



37
38
39
# File 'lib/mediaarts_scraper/page/page_base.rb', line 37

# Returns the page's HTML parsed with Nokogiri, parsing on first use and
# caching the result in @doc.
#
# The arguments handed to Nokogiri::HTML.parse are the markup from +html+,
# nil, and the string "utf8" (per Nokogiri's API these are presumably the
# document URL and the encoding; confirm against the Nokogiri version in use).
#
# @return [Object] whatever Nokogiri::HTML.parse returns
def doc
  @doc ||= begin
    parser = Nokogiri::HTML
    parser.parse(html, nil, "utf8")
  end
end

#fetch(url) ⇒ Object



41
42
43
44
45
# File 'lib/mediaarts_scraper/page/page_base.rb', line 41

# Reads and returns the full content found at +url+, logging the request at
# debug level first.
#
# Uses URI.open (from the open-uri standard library, which must be loaded)
# instead of a bare Kernel#open. The bare call only understood URLs through
# open-uri's patch of Kernel#open, which was removed in Ruby 3.0; there
# "http://..." is treated as a local file path and raises Errno::ENOENT.
# URI.open still falls back to Kernel#open for plain (non-URL) strings.
#
# @param url [String, #open] the address to read; objects that respond to
#   +open+ (such as URI instances) are opened directly
# @return [String] the content read from the opened resource
# @raise [StandardError] whatever URI.open raises when the resource cannot
#   be opened (the exact class depends on the failure)
def fetch(url)
  MediaartsScraper.logger.debug { {class: self.class.name, method: "fetch", url: url} }

  # Block form so the underlying IO is closed as soon as it has been read.
  URI.open(url, &:read)
end

#html ⇒ Object



25
26
27
# File 'lib/mediaarts_scraper/page/page_base.rb', line 25

# Returns the page's raw content, fetching it from +url+ on first use and
# caching the result in @html so the download happens only once per instance
# (as long as the fetched value is truthy).
#
# @return [Object] whatever +fetch(url)+ returns
def html
  @html ||= begin
    target = url
    fetch(target)
  end
end

#raw_data ⇒ Object



33
34
35
# File 'lib/mediaarts_scraper/page/page_base.rb', line 33

# Returns the result of running +parse+ over the page's +html+, computed on
# first use and cached in @raw_data.
#
# NOTE(review): +parse+ is not defined in the code shown here; presumably
# each concrete page class supplies it. Confirm against the subclasses.
#
# @return [Object] whatever +parse(html)+ returns
def raw_data
  @raw_data ||= begin
    markup = html
    parse(markup)
  end
end

#url ⇒ Object



29
30
31
# File 'lib/mediaarts_scraper/page/page_base.rb', line 29

# Returns the address this page was constructed with.
#
# @return [Object] the value stored in @original_url by the constructor
def url
  @original_url
end