Class: MediaartsScraper::Page::PageBase
Constant Summary
CommonTableParser::KEY_SEPARATOR
Class Method Summary
collapse
Instance Method Summary
collapse
#parse_common_key_value_table, #parse_common_serial_rows_table
Methods included from Helper
#resolve_relative_url
Constructor Details
#initialize(url:) ⇒ PageBase
Returns a new instance of PageBase.
7
8
9
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 7
# Creates a page object bound to a single location.
#
# The value is only remembered in @original_url; this method performs no
# fetching or parsing of its own.
#
# @param url [Object] location of the page (presumably a URL String --
#   confirm against callers)
def initialize(url:)
  @original_url = url
end
|
Class Method Details
.data_class(data_class) ⇒ Object
15
16
17
18
19
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 15
def self.data_class(data_class)
define_method("data_class") do
data_class
end
end
|
.from_url(url) ⇒ Object
11
12
13
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 11
# Convenience constructor taking the URL positionally.
#
# Forwards the argument to +new+ as the +url:+ keyword.
#
# @param url [Object] location passed through to the constructor
# @return [Object] whatever +new+ returns for that URL
def self.from_url(url)
  new(url: url)
end
|
Instance Method Details
#data ⇒ Object
21
22
23
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 21
# Returns the page contents wrapped in the configured data class.
#
# On first use this instantiates +data_class+ with +raw_data+ and caches the
# result in @data; later calls return the cached object.
#
# @return [Object] instance produced by +data_class.new(raw_data)+
def data
  return @data if @data

  @data = data_class.new(raw_data)
end
|
#doc ⇒ Object
37
38
39
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 37
# Returns the parsed document for this page.
#
# The markup from +html+ is handed to Nokogiri::HTML.parse together with
# +nil+ and the encoding name "utf8"; the result is cached in @doc so the
# parse happens at most once per instance.
#
# @return [Object] whatever Nokogiri::HTML.parse returns for the page markup
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML.parse(html, nil, "utf8")
end
|
#fetch(url) ⇒ Object
41
42
43
44
45
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 41
# Downloads the resource at +url+ and returns its body.
#
# A debug entry (class name, method name, url) is logged before the request.
#
# The read goes through URI.open rather than bare Kernel#open: open-uri's
# patching of Kernel#open for URLs was deprecated in Ruby 2.7 and removed in
# 3.0, where a bare open("http://...") is treated as a file path. URI.open is
# provided by open-uri, which this method expects to be loaded already (the
# previous implementation depended on it as well).
#
# NOTE(review): URI.open still defers to Kernel#open for arguments that do not
# look like a URI, so +url+ should not come from untrusted input.
#
# @param url [String, #open] location to read
# @return [String] the content read from the opened resource
def fetch(url)
  MediaartsScraper.logger.debug { {class: self.class.name, method: "fetch", url: url} }
  URI.open(url, &:read)
end
|
#html ⇒ Object
25
26
27
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 25
# Returns the raw markup of the page.
#
# The first call retrieves it via +fetch(url)+ and caches it in @html;
# subsequent calls reuse the cached value.
#
# @return [Object] whatever +fetch+ returned for this page's URL
def html
  return @html if @html

  @html = fetch(url)
end
|
#raw_data ⇒ Object
33
34
35
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 33
# Returns the unwrapped result of parsing the page.
#
# Runs +parse+ on +html+ the first time it is needed and caches the outcome
# in @raw_data. +parse+ is not defined in this listing; it is presumably
# supplied by the concrete page class -- confirm.
#
# @return [Object] whatever +parse(html)+ produced
def raw_data
  return @raw_data if @raw_data

  @raw_data = parse(html)
end
|
#url ⇒ Object
29
30
31
|
# File 'lib/mediaarts_scraper/page/page_base.rb', line 29
# Location this page reads from.
#
# Simply exposes @original_url, the value recorded by the constructor.
#
# @return [Object, nil] the stored URL, or nil when none has been set
def url
  @original_url
end
|