Class: TaiwaneseNewsParser::Parser::Udn
Instance Attribute Summary
#article, #url
Class Method Summary
collapse
Instance Method Summary
collapse
applicable?, applicable_parser, #clean_up, #doc, #initialize, #reproduced?, subclasses
Class Method Details
.domain ⇒ Object
2
3
4
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 2
# Host name this parser is associated with.
#
# @return [String] the literal domain 'udn.com'
def self.domain
  'udn.com'
end
|
.names ⇒ Object
6
7
8
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 6
# Newspaper titles handled by this parser.
#
# Uses a %w word-array literal so each title is its own element. A plain
# %{...} literal is a String, which would make `include?` checks match any
# substring of "聯合報 聯合晚報" rather than a whole title.
#
# @return [Array<String>] the two titles, 聯合報 and 聯合晚報
def self.names
  %w[聯合報 聯合晚報]
end
|
.parse_url_id(url) ⇒ Object
37
38
39
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 37
# Extracts a numeric id from an article URL.
#
# Finds the first place in +url+ where two word-character path segments are
# followed by a segment starting with digits, and returns that digit run.
#
# @param url [String] the URL (or URL fragment) to inspect
# @return [String, nil] the captured digits, or nil when the pattern is absent
def self.parse_url_id(url)
  found = url.match(%r{\w+/\w+/(\d+)})
  found && found[1]
end
|
Instance Method Details
#parse ⇒ Object
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 11
# Fills @article from the fetched page and returns it.
#
# Reads the text of the '#story_title', '#story' and '#story_update' nodes
# from +doc+, stores the company and reporter names produced by the two
# parse_* helpers, then calls +clean_up+ before returning.
#
# NOTE(review): a missing node makes at_css return nil, so `.text` raises
# NoMethodError — presumably acceptable for malformed pages; confirm.
#
# @return [Object] the @article container (assumed Hash-like; only #[]= is used)
def parse
  # Calls doc on every lookup, one call per selector.
  text_at = ->(selector) { doc.at_css(selector).text }

  @article[:title] = text_at.call('#story_title')
  @article[:content] = text_at.call('#story')
  @article[:company_name] = parse_company_name
  @article[:reporter_name] = parse_reporter_name
  @article[:published_at] = Time.parse(text_at.call('#story_update'))

  clean_up
  @article
end
|
#parse_company_name ⇒ Object
27
28
29
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 27
# Extracts the company name: everything on a line before the first
# separator ('/' or '╱') in the combined company/reporter string.
#
# Uses String#[regex, capture] so a string without a separator yields nil
# instead of raising NoMethodError on a nil match, which is also how
# parse_reporter_name reports "not found".
#
# NOTE(review): the character class lists '/' twice; one of them may have
# been a full-width slash before the file was normalised — confirm.
#
# @return [String, nil] the text before the first separator, or nil if none
def parse_company_name
  get_company_name_and_reporter_name[%r{^(.*?)[//╱]}, 1]
end
|
#parse_reporter_name ⇒ Object
30
31
32
33
34
35
|
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 30
# Extracts the reporter name from the combined company/reporter string.
#
# Tries two patterns in order:
# 1. separator, optional '本報', '記者', then the name up to the last separator;
# 2. '本報記者' followed by the rest of the line. The trailing optional
#    separator does not limit this greedy capture, so any separator and
#    the text after it stay in the result.
#
# @return [String, nil] the captured name, or nil when neither pattern matches
def parse_reporter_name
  byline = get_company_name_and_reporter_name
  byline[%r{[//╱](?:本報)?記者(.*)[//╱]}, 1] || byline[%r{本報記者(.*)[//╱]?}, 1]
end
|