Class: TaiwaneseNewsParser::Parser::Udn

Inherits:
TaiwaneseNewsParser::Parser show all
Defined in:
lib/taiwanese_news_parser/parser/udn.rb

Instance Attribute Summary

Attributes inherited from TaiwaneseNewsParser::Parser

#article, #url

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from TaiwaneseNewsParser::Parser

applicable?, applicable_parser, #clean_up, #doc, #initialize, #reproduced?, subclasses

Constructor Details

This class inherits a constructor from TaiwaneseNewsParser::Parser

Class Method Details

.domain ⇒ Object



2
3
4
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 2

# Returns the domain string associated with this parser.
#
# @return [String] always 'udn.com'
def self.domain
  host = 'udn.com'
  host
end

.names ⇒ Object



6
7
8
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 6

# Returns the newspaper names associated with this parser, one element per
# name.
#
# The literal is %w (word array). The earlier %{...} form is a plain string
# literal and yielded the single String "聯合報 聯合晚報", which cannot be
# iterated name by name and matches partial names on include?. Exact-name
# include? checks behave the same with the Array.
#
# @return [Array<String>] the two names, in declaration order
def self.names
  %w{聯合報 聯合晚報}
end

.parse_url_id(url) ⇒ Object



37
38
39
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 37

# Extracts the numeric id from a URL string.
#
# The id is the first run of digits that directly follows two
# slash-separated word segments (e.g. ".../NATIONAL/NAT5/8369216...").
#
# @param url [String] the URL to inspect
# @return [String, nil] the captured digits, or nil when the pattern
#   does not occur in +url+
def self.parse_url_id(url)
  id_pattern = %r{\w+/\w+/(\d+)}
  url.slice(id_pattern, 1)
end

Instance Method Details

#parse ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 11

# Populates @article from the parsed document and returns it.
#
# Reads the title from '#story_title', the body text from '#story' and the
# publication time from '#story_update' (parsed with Time.parse), and fills
# the company and reporter names via the dedicated helpers. The fields are
# assigned in a fixed order and clean_up runs once all of them are set.
#
# @return [Object] @article after clean_up has run
def parse
  title_node = doc.at_css('#story_title')
  @article[:title] = title_node.text

  body_node = doc.at_css('#story')
  @article[:content] = body_node.text

  @article[:company_name] = parse_company_name
  @article[:reporter_name] = parse_reporter_name

  timestamp_text = doc.at_css('#story_update').text
  @article[:published_at] = Time.parse(timestamp_text)

  clean_up

  @article
end

#parse_company_name ⇒ Object



27
28
29
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 27

# Extracts the company name: the text from the start of a line up to the
# first separator ('/' or '╱') in the combined company/reporter string.
#
# Uses String#[] with a capture index so that a string without any
# separator yields nil instead of raising NoMethodError on a nil match,
# matching how parse_reporter_name reports "not found".
#
# NOTE(review): the character class lists '/' twice; one of them may have
# been meant to be the full-width slash — confirm against real bylines.
#
# @return [String, nil] the company name, or nil when no separator is present
def parse_company_name
  get_company_name_and_reporter_name[%r{^(.*?)[//╱]}, 1]
end

#parse_reporter_name ⇒ Object



30
31
32
33
34
35
# File 'lib/taiwanese_news_parser/parser/udn.rb', line 30

# Extracts the reporter name from the combined company/reporter string.
#
# Two shapes are tried in order:
# 1. "<sep>[本報]記者NAME<sep>" — the name sits between "記者" and the last
#    separator on the line.
# 2. "本報記者NAME[<sep>...]" — used when the first shape does not match.
#    The capture is lazy and stops at the first separator or at end of
#    line; with a greedy capture the optional trailing separator could
#    never take effect and text such as "╱台北報導" stayed in the name.
#
# NOTE(review): the character class lists '/' twice; one of them may have
# been meant to be the full-width slash — confirm against real bylines.
#
# @return [String, nil] the reporter name, or nil when neither shape matches
def parse_reporter_name
  source = get_company_name_and_reporter_name
  source[%r{[//╱](?:本報)?記者(.*)[//╱]}, 1] ||
    source[%r{本報記者(.*?)(?:[//╱]|$)}, 1]
end