Class: Parser::News
Instance Method Summary
collapse
Methods inherited from XmlBase
#copyright, #fake_url, #tags
Constructor Details
#initialize(source, options = {}) ⇒ News
Returns a new instance of News.
3
4
5
|
# File 'lib/fly_parser/sources/news.rb', line 3
# Creates a News parser. Both arguments are handed unchanged to the parent
# class constructor; this class adds no setup of its own.
#
# @param source forwarded as the first constructor argument
# @param options [Hash] forwarded as the second argument (defaults to {})
def initialize(source, options = {})
  super(source, options)
end
|
Instance Method Details
#parse_all ⇒ Object
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/fly_parser/sources/news.rb', line 7
# Builds one article hash per recent feed item.
#
# Reads every `//item` node from @source, keeps those whose pubDate is newer
# than two years ago, loads the linked page and extracts the poster image,
# the lead paragraph and the cleaned article body from it. An item is
# skipped when its pubDate is missing or unparsable, when the linked page
# cannot be loaded (the error message is printed), or when the page has no
# `.article_illustration img` element.
#
# @return [Array<Hash>] hashes with :title, :content, :poster_image and
#   :tags (:tags is nil unless @enable_tags is truthy)
def parse_all
  require 'time' # Time.parse is provided by the stdlib 'time' extension

  cutoff = Time.now - 2.years
  recent_items = @source.search('//item').select do |item|
    date_node = item.xpath('pubDate').first
    begin
      # pubDate is text; parse it so the comparison is Time against Time.
      !date_node.nil? && Time.parse(date_node.content) > cutoff
    rescue ArgumentError
      false # unparsable date: treat the item as not recent
    end
  end

  recent_items.map do |item|
    title = item.xpath('title/text()').text
    link = item.xpath('link/text()').text
    begin
      # NOTE(review): Kernel#open receives a URL taken from the feed;
      # confirm the feed is trusted or restrict links to http(s).
      page = Nokogiri::HTML(open(link))
    rescue StandardError => e
      # StandardError (not Exception) so Interrupt/SystemExit still propagate.
      puts e.message
      next
    end

    illustration = page.search('.article_illustration img').first
    next if illustration.nil?

    poster_image = illustration.attributes['src'].value
    short_desc = page.search('.article_lead').first.content
    full_desc = page.search('.article_full_text')
    # Strip embedded illustrations, injected widgets/polls and links from
    # the body before it is serialized.
    full_desc.search('.article_illustration').remove
    full_desc.search('.inject-data').remove
    full_desc.search('.inject_poll').remove
    full_desc.search('a').remove
    tags = @enable_tags ? parse_tags(page) : nil
    copyright = "<p>Источник: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
    content = "<p>#{short_desc}</p>" + full_desc.inner_html + copyright
    { title: title, content: content, poster_image: poster_image, tags: tags }
  end.compact
end
|
40
41
42
43
44
|
# File 'lib/fly_parser/sources/news.rb', line 40
# Collects the text of every tag link on an article page.
#
# Looks up `.article_tags_overview_list` on the page, then the `li a`
# elements inside it, and returns the text of each one.
#
# @param page the parsed page; must respond to #search
# @return [Array] the text of each matched link, in document order
def parse_tags(page)
  page
    .search('.article_tags_overview_list')
    .search('li a')
    .map { |anchor| anchor.text }
end
|