Class: Parser::NewsNl
- Defined in:
- lib/fly_parser/sources/news-nl.rb
Instance Method Summary
-
#initialize(source, options = {}) ⇒ NewsNl
constructor
A new instance of NewsNl.
- #parse_all ⇒ Object
Methods inherited from XmlBase
Constructor Details
#initialize(source, options = {}) ⇒ NewsNl
Returns a new instance of NewsNl.
3 4 5 |
# File 'lib/fly_parser/sources/news-nl.rb', line 3
#
# Creates a NewsNl parser. The bare `super` forwards both arguments,
# unchanged, to the parent class initializer (presumably XmlBase, which is
# not visible here -- confirm what it does with them).
#
# @param source [Object] the feed source handed to the parent initializer
# @param options [Hash] optional settings; defaults to an empty hash
def initialize(source, options = {})
  super
end
Instance Method Details
#parse_all ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/fly_parser/sources/news-nl.rb', line 7
#
# Builds one article hash per `//item` node found in @source.
#
# For every item the linked page is downloaded and parsed so that its poster
# image (`.article_block img.media-full`) can be extracted. Items are skipped
# when the page cannot be loaded or when it contains no such image.
#
# The item description is cleaned before use: `href` attributes are stripped
# from anchors and empty `<iframe>` elements are removed. A source credit
# built from @copyright is appended to the cleaned description.
#
# @return [Array<Hash>] hashes with the keys :title, :content, :poster_image
def parse_all
  items = @source.search('//item')

  # Date filtering is currently disabled. Nokogiri's NodeSet offers no
  # select!/reject!, so a non-destructive select would be needed:
  #   last_date = Time.now - 2.years # for dev 2 years
  #   items = items.select { |item| item.xpath('pubDate').first.content() > last_date }

  items.map do |item|
    title = item.xpath('title/text()').text
    link = item.xpath('link/text()').text

    begin
      # NOTE(review): relies on open-uri patching Kernel#open. That patch was
      # removed in Ruby 3.0, and Kernel#open on feed-supplied text is unsafe;
      # consider URI.open once the minimum supported Ruby version is known.
      page = Nokogiri::HTML(open(link))
    rescue StandardError => e
      # Only ordinary failures are skipped; Interrupt/SystemExit must still
      # propagate so the process can be stopped while fetching.
      puts e.message
      next
    end

    # Look the image up once; an article without a poster image is dropped.
    image = page.search('.article_block img.media-full').first
    next if image.nil?

    poster_image = image['src']

    # Non-greedy quantifiers keep each substitution inside a single element,
    # so several anchors or iframes on one line are all handled and the text
    # between them is preserved.
    full_desc = item.xpath('description/text()').text
                    .gsub(%r{<a href="([a-zA-Z:/.\d\-]*)">(.*?)</a>}, '<a>\2</a>')
                    .gsub(%r{<iframe.*?></iframe>}, '')

    copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

    { title: title, content: full_desc + copyright, poster_image: poster_image }
  end.compact
end