Class: Parser::NewsAZ
- Defined in:
- lib/fly_parser/sources/news-az.rb
Instance Method Summary
-
#initialize(source, options = {}) ⇒ NewsAZ
constructor
A new instance of NewsAZ.
- #parse_all ⇒ Object
Methods inherited from XmlBase
Constructor Details
#initialize(source, options = {}) ⇒ NewsAZ
Returns a new instance of NewsAZ.
3 4 5 |
# File 'lib/fly_parser/sources/news-az.rb', line 3
# Builds a NewsAZ parser. The bare `super` forwards both arguments,
# unchanged, to the constructor of the parent class.
#
# @param source the feed source, handed straight to the parent constructor
# @param options [Hash] optional settings, forwarded as-is (defaults to {})
# @return [NewsAZ] a new instance of NewsAZ
def initialize(source, options = {})
  super
end
Instance Method Details
#parse_all ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/fly_parser/sources/news-az.rb', line 7
# Walks every <item> found in @source, downloads the article page each item
# links to and converts it into a hash describing the article.
#
# An item is skipped (it does not appear in the result) when its article
# page cannot be fetched/parsed, or when no poster image with a `src`
# attribute is found on the page.
#
# @return [Array<Hash>] one hash per usable item with the keys
#   :title, :content (cleaned article HTML followed by the copyright
#   paragraph built from @copyright) and :poster_image (the image `src`)
def parse_all
  items = @source.search('//item')
  # last_date = Time.now - 2.years # for dev 2 years
  # select! / reject! do not exist on Nokogiri's NodeSet
  # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
  items.map do |item|
    title = item.xpath('title/text()').text
    # The URL is taken from the text node that follows <link>, not from the
    # element's own content.
    link = item.xpath('link/following-sibling::text()[1]').first

    begin
      # NOTE(review): URL-style `open` relies on open-uri patching
      # Kernel#open; Ruby 3.0+ needs URI.open instead. Confirm the target
      # Ruby version before switching.
      page = Nokogiri::HTML(open(link))
    rescue StandardError => e
      # Best effort: report the failure and carry on with the next item.
      # StandardError (not Exception) so Interrupt/SystemExit still propagate.
      puts e.message
      next
    end

    # Prefer the single article image; fall back to the first gallery photo.
    gallery_image = page.search('.gallery-photo img').first
    single_image = page.search('.content-block .visual img').first
    poster_image = single_image || gallery_image
    next if poster_image.nil?

    # An <img> without a src is useless as a poster, so skip the item
    # instead of failing the whole feed.
    poster_image = poster_image['src']
    next if poster_image.nil?

    full_desc = page.search('.content-block .text-block .text')
    full_desc.search('a').remove
    # Relative XPath (.//) keeps the clean-up inside the description nodes.
    full_desc.search(".//text()[contains(., 'Автор')]").remove
    full_desc.search(".//text()[contains(., 'Связаться')]").remove
    # full_desc.search('//p[1]').remove()

    copyright = "<p>Mənbə: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

    # Order matters: the marker clean-ups run per line (`.` does not match a
    # newline) and must happen before the line breaks are removed.
    full_desc = full_desc.inner_html
                         .gsub(/<!--noindex-->.*/, '')
                         .gsub(%r{<!--/noindex-->.*}, '')
                         .gsub(/\r\n/, '')
                         .gsub(/Trend.*/, '')
                         .strip

    content = full_desc + copyright
    { title: title, content: content, poster_image: poster_image }
  end.compact
end