Class: Statement::Feed
- Inherits: Object
- Inheritance chain: Object → Statement::Feed
- Defined in:
- lib/statement/feed.rb
Class Method Summary
- .batch(urls) ⇒ Object
- .date_from_rss_item(link) ⇒ Object
- .from_rss(url) ⇒ Object
- .open_rss(url) ⇒ Object
- .parse_atom(doc, url) ⇒ Object
- .parse_rss(doc, url) ⇒ Object
Class Method Details
.batch(urls) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/statement/feed.rb', line 11
# Fetches every feed URL through one Typhoeus::Hydra run and parses each
# successful response.
#
# A successful response body is parsed as XML and handed to exactly one
# parser: parse_atom for the single known Atom feed, parse_rss for every
# other URL (the same dispatch from_rss uses).
#
# @param urls [Enumerable<String>] feed URLs to request
# @return [Array] a two-element array: the flattened list of parsed items,
#   and the list of URLs whose response was not successful
def self.batch(urls)
  results = []
  failures = []
  hydra = Typhoeus::Hydra.new
  urls.each do |url|
    req = Typhoeus::Request.new(url)
    req.on_complete do |response|
      if response.success?
        doc = Nokogiri::XML(response.body)
        # Choose one parser per feed. Running the Atom feed through parse_rss
        # as well only contributed a nil to the results.
        results << if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
                     parse_atom(doc, url)
                   else
                     parse_rss(doc, url)
                   end
      else
        failures << url
      end
    end
    hydra.queue(req)
  end
  hydra.run
  # Both parsers return nil for a feed without entries; drop those so callers
  # only ever see item hashes.
  [results.flatten.compact, failures]
end
.date_from_rss_item(link) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/statement/feed.rb', line 40
# Works out the publication date of one RSS item.
#
# Sources are tried in order: the text of `pubDate`, the text of `pubdate`,
# and finally, for mikulski.senate.gov links containing "-2014", the first
# three dash-separated parts of the link's last path segment.
#
# @param link [#xpath] the item node; `xpath(name)` must return an object
#   responding to `text`
# @return [Date, nil] the parsed date, or nil when no source yields a date
# @raise [ArgumentError] if a non-blank date string cannot be parsed by
#   Date.parse
def self.date_from_rss_item(link)
  pub_date = link.xpath('pubDate').text.strip
  return Date.parse(pub_date) unless pub_date.empty?

  # Test the text, not the node set: an empty <pubdate/> element must fall
  # through instead of reaching Date.parse with a blank string.
  lowercase_pub_date = link.xpath('pubdate').text.strip
  return Date.parse(lowercase_pub_date) unless lowercase_pub_date.empty?

  href = link.xpath('link').text
  return nil unless href.include?("mikulski.senate.gov") && href.include?("-2014")

  # e.g. ".../3-3-2014.cfm" becomes "3/3/2014" before parsing.
  Date.parse(href.split('/').last.split('-', -1).first(3).join('/').split('.cfm').first)
end
.from_rss(url) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# File 'lib/statement/feed.rb', line 52
# Loads one feed via open_rss and parses it into item hashes.
#
# The single known Atom feed is handed to parse_atom; every other URL goes
# to parse_rss.
#
# @param url [String] the feed URL
# @return [Object, nil] whatever the chosen parser returns, or nil when
#   open_rss produced no document
def self.from_rss(url)
  feed = open_rss(url)
  return unless feed

  atom_feed = (url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw")
  atom_feed ? parse_atom(feed, url) : parse_rss(feed, url)
end
.open_rss(url) ⇒ Object
32 33 34 35 36 37 38 |
# File 'lib/statement/feed.rb', line 32
# Opens +url+ and parses what it returns as XML.
#
# Uses URI.open when open-uri provides it, because Kernel#open no longer
# opens URLs on Ruby 3.0+; otherwise falls back to Kernel.open as before.
# The block form closes the handle once the document has been parsed.
#
# @param url [String] the location of the feed
# @return [Object, nil] the result of Nokogiri::XML, or nil when opening or
#   parsing raises a StandardError
def self.open_rss(url)
  opener = URI.respond_to?(:open) ? URI : Kernel
  opener.open(url) { |io| Nokogiri::XML(io) }
rescue StandardError
  # Deliberate best-effort: callers treat nil as "feed unavailable".
  nil
end
.parse_atom(doc, url) ⇒ Object
74 75 76 77 78 79 80 |
# File 'lib/statement/feed.rb', line 74
# Converts the `entry` nodes of an Atom document into item hashes.
#
# Fields are read from fixed child positions of each entry: child 1 supplies
# the title text, child 3 the `href` attribute, child 5 the date text.
# NOTE(review): presumably the even positions are whitespace text nodes
# between the elements; confirm against the actual feed markup.
#
# @param doc [#/] parsed document searchable with `doc / :entry`
# @param url [String] the feed URL, stored as :source and used for :domain
# @return [Array<Hash>, nil] one hash per entry with the keys :source, :url,
#   :title, :date and :domain, or nil when the document has no entries
def self.parse_atom(doc, url)
  entries = doc / :entry
  return if entries.empty?

  # The host depends only on the feed URL, so compute it once.
  domain = URI.parse(url).host
  entries.map do |entry|
    nodes = entry.children
    {
      :source => url,
      :url => nodes[3]['href'],
      :title => nodes[1].text,
      :date => Date.parse(nodes[5].text),
      :domain => domain
    }
  end
end
.parse_rss(doc, url) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/statement/feed.rb', line 62
# Converts every `item` node of an RSS document into an item hash and passes
# the list through Utils.remove_generic_urls!.
#
# The item link is resolved with Utils.absolute_link, then overridden for
# two feeds: the burr.senate.gov feed prefixes the raw link with the site's
# public root, and the johanns.senate.gov feed keeps the raw link from
# character 37 onward.
# NOTE(review): the offset 37 presumably strips a fixed-length prefix; confirm
# against that feed's links.
#
# @param doc [#xpath] parsed RSS document
# @param url [String] the feed URL, stored as :source and used for :domain
# @return [Object, nil] the result of Utils.remove_generic_urls!, or nil when
#   the document has no items
def self.parse_rss(doc, url)
  items = doc.xpath('//item')
  return if items.empty?

  parsed = items.map do |item|
    href = item.xpath('link').text
    target = Utils.absolute_link(url, href)
    case url
    when 'http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed'
      target = "http://www.burr.senate.gov/public/#{href}"
    when "http://www.johanns.senate.gov/public/?a=RSS.Feed"
      target = href[37..-1]
    end

    {
      :source => url,
      :url => target,
      :title => item.xpath('title').text,
      :date => date_from_rss_item(item),
      :domain => URI.parse(url).host
    }
  end
  Utils.remove_generic_urls!(parsed)
end