Class: Statement::Feed

Inherits:
Object
  • Object
show all
Defined in:
lib/statement/feed.rb

Class Method Summary collapse

Class Method Details

.batch(urls) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/statement/feed.rb', line 11

# Fetches several feeds through one Typhoeus::Hydra and parses every
# successful response.
#
# Each URL gets its own request; the completion callbacks fire while
# hydra.run executes and collect either the parsed entries or the failed URL.
#
# @param urls [Enumerable<String>] feed URLs to fetch
# @return [Array(Array, Array)] two-element array: the flattened parse
#   results (feeds that produced nothing are left out), followed by the URLs
#   whose responses were not successful
def self.batch(urls)
  results = []
  failures = []
  hydra = Typhoeus::Hydra.new
  urls.each do |url|
    req = Typhoeus::Request.new(url)
    req.on_complete do |response|
      if response.success?
        doc = Nokogiri::XML(response.body)
        # Same either/or dispatch as from_rss: this one feed goes through
        # parse_atom only, every other feed through parse_rss only.
        parsed =
          if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
            parse_atom(doc, url)
          else
            parse_rss(doc, url)
          end
        results << parsed
      else
        failures << url
      end
    end
    hydra.queue(req)
  end
  hydra.run
  # parse_atom and parse_rss return nil when a feed has no entries/items;
  # compact keeps those nils out of the combined list.
  [results.flatten.compact, failures]
end

.date_from_rss_item(link) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
# File 'lib/statement/feed.rb', line 40

# Works out the publication date for a single RSS item.
#
# Sources are tried in order: the text of the item's pubDate, the text of its
# lowercase pubdate, and finally (only for mikulski.senate.gov links that
# contain "-2014") a date rebuilt from the last path segment of the item's
# link.
#
# @param link [#xpath] the item node; xpath(name) must return an object that
#   responds to #text
# @return [Date, nil] the parsed date, or nil when no source applies
# @raise [ArgumentError] from Date.parse when the selected text is not a date
def self.date_from_rss_item(link)
  pub_date = link.xpath('pubDate').text
  return Date.parse(pub_date) unless pub_date.empty?

  # Check the text rather than the node set, so an empty <pubdate/> element
  # falls through to the next source instead of reaching Date.parse("").
  lower_pub_date = link.xpath('pubdate').text
  return Date.parse(lower_pub_date) unless lower_pub_date.empty?

  href = link.xpath('link').text
  return nil unless href.include?("mikulski.senate.gov") && href.include?("-2014")

  # Take the first three dash-separated pieces of the last path segment, join
  # them with slashes and drop the ".cfm" suffix before parsing.
  # NOTE(review): Date.parse appears to read "a/b/yyyy" as day/month/year —
  # confirm these URLs are not month-first.
  Date.parse(href.split('/').last.split('-', -1).first(3).join('/').split('.cfm').first)
end

.from_rss(url) ⇒ Object



52
53
54
55
56
57
58
59
60
# File 'lib/statement/feed.rb', line 52

# Opens a single feed and parses it with the parser that matches the feed.
#
# @param url [String] location of the feed
# @return [Object, nil] whatever parse_atom / parse_rss returns, or nil when
#   open_rss produced no document
def self.from_rss(url)
  feed = open_rss(url)
  return nil unless feed

  # This one feed is handled by parse_atom; every other feed by parse_rss.
  return parse_atom(feed, url) if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"

  parse_rss(feed, url)
end

.open_rss(url) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/statement/feed.rb', line 32

# Opens a feed and parses it as XML on a best-effort basis.
#
# @param url [String] location of the feed
# @return [Object, nil] the document returned by Nokogiri::XML, or nil when
#   opening or parsing raised a StandardError
def self.open_rss(url)
  # Required here so URI.open is defined even if nothing else has loaded
  # open-uri yet; require is a no-op once the library is loaded.
  require 'open-uri'
  # Kernel#open stopped opening URLs in Ruby 3.0, so go through URI.open.
  # The block form closes the stream once parsing has finished.
  URI.open(url) { |io| Nokogiri::XML(io) }
rescue StandardError
  # Deliberate best effort: from_rss treats nil as "no document".
  nil
end

.parse_atom(doc, url) ⇒ Object



74
75
76
77
78
79
80
# File 'lib/statement/feed.rb', line 74

# Converts the entry elements of a document into result hashes.
#
# Fields are read by child position within each entry: child 1 supplies the
# title text, child 3 the href attribute, child 5 the date text.
#
# @param doc [#/] parsed document; doc / :entry must return the entries
# @param url [String] address the feed was fetched from
# @return [Array<Hash>, nil] one hash per entry with :source, :url, :title,
#   :date and :domain keys, or nil when there are no entries
def self.parse_atom(doc, url)
  entries = doc / :entry
  return if entries.empty?

  entries.map do |entry|
    nodes = entry.children
    href = nodes[3]['href']
    title = nodes[1].text
    published = Date.parse(nodes[5].text)
    host = URI.parse(url).host
    { :source => url, :url => href, :title => title, :date => published, :domain => host }
  end
end

.parse_rss(doc, url) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
# File 'lib/statement/feed.rb', line 62

# Converts the item elements of a document into result hashes.
#
# @param doc [#xpath] parsed document
# @param url [String] address the feed was fetched from
# @return [Object, nil] the return value of Utils.remove_generic_urls! applied
#   to the per-item hashes (:source, :url, :title, :date, :domain), or nil
#   when the document contains no item elements
def self.parse_rss(doc, url)
  items = doc.xpath('//item')
  return if items.empty?

  entries = items.map do |item|
    raw_link = item.xpath('link').text
    target = Utils.absolute_link(url, raw_link)
    # Two feeds get their own link handling in place of the resolved link.
    if url == 'http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed'
      target = "http://www.burr.senate.gov/public/" + raw_link
    elsif url == "http://www.johanns.senate.gov/public/?a=RSS.Feed"
      # Drops the first 37 characters of the link text.
      target = raw_link[37..-1]
    end
    title = item.xpath('title').text
    published = date_from_rss_item(item)
    host = URI.parse(url).host
    { :source => url, :url => target, :title => title, :date => published, :domain => host }
  end
  Utils.remove_generic_urls!(entries)
end