Class: Feedbase::FetchFeed
- Inherits:
-
Object
- Object
- Feedbase::FetchFeed
- Defined in:
- lib/feedbase/fetch_feed.rb
Instance Attribute Summary collapse
-
#feed_url ⇒ Object
Returns the value of attribute feed_url.
Instance Method Summary collapse
- #fetch ⇒ Object
- #fix_url(url) ⇒ Object
- #headers ⇒ Object
-
#initialize(feed_url) ⇒ FetchFeed
constructor
A new instance of FetchFeed.
Constructor Details
#initialize(feed_url) ⇒ FetchFeed
Returns a new instance of FetchFeed.
10 11 12 |
# File 'lib/feedbase/fetch_feed.rb', line 10

# Creates a fetcher bound to a single feed address.
#
# @param feed_url [Object] the feed address; stored as given, without
#   normalisation
def initialize(feed_url)
  @feed_url = feed_url
end
Instance Attribute Details
#feed_url ⇒ Object
Returns the value of attribute feed_url.
8 9 10 |
# File 'lib/feedbase/fetch_feed.rb', line 8

# Reader for the feed address supplied to the constructor.
#
# @return [Object] the value held in @feed_url (nil when never assigned)
def feed_url
  @feed_url
end
Instance Method Details
#fetch ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/feedbase/fetch_feed.rb', line 38

# Downloads the feed with curl, converts the body to UTF-8 and parses it.
#
# The response headers (see #headers) are fetched first; they supply the
# source encoding and any redirect targets.
#
# @return [Hash] with three keys:
#   - :feed_params     => { feed_url:, title:, web_url: }, where :feed_url is
#     the last redirect target if the server redirected, otherwise #feed_url
#   - :items           => the parsed items (f[:items])
#   - :download_params => the #headers hash plus :download_time in seconds
# @raise [Timeout::Error] when the download exceeds the time limit
# @raise [RuntimeError] propagated from #headers when the response is not OK
def fetch
  url = fix_url(feed_url)
  agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092416 Firefox/3.0.3"
  timeout_seconds = 20

  start_time = Time.now
  body = Timeout.timeout(timeout_seconds) do
    headers # get headers and any redirects (memoized for the uses below)
    # Argument-array form: no shell is involved, so characters in the URL
    # cannot be interpreted as shell syntax. --max-time makes curl stop on
    # its own so the child does not outlive the Ruby-side timeout.
    IO.popen(["curl", "-sL", "--max-time", timeout_seconds.to_s, "-A", agent, url], &:read)
  end
  elapsed = Time.now - start_time

  # Header names are case-insensitive (HTTP/2 sends them lowercased).
  redirects = headers[:headers].scan(/^Location:\s*(.*)$/i).flatten.map(&:strip)
  # Use a distinct local name: assigning to `feed_url` here would create a
  # local variable shadowing the reader and yield nil when no redirect occurred.
  final_url = redirects.empty? ? feed_url : redirects.last

  # NOTE(review): Iconv left the standard library in Ruby 2.0; this line
  # presumably relies on the iconv gem being loaded elsewhere -- confirm.
  utf8_body = Iconv.conv("UTF-8//TRANSLIT//IGNORE", (headers[:encoding] || 'iso-8859-1'), body)
  f = FeedParser.new(utf8_body).result

  feed_params = { :feed_url => final_url, :title => f[:title], :web_url => f[:link] }
  {
    feed_params: feed_params,
    items: f[:items],
    download_params: headers.merge(download_time: elapsed)
  }
end
#fix_url(url) ⇒ Object
66 67 68 69 70 71 |
# File 'lib/feedbase/fetch_feed.rb', line 66

# Ensures +url+ carries an explicit HTTP(S) scheme.
#
# @param url [String] feed address, with or without a scheme
# @return [String] +url+ itself when it already starts with http:// or
#   https:// (any letter case); otherwise a new string prefixed with "http://"
def fix_url(url)
  # \A (not ^) anchors at the start of the whole string, so a scheme that
  # only appears on a later line of a multi-line value does not count.
  return url if url =~ %r{\Ahttps?://}i

  "http://" + url
end
#headers ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/feedbase/fetch_feed.rb', line 14

# Issues a HEAD request for the feed (following redirects) and extracts the
# header fields used elsewhere. The result is memoized in @headers, so curl
# runs at most once per instance.
#
# @return [Hash] with keys:
#   - :headers       => the raw header text of every hop, CRLF normalised to LF
#   - :encoding      => charset from the final response's Content-Type, or nil
#   - :etag          => ETag value of the final response, or nil
#   - :last_modified => DateTime parsed from Last-Modified, or nil
# @raise [RuntimeError] "Response not OK" when the final status is not 200
# @raise [Timeout::Error] when the request exceeds the time limit
def headers
  return @headers if @headers

  agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092416 Firefox/3.0.3"
  timeout_seconds = 20
  # Probe the same normalised URL that #fetch downloads.
  url = fix_url(feed_url)

  raw = Timeout.timeout(timeout_seconds) do
    # get headers and any redirects
    # Argument-array form: no shell is involved, so characters in the URL
    # cannot be interpreted as shell syntax. --max-time makes curl stop on
    # its own so the child does not outlive the Ruby-side timeout.
    IO.popen(["curl", "-sIL", "--max-time", timeout_seconds.to_s, "-A", agent, url], &:read)
      .gsub("\r\n", "\n")
  end

  # With -L curl prints one header block per hop, separated by blank lines;
  # only the last block describes the document that is actually served.
  final = raw.strip.split(/\n{2,}/).last.to_s

  # HTTP/2 status lines carry no reason phrase ("HTTP/2 200"), so match on
  # the status code alone rather than on "200 OK".
  unless final =~ %r{\AHTTP/\S+ 200\b}
    puts raw.inspect
    raise "Response not OK"
  end

  #TODO check for xml
  # Header names are case-insensitive (HTTP/2 sends them lowercased).
  last_modified = final[/^Last-Modified:\s*(.*)$/i, 1]
  @headers = {
    headers: raw,
    encoding: final[/^Content-Type:.*charset="?([^";\s]+)/i, 1],
    etag: final[/^ETag:\s*(.*)$/i, 1],
    last_modified: last_modified && DateTime.parse(last_modified)
  }
end