Class: Spix::FeedDiscovery::Feed
- Defined in:
- lib/spix_parser/tools/feed_discovery/feed.rb
Instance Method Summary
- #fetch(uri, limit = 10) ⇒ Object
- #fetch_html(uri) ⇒ Object
- #fetch_xml(uri) ⇒ Object
- #find_shortcut_in(doc) ⇒ Object
- #initialize(url) ⇒ Feed (constructor): A new instance of Feed.
- #parse_uri(path) ⇒ Object
- #set_favicon ⇒ Object
- #set_title ⇒ Object
Constructor Details
#initialize(url) ⇒ Feed
Returns a new instance of Feed.
5 6 7 8 9 10 11 12 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 5
# Builds a feed for +url+.
#
# The URL is stored in its string form, +similars+ and +exceptions+ start out
# as empty arrays, and the new instance is yielded to the block when one is
# given. Any StandardError raised along the way is captured and stored as the
# only element of +errors+ instead of propagating.
#
# @param url [#to_s] address of the feed
# @yield [feed] the instance being initialized
def initialize(url)
  begin
    self.url        = url.to_s
    self.similars   = []
    self.exceptions = []
    yield(self) if block_given?
  rescue => failure
    self.errors = [failure]
  end
end
Instance Method Details
#fetch(uri, limit = 10) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 55
# Downloads +uri+ and returns the response body, following both HTTP
# redirects and HTML <meta http-equiv=REFRESH> redirects.
#
# Every followed redirect updates +url+ to the redirect target, resolved
# against +base_uri+ (defined outside this method).
#
# Best effort: the method-level rescue swallows any StandardError, including
# the ArgumentError raised when +limit+ is exhausted, and an empty String is
# returned in that case.
#
# @param uri [URI] location to download
# @param limit [Integer] number of redirects that may still be followed
# @return [String] the body of the final response, or "" on any failure
def fetch(uri, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  resp = Net::HTTP.get_response uri

  # The meta-refresh lookup only runs when the response is not already an
  # HTTP redirect, so refresh_tags stays nil for plain 3xx responses.
  if resp.kind_of?(Net::HTTPRedirection) || (refresh_tags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
    # Prefer the Location header; otherwise take the absolute URL embedded in
    # the refresh tag's content attribute (http or https).
    path = resp['location'] || refresh_tags.first.get_attribute('content')[%r{https?://.*}]
    from_redirect = base_uri.merge path
    self.url = from_redirect.to_s
    fetch from_redirect, limit - 1
  else
    resp.body
  end
rescue
  String.new
end
#fetch_html(uri) ⇒ Object
74 75 76 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 74
# Fetches +uri+ and parses the returned body as an HTML document.
#
# @param uri [Object] passed unchanged to #fetch
# @return [Object] the result of Nokogiri::HTML on the fetched body
def fetch_html(uri)
  markup = fetch(uri)
  Nokogiri::HTML(markup)
end
#fetch_xml(uri) ⇒ Object
70 71 72 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 70
# Fetches +uri+ and parses the returned body as an XML document.
#
# @param uri [Object] passed unchanged to #fetch
# @return [Object] the result of Nokogiri::XML on the fetched body
def fetch_xml(uri)
  markup = fetch(uri)
  Nokogiri::XML(markup)
end
#find_shortcut_in(doc) ⇒ Object
47 48 49 50 51 52 53 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 47
# Collects the href values of <link> elements in +doc+ whose rel attribute
# contains "shortcut", "icon" or "ico", compared case-insensitively.
#
# The three XPath queries are handed to +doc.xpath+ in that order. Matching
# elements that carry no href attribute are left out of the result rather
# than showing up as nil entries.
#
# @param doc [#xpath] document to search
# @return [Array<String>] href values of the matching elements
def find_shortcut_in(doc)
  # XPath 1.0 has no lower-case(); translate() is the usual stand-in.
  lowered_rel = 'translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")'
  queries = %w[shortcut icon ico].map do |keyword|
    %(//link[contains(#{lowered_rel}, "#{keyword}")])
  end

  doc.xpath(*queries).map { |node| node.get_attribute "href" }.compact
end
#parse_uri(path) ⇒ Object
98 99 100 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 98
# Percent-escapes +path+ and parses the result into a URI object.
#
# URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape performs the
# same escaping and is available on both older and newer Rubies.
#
# @param path [String] raw, possibly unescaped, URI text
# @return [URI::Generic] the parsed URI
# @raise [URI::InvalidURIError] if the escaped text is still not a valid URI
def parse_uri(path)
  URI.parse(URI::DEFAULT_PARSER.escape(path))
end
#set_favicon ⇒ Object
29 30 31 32 33 34 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 29
# Derives +favicon+ from the first +link+ node found in +content+.
#
# The node's stripped text is parsed as a URI and reduced to "scheme://host";
# that address is parsed again and handed to +shortcut_from+, whose result
# becomes +favicon+. If that last step raises a StandardError, +favicon+ is
# set to nil instead. Does nothing when +content+ has no +link+ node.
def set_favicon
  link_node = content.search('link').first
  return unless link_node

  # Parsing the node text happens outside the rescue, so a failure here
  # propagates to the caller.
  path = parse_uri(link_node.content.strip)

  self.favicon = begin
    site_root = path.select(:scheme, :host).join("://")
    shortcut_from(parse_uri(site_root))
  rescue StandardError
    nil
  end
end
#set_title ⇒ Object
24 25 26 27 |
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 24
# Copies the text of the first +title+ node in +content+ into +title+.
# Leaves +title+ untouched when no such node exists.
def set_title
  title_node = content.search('title').first
  return unless title_node

  self.title = title_node.content
end