Class: Hangry::CanonicalUrlParser

Inherits:
Object
  • Object
show all
Defined in:
lib/hangry/canonical_url_parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html_or_nokogiri_doc) ⇒ CanonicalUrlParser

Returns a new instance of CanonicalUrlParser.



6
7
8
9
10
11
12
13
14
15
# File 'lib/hangry/canonical_url_parser.rb', line 6

# Builds a parser around an HTML document.
#
# @param html_or_nokogiri_doc [String, Nokogiri::HTML::Document] raw HTML
#   markup (parsed with Nokogiri::HTML) or an already-parsed document,
#   which is stored as given
# @raise [ArgumentError] if the argument is neither a String nor a
#   Nokogiri::HTML::Document
def initialize(html_or_nokogiri_doc)
  self.nokogiri_doc =
    case html_or_nokogiri_doc
    when String then Nokogiri::HTML(html_or_nokogiri_doc)
    when Nokogiri::HTML::Document then html_or_nokogiri_doc
    else
      # Name the offending type so the caller can see what went wrong.
      raise ArgumentError,
            'expected a String or Nokogiri::HTML::Document, ' \
            "got #{html_or_nokogiri_doc.class}"
    end
end

Instance Attribute Details

#nokogiri_doc ⇒ Object

Returns the value of attribute nokogiri_doc.



4
5
6
# File 'lib/hangry/canonical_url_parser.rb', line 4

# Reader for the document held in the @nokogiri_doc instance variable.
#
# @return [Object] the current value of @nokogiri_doc
def nokogiri_doc
  @nokogiri_doc
end

Instance Method Details

#canonical_domain ⇒ Object



17
18
19
20
21
22
23
24
# File 'lib/hangry/canonical_url_parser.rb', line 17

# Derives the base domain (the last two dot-separated host labels) from the
# page's canonical URL, e.g. "http://www.example.com/x" => "example.com".
#
# NOTE: this is a plain "last two labels" rule, so a host such as
# "www.example.co.uk" yields "co.uk".
#
# @return [String, nil] the base domain, or nil when there is no canonical
#   URL, the URL cannot be parsed, it has no host, or the host has fewer
#   than two labels
def canonical_domain
  # Look the URL up once; each call to canonical_url queries the document.
  url = canonical_url
  return nil unless url

  host =
    begin
      # Attribute values scraped from markup often carry stray whitespace,
      # which URI.parse rejects.
      URI.parse(url.strip).host
    rescue URI::InvalidURIError
      # A malformed URL simply means the domain cannot be determined.
      nil
    end
  return nil unless host

  labels = host.split('.')
  return nil if labels.size < 2

  labels.last(2).join('.')
end

#canonical_url ⇒ Object



26
27
28
29
30
31
32
33
# File 'lib/hangry/canonical_url_parser.rb', line 26

# Finds the page's canonical URL.
#
# Looks first at the href of <link rel="canonical">; if that element is
# absent or its href is missing or blank, falls back to the content of
# <meta property="og:url">.
#
# @return [String, nil] the URL as written in the document, or nil when
#   neither source provides one
def canonical_url
  link = nokogiri_doc.at_css('link[rel="canonical"]')
  href = link && link['href']
  # A canonical link with no usable href should not block the fallback.
  return href unless href.nil? || href.strip.empty?

  # Fall back to open graph URL (see food network example)
  meta = nokogiri_doc.at_css('meta[property="og:url"]')
  meta ? meta['content'] : nil
end