Module: LinkThumbnailer::Doc

Defined in:
lib/link_thumbnailer/doc.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#source_url ⇒ Object

Returns the value of attribute source_url.



61
62
63
# File 'lib/link_thumbnailer/doc.rb', line 61

# Reader for the source_url attribute.
#
# @return [Object] the current value of the @source_url instance variable
def source_url
  @source_url
end

Instance Method Details

#canonical_url ⇒ Object



54
55
56
57
58
59
# File 'lib/link_thumbnailer/doc.rb', line 54

# Looks up the canonical URL declared by the document.
#
# Takes the first <link> element that has an href attribute and whose rel
# attribute, with ASCII upper-case letters folded to lower case by the XPath
# translate() call, equals "canonical".
#
# @return [String, nil] the href value with surrounding whitespace stripped,
#   or nil when no matching element exists
def canonical_url
  link = xpath("//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'canonical' and @href]").first
  return nil unless link

  link.attributes['href'].value.strip
end

#description ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/link_thumbnailer/doc.rb', line 40

# Extracts a textual description of the document.
#
# The first <meta> element that has a content attribute and whose name
# attribute equals "description" (ASCII case folded by the XPath translate()
# call) wins. When there is none, the text of the first "body p" node that has
# no style attribute and no element children is used instead.
#
# @return [String, nil] the stripped description text, or nil when neither
#   source yields a candidate
def description
  meta = xpath("//meta[translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'description' and @content]").first
  return meta.attributes['content'].value.strip if meta

  # Fallback: first paragraph that is unstyled and holds no child elements.
  paragraph = css('body p').find do |candidate|
    !candidate.has_attribute?('style') && candidate.first_element_child.nil?
  end

  paragraph.text.strip if paragraph
end

#doc_base_href ⇒ Object



7
8
9
10
# File 'lib/link_thumbnailer/doc.rb', line 7

# Reads the href of the node found at "//head/base".
#
# @return [Object, nil] the node's 'href' entry, or nil when no node is found
def doc_base_href
  base_node = at('//head/base')
  return unless base_node

  base_node['href']
end

#img_abs_urls(base_url = nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/link_thumbnailer/doc.rb', line 16

# Builds absolute URIs for the image sources returned by #img_srcs.
#
# A source that already parses as an HTTP(S) URI is kept as is. Any other
# source is resolved against the first available base, tried in this order:
# the +base_url+ argument, #doc_base_href, then #source_url.
#
# Extraction is best effort. A source is skipped, and the remaining sources
# are still processed, when it cannot be parsed, when no base is available to
# resolve it, or when joining it with the base fails (URI.join raises
# URI::BadURIError when the base itself is relative).
#
# @param base_url [String, URI, nil] optional base that takes precedence over
#   the document's own base
# @return [Array<URI>] the resolved URIs, in the order given by #img_srcs
def img_abs_urls(base_url = nil)
  # The fallback chain does not depend on the image, so evaluate it once
  # instead of repeating the base lookup for every relative source.
  base = base_url || doc_base_href || source_url

  img_srcs.each_with_object([]) do |src, urls|
    begin
      uri = URI(src)

      unless uri.is_a?(URI::HTTP)
        # Nothing to resolve a relative source against.
        next if base.nil?

        uri = URI.join(base, src)
      end

      urls << uri
    rescue URI::Error
      # Covers both an unparsable source and a failed join, so one bad
      # source or an unusable base cannot abort the whole extraction.
      next
    end
  end
end

#img_srcs ⇒ Object



12
13
14
# File 'lib/link_thumbnailer/doc.rb', line 12

# Collects the 'src' value of every node matched by "//img".
#
# @return [Array] the src values in match order, with nil entries (nodes
#   lacking a src) removed
def img_srcs
  sources = search('//img').map { |img| img['src'] }
  sources.compact
end

#title ⇒ Object



36
37
38
# File 'lib/link_thumbnailer/doc.rb', line 36

# Returns the document title.
#
# @return [String] the text of the nodes matched by the "title" CSS selector,
#   with leading and trailing whitespace removed
def title
  title_nodes = css('title')
  title_nodes.text.strip
end