Class: Seep::Doc

Inherits:
Object
  • Object
show all
Defined in:
lib/seep/doc.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, html) ⇒ Doc

Returns a new instance of Doc.



4
5
6
7
# File 'lib/seep/doc.rb', line 4

# Builds a document from its address and its raw markup.
#
# @param url [Object] address of the document; stored as given in +@url+
# @param html [Object] markup handed to Nokogiri::HTML.parse; the result is kept in +@dom+
def initialize(url, html)
  parsed = Nokogiri::HTML.parse(html)
  @url = url
  @dom = parsed
end

Instance Attribute Details

#dom ⇒ Object (readonly)

Returns the value of attribute dom.



2
3
4
# File 'lib/seep/doc.rb', line 2

# Reader for the +@dom+ instance variable.
#
# @return [Object] whatever the constructor stored in +@dom+
#   (the result of Nokogiri::HTML.parse on the markup it was given)
def dom
  @dom
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



2
3
4
# File 'lib/seep/doc.rb', line 2

# Reader for the +@url+ instance variable.
#
# @return [Object] the +url+ argument the constructor was called with
def url
  @url
end

Instance Method Details

#compute_url(relative_url) ⇒ Object



31
32
33
34
35
36
37
38
39
# File 'lib/seep/doc.rb', line 31

# Resolves +relative_url+ against this document's +@url+.
#
# The reference is first joined as-is. If that raises (for example because
# it contains characters that are not valid in a URI, such as spaces), it is
# percent-escaped and joined a second time. An error raised by the second
# attempt propagates to the caller.
#
# @param relative_url [String, nil] reference to resolve
# @return [String, nil] the joined URL when it starts with "http"
#   (case-insensitive); nil when +relative_url+ is nil or the joined URL
#   starts with anything else
def compute_url(relative_url)
  return nil if relative_url.nil?

  absolute = begin
    URI.join(@url, relative_url).to_s
  rescue
    # URI.escape was removed in Ruby 3.0. It was a thin wrapper around the
    # default parser's #escape, so calling the parser directly keeps the same
    # escaping while working on both old and current Rubies.
    URI.join(@url, URI::DEFAULT_PARSER.escape(relative_url)).to_s
  end

  # \A anchors at the start of the string; ^ would also match after a newline.
  absolute =~ /\Ahttp/i ? absolute : nil
end

#images ⇒ Object



18
19
20
21
22
23
24
25
# File 'lib/seep/doc.rb', line 18

# Collects the resolved +src+ of every +img+ element found in +@dom+.
#
# Each +src+ is passed through #compute_url; entries for which it returns
# nil are dropped. The list is built once and memoized in +@images+.
#
# @return [Array] resolved image URLs, in the order the elements were found
def images
  @images ||= @dom.search("img").map do |node|
    compute_url(node.get_attribute("src"))
  end.compact
end

#inspect ⇒ Object



41
42
43
# File 'lib/seep/doc.rb', line 41

# Short human-readable summary: the document URL followed by how many
# links and images were extracted from it.
#
# @return [String]
def inspect
  link_total = links.count
  image_total = images.count
  "#<Seep::Doc #{url} links: #{link_total}, images: #{image_total}>"
end


9
10
11
12
13
14
15
16
# File 'lib/seep/doc.rb', line 9

# Collects the resolved +href+ of every +a+ element found in +@dom+.
#
# Each +href+ is passed through #compute_url. Entries for which it returns
# nil, and entries equal to this document's own #url, are left out. The
# list is built once and memoized in +@links+.
#
# @return [Array] resolved link URLs, in the order the anchors were found
def links
  @links ||= begin
    resolved = @dom.search("a").map do |node|
      compute_url(node.get_attribute("href"))
    end
    resolved.compact.reject { |target| target == url }
  end
end

#urls ⇒ Object



27
28
29
# File 'lib/seep/doc.rb', line 27

# Every URL extracted from the document: the entries of #links followed by
# the entries of #images, returned as a new array.
#
# @return [Array]
def urls
  combined = links.dup
  combined.concat(images)
end