Class: Seep::Doc
- Inherits: Object
  - Object
    - Seep::Doc
- Defined in: lib/seep/doc.rb
Instance Attribute Summary
- #dom ⇒ Object (readonly): Returns the value of attribute dom.
- #url ⇒ Object (readonly): Returns the value of attribute url.
Instance Method Summary
- #compute_url(relative_url) ⇒ Object
- #images ⇒ Object
- #initialize(url, html) ⇒ Doc (constructor): A new instance of Doc.
- #inspect ⇒ Object
- #links ⇒ Object
- #urls ⇒ Object
Constructor Details
#initialize(url, html) ⇒ Doc
Returns a new instance of Doc.
```ruby
# File 'lib/seep/doc.rb', line 4

def initialize(url, html)
  @url = url
  @dom = Nokogiri::HTML.parse(html)
end
```
Instance Attribute Details
#dom ⇒ Object (readonly)
Returns the value of attribute dom.
```ruby
# File 'lib/seep/doc.rb', line 2

def dom
  @dom
end
```
#url ⇒ Object (readonly)
Returns the value of attribute url.
```ruby
# File 'lib/seep/doc.rb', line 2

def url
  @url
end
```
Instance Method Details
#compute_url(relative_url) ⇒ Object
```ruby
# File 'lib/seep/doc.rb', line 31

def compute_url( relative_url )
  return nil if relative_url.nil?
  url = begin
    URI.join( @url, relative_url ).to_s
  rescue
    URI.join( @url, URI.escape(relative_url) ).to_s
  end
  url =~ /^http/i ? url : nil
end
```
#images ⇒ Object
```ruby
# File 'lib/seep/doc.rb', line 18

def images
  @images ||= [].tap do |images|
    @dom.search("img").each do |image|
      link = compute_url( image.get_attribute("src") )
      images << link unless link.nil?
    end
  end
end
```
#inspect ⇒ Object
```ruby
# File 'lib/seep/doc.rb', line 41

def inspect
  "#<Seep::Doc #{url} links: #{links.count}, images: #{images.count}>"
end
```
#links ⇒ Object
```ruby
# File 'lib/seep/doc.rb', line 9

def links
  @links ||= [].tap do |links|
    @dom.search("a").each do |anchor|
      link = compute_url( anchor.get_attribute("href") )
      links << link unless link.nil? or link == url
    end
  end
end
```
#urls ⇒ Object
```ruby
# File 'lib/seep/doc.rb', line 27

def urls
  links + images
end
```