Class: RWGet::Links

Inherits:
Object
  • Object
show all
Defined in:
lib/rwget/links.rb

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Links

Returns a new instance of Links.



4
5
# File 'lib/rwget/links.rb', line 4

# Creates a link collector with an empty URL list.
#
# @param options [Hash] accepted for interface compatibility; not read here
def initialize(options = {})
  # Start with an empty list so #add can append even before #urls has run;
  # otherwise @urls is nil and every #add call fails with NoMethodError.
  @urls = []
end

Instance Method Details

#add(base, href) ⇒ Object



26
27
28
29
30
31
32
# File 'lib/rwget/links.rb', line 26

# Resolves +href+ against +base+ and appends the resulting URI to @urls.
#
# A nil +href+ is ignored. Ordinary errors raised while stripping, joining or
# appending are reported on STDERR and swallowed, so one bad link does not
# abort the caller.
#
# @param base [String] base URL, passed as the first argument to URI.join
# @param href [String, nil] link target; surrounding whitespace is stripped
# @return [Array, nil] @urls after a successful append, otherwise nil
def add(base, href)
  return unless href

  @urls << URI.join(base, href.strip)
rescue StandardError => e
  # Rescue StandardError rather than Exception so Interrupt, SystemExit and
  # similar process-level signals still propagate to the caller.
  STDERR.puts "url error parsing URI.join(#{base.inspect}, #{href.inspect}): #{e.message}"
end

#urls(base, tmpfile) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/rwget/links.rb', line 7

# Extracts links from a downloaded document and returns them resolved
# against +base+.
#
# The file at +tmpfile.path+ is read in full. If its content contains an
# "<?xml" marker it is parsed with Hpricot.XML, otherwise with Hpricot.
# Every "//item/link" element's inner text and every "a" element's href
# attribute is handed to #add. @urls is reset on each call.
#
# @param base [#to_s] URL of the document; converted to a String
# @param tmpfile [#path] object whose +path+ names the file to read
# @return [Array] the collected @urls, or an empty array if reading or
#   parsing raised an ordinary error (which is reported on STDERR)
def urls(base, tmpfile)
  @urls = []
  base = base.to_s
  string = File.read(tmpfile.path)
  xml = string =~ /<\?xml/
  doc = xml ? Hpricot.XML(string) : Hpricot(string)

  # Feed-style documents: <item><link>URL</link></item>
  (doc / "//item/link").each do |l|
    add base, l.inner_text
  end
  # Anchor tags: <a href="URL">
  (doc / "a").each do |a|
    add base, a.attributes["href"]
  end
  @urls
rescue StandardError => e
  # Rescue StandardError rather than Exception so Interrupt, SystemExit and
  # similar process-level signals are not mistaken for a parse failure.
  STDERR.puts "Couldn't parse #{base} for links: #{e.message}"
  []
end