Class: RWGet::SitemapLinks

Inherits:
Object
  • Object
show all
Defined in:
lib/rwget/sitemap_links.rb

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ SitemapLinks

Returns a new instance of SitemapLinks.



7
8
# File 'lib/rwget/sitemap_links.rb', line 7

# Builds a SitemapLinks instance. The options hash is accepted for
# interface compatibility but is not read by this constructor.
def initialize(options = {}); end

Instance Method Details

#urls(base, tmpfile) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/rwget/sitemap_links.rb', line 10

def urls(base, tmpfile)
  type = `file -z "#{tmpfile.path}"`
  return []                                 unless type =~ /XML/i
  tmpfile = Zlib::GzipReader.new(tmpfile)   if type =~ /gzip/i
  
  doc = LibXML::XML::Reader.io(tmpfile)
  urls = []
  while doc.read
    next unless doc.node_type == 1 #element
    begin
      urls << URI.parse(doc.node.content) if doc.name == "loc"
    rescue 
      STDERR.puts "Skipping #{doc.node.to_s}"
    end
  end
  urls
end