Class: Wraith::Sitemap

Inherits:
Spider
  • Object
show all
Defined in:
lib/wraith/spider.rb

Instance Method Summary collapse

Methods inherited from Spider

#determine_paths, #initialize

Constructor Details

This class inherits a constructor from Wraith::Spider

Instance Method Details

#spider ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/wraith/spider.rb', line 78

# Reads the sitemap referenced by @wraith.sitemap and registers every listed
# location through add_path.
#
# The sitemap is fetched with +open+ when the configured value matches
# URI.regexp, and read from the local filesystem otherwise. Each <loc> entry
# has the configured domains stripped from it, and is skipped when it matches
# any of the @wraith.spider_skips patterns.
#
# Returns nil without doing anything when no sitemap is configured.
def spider
  return if @wraith.sitemap.nil?

  puts "reading sitemap.xml from #{@wraith.sitemap}"

  # Block forms are used for both sources so the underlying handle is closed
  # as soon as the document has been parsed.
  sitemap =
    if @wraith.sitemap =~ URI.regexp
      # NOTE(review): Kernel#open only fetches URLs while open-uri patches it;
      # confirm the supported Ruby versions before relying on this branch.
      open(@wraith.sitemap) { |remote| Nokogiri::XML(remote) }
    else
      File.open(@wraith.sitemap) { |file| Nokogiri::XML(file) }
    end

  skips = @wraith.spider_skips

  sitemap.css('loc').each do |loc|
    path = loc.content
    # Allow use of either domain in the sitemap.xml
    @wraith.domains.each do |_name, domain|
      path.sub!(domain, '')
    end

    skipped = !skips.nil? && skips.any? { |regex| regex.match(path) }
    add_path(path) unless skipped
  end
end