2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/report_command.rb', line 2
# Crawls a site and writes a per-page report to a CSV file.
#
# Nothing happens unless opts[:output] is set. Otherwise every non-nil
# option is forwarded to CobwebCrawler.new (on top of :cache_type => :full
# and :raise_exceptions => true), options[:url] is crawled, and one CSV row
# is written to options[:output] for each page the crawler yields. The
# header row is taken from the keys of the first page, excluding :body and
# :links.
#
# @param opts [#[], #to_hash] command options. Keys read here: :output,
#   :verbose, :url and the optional :seed_url_file (a path to a file with
#   one seed URL per line).
# @return [Object, nil] whatever CobwebCrawler#crawl returns (presumably the
#   crawl statistics -- confirm), or nil when opts[:output] is not set.
def self.start(opts)
  return unless opts[:output]

  # reject (not delete_if) so the caller's hash is never modified when
  # to_hash hands back the receiver itself, as a plain Hash does.
  options = opts.to_hash.reject { |_key, value| value.nil? }
  options[:quiet] = !opts[:verbose]

  if options.key?(:seed_url_file)
    seed_file = options.delete(:seed_url_file)
    # Strip line terminators / padding and skip blank lines so that each
    # seed entry is a bare URL.
    options[:seed_urls] = File.readlines(seed_file).map(&:strip).reject(&:empty?)
  end

  @crawler = CobwebCrawler.new({ :cache_type => :full, :raise_exceptions => true }.merge(options))
  columns = nil

  CSV.open(options[:output], "wb", :force_quotes => true) do |csv|
    # The crawl result is the last expression of the block and therefore
    # the return value of CSV.open and of this method.
    @crawler.crawl(options[:url]) do |page|
      puts "Reporting on #{page[:url]} [#{page[:status_code]}]"

      # NOTE(review): `scope` is defined outside this method and presumably
      # parses @doc, so @doc must be assigned before the calls below -- confirm.
      @doc = page[:body]

      # NOTE(review): assumes link_tag_with_rel / meta_tag_with_name return
      # something indexable even when the tag is missing -- confirm.
      page["link_rel"] = scope.link_tag_with_rel("canonical")["href"]
      page["title"] = scope.head_tag.title_tag.contents
      page["description"] = scope.meta_tag_with_name("description")["content"]
      page["keywords"] = scope.meta_tag_with_name("keywords")["content"]
      page["img tag count"] = scope.img_tags.count
      page["scripts in body"] = scope.body_tag.script_tags.count
      page["img without alt count"] = scope.img_tags.select { |node| node[:alt].nil? || node[:alt].strip.empty? }.count
      page["img alt"] = scope.img_tags_with_alt.map { |node| node[:alt] }.uniq

      # The first page decides the column set; the header is written once.
      unless columns
        columns = page.keys.reject { |key| key == :body || key == :links }
        csv << columns.map(&:to_s)
      end
      csv << columns.map { |key| page[key] }
    end
  end
end
|