38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
# File 'lib/blinkr/engine.rb', line 38
# Fetches every unique sitemap location, records each successfully loaded
# page in a shared context, runs the analysis step, and renders the report.
#
# Two browser roles are used:
# * the bulk browser (Manticore on JRuby when configured, otherwise
#   Typhoeus) is handed to +analyze+;
# * the page browser loads the sitemap pages; it is PhantomJS when
#   configured, otherwise the bulk browser itself.
#
# Pages whose +errors+ collection is still empty after analysis are dropped
# from the context before the report is rendered.
#
# @return the value returned by Blinkr::Report#render
def run
  context = OpenStruct.new(pages: {})

  bulk_browser =
    if defined?(JRUBY_VERSION) && @config.browser == 'manticore'
      # Loaded lazily: only required on JRuby with the manticore browser.
      require 'blinkr/manticore_wrapper'
      ManticoreWrapper.new(@config, context)
    else
      TyphoeusWrapper.new(@config, context)
    end
  browser = @config.browser == 'phantomjs' ? PhantomJSWrapper.new(@config, context) : bulk_browser

  locations = sitemap_locations.uniq
  puts "Fetching #{locations.size} pages from sitemap"

  loaded = 0
  browser.process_all(locations, @config.max_page_retrys) do |response, resource_errors, javascript_errors|
    url = response.request.base_url

    unless response.success?
      # The trailing space is kept even when there is no return message.
      failure = "#{response.code} #{response.status_message} Unable to load page #{url} "
      reason = response.return_message
      failure += '(' + reason + ')' unless reason.nil?
      puts failure
      next
    end

    puts "Loaded page #{url}" if @config.verbose
    document = Nokogiri::HTML(response.body)
    page = OpenStruct.new(
      response: response,
      body: document.freeze,
      errors: ErrorArray.new(@config),
      resource_errors: resource_errors || [],
      javascript_errors: javascript_errors || []
    )
    context.pages[url] = page
    collect(page)
    loaded += 1
  end

  puts 'Executing Typhoeus::Hydra.run, this could take awhile' if @config.browser == 'typhoeus'
  puts "Loaded #{loaded} pages using #{browser.name}."
  puts 'Analyzing pages'
  analyze(context, bulk_browser)

  # Only pages that ended up with at least one error are reported.
  context.pages.delete_if { |_, page| page.errors.empty? }

  # NOTE(review): the guard checks @config.export while the directory is
  # derived from @config.report -- confirm this pairing is intended.
  FileUtils.mkdir_p(Pathname.new(@config.report).parent) unless @config.export.nil?

  Blinkr::Report.new(context, self, @config).render
end
|