Class: Blinkr::Engine

Inherits:
Object
  • Object
show all
Includes:
HttpUtils, Sitemap
Defined in:
lib/blinkr/engine.rb

Defined Under Namespace

Classes: ErrorArray

Instance Method Summary collapse

Methods included from Sitemap

#sitemap_locations

Methods included from HttpUtils

#retry?, #sanitize

Constructor Details

#initialize(config) ⇒ Engine

Returns a new instance of Engine.



32
33
34
35
36
# File 'lib/blinkr/engine.rb', line 32

# Builds an engine from the given configuration.
#
# Keeps whatever +config.validate+ returns as the engine's configuration,
# starts with an empty extension list, and then calls +load_pipeline+.
#
# @param config [#validate] configuration object; the return value of
#   +validate+ is what gets stored in @config
def initialize(config)
  @extensions = []
  @config = config.validate
  load_pipeline
end

Instance Method Details

#analyze(context, typhoeus) ⇒ Object



93
94
95
# File 'lib/blinkr/engine.rb', line 93

# Dispatches the +:analyze+ step through +execute+.
#
# @param context [Object] forwarded unchanged as the first step argument
# @param typhoeus [Object] forwarded unchanged as the second step argument
# @return [Object] whatever +execute+ returns
def analyze(context, typhoeus)
  execute(:analyze, context, typhoeus)
end

#append(context) ⇒ Object



79
80
81
# File 'lib/blinkr/engine.rb', line 79

# Dispatches the +:append+ step through +execute+.
#
# @param context [Object] forwarded unchanged as the only step argument
# @return [Object] whatever +execute+ returns
def append(context)
  execute(:append, context)
end

#collect(page) ⇒ Object



97
98
99
# File 'lib/blinkr/engine.rb', line 97

# Dispatches the +:collect+ step through +execute+.
#
# @param page [Object] forwarded unchanged as the only step argument
# @return [Object] whatever +execute+ returns
def collect(page)
  execute(:collect, page)
end

#run ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/blinkr/engine.rb', line 38

# Loads every sitemap URL, collects and analyzes the loaded pages, and renders
# the report.
#
# Flow:
# 1. Build the shared context (an OpenStruct with an empty +pages+ hash).
# 2. Choose the browser wrappers from @config.browser.
# 3. Fetch each unique sitemap location; successful responses are parsed,
#    stored in +context.pages+ keyed by URL and handed to +collect+; failures
#    are only reported on stdout.
# 4. Call +analyze+, then drop pages whose +errors+ list is empty.
# 5. Render a Blinkr::Report.
#
# @return [Object] whatever Blinkr::Report#render returns
def run
  context = OpenStruct.new(pages: {})

  # Manticore is used only on JRuby when explicitly configured; every other
  # combination gets the Typhoeus wrapper as the bulk browser.
  bulk_browser =
    if defined?(JRUBY_VERSION) && @config.browser == 'manticore'
      require 'blinkr/manticore_wrapper'
      ManticoreWrapper.new(@config, context)
    else
      TyphoeusWrapper.new(@config, context)
    end
  # Pages are loaded through PhantomJS when configured, but the analyze step
  # below still receives the bulk browser.
  browser = @config.browser == 'phantomjs' ? PhantomJSWrapper.new(@config, context) : bulk_browser

  urls = sitemap_locations.uniq
  puts "Fetching #{urls.size} pages from sitemap"

  page_count = 0
  browser.process_all(urls, @config.max_page_retrys) do |response, resource_errors, javascript_errors|
    url = response.request.base_url

    unless response.success?
      puts "#{response.code} #{response.status_message} Unable to load page #{url} #{'(' + response.return_message + ')' unless response.return_message.nil?}"
      next
    end

    puts "Loaded page #{url}" if @config.verbose
    document = Nokogiri::HTML(response.body)
    page = OpenStruct.new(
      response: response,
      body: document.freeze,
      errors: ErrorArray.new(@config),
      resource_errors: resource_errors || [],
      javascript_errors: javascript_errors || []
    )
    context.pages[url] = page
    collect(page)
    page_count += 1
  end

  # NOTE(review): only the message is printed; the hydra run itself is
  # commented out on the next line.
  puts 'Executing Typhoeus::Hydra.run, this could take awhile' if @config.browser == 'typhoeus'
  # browser.hydra.run if @config.browser == 'typhoeus'
  puts "Loaded #{page_count} pages using #{browser.name}."

  puts 'Analyzing pages'
  analyze(context, bulk_browser)
  # Only pages that ended up with at least one error are kept for the report.
  context.pages.reject! { |_url, page| page.errors.empty? }

  # NOTE(review): the guard checks @config.export while the directory comes
  # from @config.report -- confirm this pairing is intended.
  FileUtils.mkdir_p(Pathname.new(@config.report).parent) unless @config.export.nil?
  Blinkr::Report.new(context, self, @config).render
end

#transform(page, error, &block) ⇒ Object



83
84
85
86
87
88
89
90
91
# File 'lib/blinkr/engine.rb', line 83

# Runs the +:transform+ step for an error, falling back to the block's value.
#
# The block is evaluated first; its value is passed to +execute+ as the last
# argument and is also what gets returned when +execute+ yields nothing.
#
# @param page [Object] forwarded unchanged to +execute+
# @param error [Object] forwarded unchanged to +execute+
# @yieldreturn [Object] the default value
# @return [Object] the block's value when the result of +execute+ is empty,
#   otherwise that result's +join+
def transform(page, error, &block)
  fallback = yield
  pieces = execute(:transform, page, error, fallback)
  return fallback if pieces.empty?

  pieces.join
end