Class: BrowserCrawler::Engine

Inherits:
Object
  • Object
show all
Includes:
DSL::JsHelpers, DSL::SignIn, HooksOperator, Capybara::DSL
Defined in:
lib/browser_crawler/engine.rb

Defined Under Namespace

Classes: UnavailableCallBackMethod

Constant Summary collapse

# Fallback folder used by #report_save when the caller passes an empty
# folder_path.
REPORT_SAVE_FOLDER_PATH =
'tmp'.freeze
# Baseline browser options. #initialize merges the caller-supplied
# browser_options over this hash before registering the driver.
CUPRITE_OPTIONS =
{
  window_size: [1280, 1600]
}.freeze
# Baseline keyword arguments for ScreenshotOperator.new. #initialize merges
# the caller-supplied screenshots_options over this hash.
SCREENSHOT_OPERATOR_OPTIONS =
{
  save_screenshots: false,
  save_screenshots_to: nil,
  format: 'png',
  filename: nil
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from DSL::JsHelpers

#wait_for_page_to_load

Methods included from DSL::SignIn

#o365_login, #o365_stay_signed_in, #pingfed_login, #pingfed_o365_login, #sign_in

Methods included from HooksOperator

#exchange_on_hooks, #with_hooks_for

Constructor Details

#initialize(browser_options: {}, screenshots_options: {}, max_pages: nil, deep_visit: false, logger: nil) ⇒ Engine

Returns a new instance of Engine.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/browser_crawler/engine.rb', line 44

# Builds an engine: creates the screenshot operator, resolves the browser
# options and logger, then delegates the remaining setup to the
# register/initialize helpers.
#
# @param browser_options [Hash] overrides merged on top of CUPRITE_OPTIONS
# @param screenshots_options [Hash] overrides merged on top of
#   SCREENSHOT_OPERATOR_OPTIONS and passed to ScreenshotOperator.new
# @param max_pages [Object, nil] forwarded to initialize_crawl_manager
# @param deep_visit [Boolean] forwarded to initialize_crawl_manager
# @param logger [Object, nil] logger to use; a Logger writing to STDOUT is
#   created when none is given
def initialize(browser_options: {},
               screenshots_options: {},
               max_pages: nil,
               deep_visit: false,
               logger: nil)
  @screenshot_operator = ScreenshotOperator.new(
    **SCREENSHOT_OPERATOR_OPTIONS.merge(screenshots_options)
  )

  driver_options = CUPRITE_OPTIONS.merge(browser_options)
  @logger = logger || Logger.new(STDOUT)

  # The same merged option set is handed to both the driver registration
  # and the report store setup.
  register_chrome_driver(driver_options)
  initialize_report_store(driver_options)
  initialize_crawl_manager(max_pages, deep_visit)
end

Instance Attribute Details

#crawl_manager ⇒ Object (readonly)

Returns the value of attribute crawl_manager.



39
40
41
# File 'lib/browser_crawler/engine.rb', line 39

# Read-only accessor for the crawl manager.
#
# @return [Object, nil] whatever is currently stored in @crawl_manager
#   (nil when it has not been assigned)
def crawl_manager
  instance_variable_get(:@crawl_manager)
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



39
40
41
# File 'lib/browser_crawler/engine.rb', line 39

# Read-only accessor for the logger chosen in #initialize.
#
# @return [Object, nil] whatever is currently stored in @logger
def logger
  instance_variable_get(:@logger)
end

#report_store ⇒ Object (readonly)

Returns the value of attribute report_store.



39
40
41
# File 'lib/browser_crawler/engine.rb', line 39

# Read-only accessor for the report store.
#
# @return [Object, nil] whatever is currently stored in @report_store
#   (nil when it has not been assigned)
def report_store
  instance_variable_get(:@report_store)
end

#screenshot_operator ⇒ Object (readonly)

Returns the value of attribute screenshot_operator.



39
40
41
# File 'lib/browser_crawler/engine.rb', line 39

# Read-only accessor for the screenshot operator built in #initialize.
#
# @return [Object, nil] whatever is currently stored in @screenshot_operator
def screenshot_operator
  instance_variable_get(:@screenshot_operator)
end

Instance Method Details

#after(type: :all, &hook) ⇒ Object



99
100
101
# File 'lib/browser_crawler/engine.rb', line 99

# Registers the given block as an :after hook with the shared
# HooksContainer instance.
#
# @param type [Symbol] hook type passed through to add_hook (default :all)
# @yield the hook body, stored as a proc
# @return [Object] whatever HooksContainer#add_hook returns
def after(type: :all, &hook)
  container = HooksContainer.instance
  container.add_hook(method: :after, type: type, hook: hook)
end

#before(type: :all, &hook) ⇒ Object



95
96
97
# File 'lib/browser_crawler/engine.rb', line 95

# Registers the given block as a :before hook with the shared
# HooksContainer instance.
#
# @param type [Symbol] hook type passed through to add_hook (default :all)
# @yield the hook body, stored as a proc
# @return [Object] whatever HooksContainer#add_hook returns
def before(type: :all, &hook)
  container = HooksContainer.instance
  container.add_hook(method: :before, type: type, hook: hook)
end

#change_page_scan_rules(&hook) ⇒ Object



107
108
109
# File 'lib/browser_crawler/engine.rb', line 107

# Registers the given block under the :scan_rules hook type with the shared
# HooksContainer instance.
#
# @yield the hook body, stored as a proc
# @return [Object] whatever HooksContainer#add_hook returns
def change_page_scan_rules(&hook)
  container = HooksContainer.instance
  container.add_hook(type: :scan_rules, hook: hook)
end


#extract_links(url:) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/browser_crawler/engine.rb', line 68

# Crawls starting from +url+ inside the :all hooks and always finishes the
# report store afterwards.
#
# Any StandardError raised while crawling is logged at fatal level (message
# plus backtrace) and not re-raised. The crawler initialisation itself runs
# outside that protected section.
#
# @param url [String] the address handed to initialize_crawler and to the
#   crawl manager as target_url
# @return [self]
def extract_links(url:)
  initialize_crawler(url)

  begin
    with_hooks_for(type: :all) do
      manager = crawl_manager
      manager.crawl(target_url: url,
                    capybara_session: Capybara.current_session,
                    screenshot_operator: screenshot_operator)
    end
  rescue StandardError => e
    trace = e.backtrace.join("\n")
    logger.fatal("#{e.message} \n #{trace}")
  ensure
    # Runs on both the success and the failure path.
    @report_store.finish
  end

  self
end

#js_before_run(javascript: '') ⇒ Object



62
63
64
65
66
# File 'lib/browser_crawler/engine.rb', line 62

# Remembers a JavaScript snippet in @javascript_before_run. An empty string
# is ignored and leaves any previously stored snippet untouched.
#
# @param javascript [String] the script source (default: empty)
# @return [String, nil] the stored script, or nil when it was empty
def js_before_run(javascript: '')
  @javascript_before_run = javascript unless javascript.empty?
end

#report_save(folder_path: '', type: :yaml) ⇒ Object



88
89
90
91
92
93
# File 'lib/browser_crawler/engine.rb', line 88

# Saves the report store through ReportFactory.
#
# @param folder_path [String] destination folder; when empty,
#   REPORT_SAVE_FOLDER_PATH is used instead
# @param type [Symbol, String] report type, converted with #to_sym
#   (default :yaml)
# @return [Object] whatever ReportFactory.save returns
def report_save(folder_path: '', type: :yaml)
  destination = folder_path
  destination = REPORT_SAVE_FOLDER_PATH if folder_path.empty?

  ReportFactory.save(
    store: @report_store,
    type: type.to_sym,
    save_folder_path: destination
  )
end


#unvisited_links(&hook) ⇒ Object



103
104
105
# File 'lib/browser_crawler/engine.rb', line 103

# Registers the given block under the :unvisited_links hook type with the
# shared HooksContainer instance.
#
# @yield the hook body, stored as a proc
# @return [Object] whatever HooksContainer#add_hook returns
def unvisited_links(&hook)
  container = HooksContainer.instance
  container.add_hook(type: :unvisited_links, hook: hook)
end