Class: BrowserCrawler::Engine
- Inherits:
-
Object
- Object
- BrowserCrawler::Engine
- Includes:
- DSL::JsHelpers, DSL::SignIn, HooksOperator, Capybara::DSL
- Defined in:
- lib/browser_crawler/engine.rb
Defined Under Namespace
Classes: UnavailableCallBackMethod
Constant Summary collapse
- REPORT_SAVE_FOLDER_PATH =
'tmp'.freeze
- CUPRITE_OPTIONS =
{ window_size: [1280, 1600] }.freeze
- SCREENSHOT_OPERATOR_OPTIONS =
{ save_screenshots: false, save_screenshots_to: nil, format: 'png', filename: nil }.freeze
Instance Attribute Summary collapse
-
#crawl_manager ⇒ Object
readonly
Returns the value of attribute crawl_manager.
-
#logger ⇒ Object
readonly
Returns the value of attribute logger.
-
#report_store ⇒ Object
readonly
Returns the value of attribute report_store.
-
#screenshot_operator ⇒ Object
readonly
Returns the value of attribute screenshot_operator.
Instance Method Summary collapse
- #after(type: :all, &hook) ⇒ Object
- #before(type: :all, &hook) ⇒ Object
- #change_page_scan_rules(&hook) ⇒ Object
- #extract_links(url:) ⇒ Object
-
#initialize(browser_options: {}, screenshots_options: {}, max_pages: nil, deep_visit: false, logger: nil) ⇒ Engine
constructor
A new instance of Engine.
- #js_before_run(javascript: '') ⇒ Object
- #report_save(folder_path: '', type: :yaml) ⇒ Object
- #unvisited_links(&hook) ⇒ Object
Methods included from DSL::JsHelpers
Methods included from DSL::SignIn
#o365_login, #o365_stay_signed_in, #pingfed_login, #pingfed_o365_login, #sign_in
Methods included from HooksOperator
#exchange_on_hooks, #with_hooks_for
Constructor Details
#initialize(browser_options: {}, screenshots_options: {}, max_pages: nil, deep_visit: false, logger: nil) ⇒ Engine
Returns a new instance of Engine.
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/browser_crawler/engine.rb', line 44 def initialize(browser_options: {}, screenshots_options: {}, max_pages: nil, deep_visit: false, logger: nil) screenshots_operator_options = SCREENSHOT_OPERATOR_OPTIONS.merge(screenshots_options) @screenshot_operator = ScreenshotOperator.new(**screenshots_operator_options) cuprite_options = CUPRITE_OPTIONS.merge(browser_options) @logger = logger || Logger.new(STDOUT) register_chrome_driver(cuprite_options) initialize_report_store(cuprite_options) initialize_crawl_manager(max_pages, deep_visit) end |
Instance Attribute Details
#crawl_manager ⇒ Object (readonly)
Returns the value of attribute crawl_manager.
39 40 41 |
# File 'lib/browser_crawler/engine.rb', line 39 def crawl_manager @crawl_manager end |
#logger ⇒ Object (readonly)
Returns the value of attribute logger.
39 40 41 |
# File 'lib/browser_crawler/engine.rb', line 39 def logger @logger end |
#report_store ⇒ Object (readonly)
Returns the value of attribute report_store.
39 40 41 |
# File 'lib/browser_crawler/engine.rb', line 39 def report_store @report_store end |
#screenshot_operator ⇒ Object (readonly)
Returns the value of attribute screenshot_operator.
39 40 41 |
# File 'lib/browser_crawler/engine.rb', line 39 def screenshot_operator @screenshot_operator end |
Instance Method Details
#after(type: :all, &hook) ⇒ Object
99 100 101 |
# File 'lib/browser_crawler/engine.rb', line 99 def after(type: :all, &hook) HooksContainer.instance.add_hook(method: :after, type: type, hook: hook) end |
#before(type: :all, &hook) ⇒ Object
95 96 97 |
# File 'lib/browser_crawler/engine.rb', line 95 def before(type: :all, &hook) HooksContainer.instance.add_hook(method: :before, type: type, hook: hook) end |
#change_page_scan_rules(&hook) ⇒ Object
107 108 109 |
# File 'lib/browser_crawler/engine.rb', line 107 def change_page_scan_rules(&hook) HooksContainer.instance.add_hook(type: :scan_rules, hook: hook) end |
#extract_links(url:) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/browser_crawler/engine.rb', line 68 def extract_links(url:) initialize_crawler(url) begin with_hooks_for(type: :all) do crawl_manager.crawl( target_url: url, capybara_session: Capybara.current_session, screenshot_operator: screenshot_operator ) end rescue StandardError => error logger.fatal("#{error.message} \n #{error.backtrace.join("\n")}") ensure @report_store.finish end self end |
#js_before_run(javascript: '') ⇒ Object
62 63 64 65 66 |
# File 'lib/browser_crawler/engine.rb', line 62 def js_before_run(javascript: '') return if javascript.empty? @javascript_before_run = javascript end |
#report_save(folder_path: '', type: :yaml) ⇒ Object
88 89 90 91 92 93 |
# File 'lib/browser_crawler/engine.rb', line 88 def report_save(folder_path: '', type: :yaml) save_folder_path = folder_path.empty? ? REPORT_SAVE_FOLDER_PATH : folder_path ReportFactory.save(store: @report_store, type: type.to_sym, save_folder_path: save_folder_path) end |
#unvisited_links(&hook) ⇒ Object
103 104 105 |
# File 'lib/browser_crawler/engine.rb', line 103 def unvisited_links(&hook) HooksContainer.instance.add_hook(type: :unvisited_links, hook: hook) end |