Class: Webshaker::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/webshaker/scraper.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}, status_update: ->(status) {}) ⇒ Scraper

Returns a new instance of Scraper.



9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/webshaker/scraper.rb', line 9

# Stores the scrape parameters, reports :scrape_init, and starts a Chrome
# WebDriver session.
#
# @param url [Object] target passed to the driver's +navigate.to+ in #scrape
#   (presumably a URL string; confirm against callers)
# @param options [Hash] stored unchanged and exposed through #options
# @param status_update [#call] callable invoked with a status symbol;
#   defaults to a lambda that does nothing
def initialize(url, options = {}, status_update: ->(status) {})
  @url = url
  @options = options
  @status_update = status_update

  # Reported before the browser is launched.
  @status_update.call(:scrape_init)

  # The Chrome options object is handed to #configure (not shown here)
  # before the driver is created from it.
  chrome_options = Selenium::WebDriver::Chrome::Options.new
  configure(chrome_options)
  @driver = Selenium::WebDriver.for(:chrome, options: chrome_options)
end

Instance Attribute Details

#driver ⇒ Object (readonly)

Returns the value of attribute driver.



7
8
9
# File 'lib/webshaker/scraper.rb', line 7

# Reader for the WebDriver instance assigned to @driver by the constructor.
#
# @return [Object] the current value of @driver
def driver = @driver

#options ⇒ Object (readonly)

Returns the value of attribute options.



7
8
9
# File 'lib/webshaker/scraper.rb', line 7

# Reader for the options value stored in @options by the constructor.
#
# @return [Object] the current value of @options
def options = @options

#status_update ⇒ Object (readonly)

Returns the value of attribute status_update.



7
8
9
# File 'lib/webshaker/scraper.rb', line 7

# Reader for the status callback stored in @status_update by the constructor.
# The callback is invoked with :scrape_init, :scrape_start and :scrape_done.
#
# @return [Object] the current value of @status_update
def status_update = @status_update

#url ⇒ Object (readonly)

Returns the value of attribute url.



7
8
9
# File 'lib/webshaker/scraper.rb', line 7

# Reader for the scrape target stored in @url by the constructor.
#
# @return [Object] the current value of @url
def url = @url

Class Method Details

.scrape(url, options = {}) ⇒ Object



38
39
40
# File 'lib/webshaker/scraper.rb', line 38

# Convenience entry point: builds a scraper for +url+ with +options+ and
# immediately runs #scrape on it. The status callback keeps its default.
#
# @param url [Object] forwarded to the constructor
# @param options [Hash] forwarded to the constructor
# @return [Object] whatever the instance-level #scrape returns
def self.scrape(url, options = {})
  scraper = new(url, options)
  scraper.scrape
end

Instance Method Details

#scrape ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/webshaker/scraper.rb', line 22

# Loads #url in the browser, waits via #do_wait, and captures a base64
# screenshot together with the cleaned-up page source.
#
# The driver is always quit, even when the status callback, navigation,
# waiting or capturing raises, so a failed scrape does not leave a browser
# session behind. On success the order is unchanged: the driver is quit
# first, then :scrape_done is reported.
#
# @return [ScrapeResult] built from the screenshot and the cleaned HTML
# @raise [StandardError] any error raised while scraping is propagated
#   after the driver has been quit; :scrape_done is not reported then
def scrape
  begin
    status_update.call(:scrape_start)

    driver.navigate.to url

    do_wait

    screenshot = driver.screenshot_as :base64
    html_content = clean_up(driver.page_source)
  ensure
    # Release the browser on both the success and the failure path.
    driver.quit
  end

  status_update.call(:scrape_done)

  ScrapeResult.new(screenshot, html_content)
end