Class: GoogleImageScraper::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/google_image_scraper/scraper.rb

Constant Summary collapse

# Absolute XPath ending in an <img> node; going by the name it targets the
# enlarged picture (its use is outside this excerpt -- confirm at the call site).
LARGE_PICTURE_XPATH =
  '/html/body/div[2]/c-wiz/div[3]/div[2]/div[3]/div/div/div[3]/div[2]/c-wiz/div[1]/div[1]/div/div[2]/a/img'.freeze

# Format string for the search page URL; `%s` is the slot for the `q` value.
SEARCH_PAGE_URL_FORMAT = 'https://www.google.com/search?q=%s&source=lnms&tbm=isch'.freeze

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Scraper

Returns a new instance of Scraper.



10
11
12
13
14
# File 'lib/google_image_scraper/scraper.rb', line 10

# Builds the objects the scraper keeps for its lifetime: the driver returned
# by +setup_driver+, a +FileSaver+, and a Selenium explicit wait.
def initialize
  # Timeout handed to Selenium's Wait (documented by Selenium as seconds).
  wait_seconds = 10

  @driver = setup_driver
  @file_saver = FileSaver.new
  @wait = Selenium::WebDriver::Wait.new(timeout: wait_seconds)
end

Instance Method Details

#scrape(keyword, limit = nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/google_image_scraper/scraper.rb', line 16

# Opens the search page for +keyword+ and passes the image elements found
# there to +download_images+, one newly loaded batch at a time.
#
# @param keyword [Object] search term forwarded to +search_page_url+
#   (presumably a String)
# @param limit [Integer, nil] maximum number of images to download. +nil+
#   downloads every image the page ends up listing; zero or a negative value
#   downloads nothing.
# @return [nil]
def scrape(keyword, limit = nil)
  # A non-positive limit means there is nothing to fetch. Without this guard
  # a limit of 0 used to turn into the slice end -1, i.e. "every image".
  return if limit && !limit.positive?

  @driver.get search_page_url(keyword)
  downloaded = 0

  loop do
    # The search page loads results continuously as the scroll point moves,
    # so each pass may find more elements than the previous one.
    elements = @driver.find_elements(css: '.mJxzWe img')
    # Nothing new since the previous pass: the page has no more results.
    break if elements.size <= downloaded

    # Only the elements not handled yet, capped to what the limit still allows.
    batch = elements.drop(downloaded)
    batch = batch.first(limit - downloaded) if limit
    download_images batch
    downloaded += batch.size
    break if limit && downloaded >= limit

    # Presumably moves the scroll point so the page loads its next batch.
    elements.last.click
  end
end