Module: ScrapeHelper

Included in:
BrilliantWebScraper
Defined in:
lib/scraper/scrape_helper.rb

Overview

Scrapes the data listed below.

Instance Method Summary collapse

Instance Method Details

#perform_scrape(url, read_timeout, open_timeout) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/scraper/scrape_helper.rb', line 9

# Requests +url+ and returns the result of running +grep_data+ on the
# response body.
#
# The body has NUL bytes removed, is transcoded to UTF-8 from the encoding
# reported by +String#detect_encoding+, and is then passed to +grep_data+.
# When that step fails with a transcoding problem (or an ArgumentError), the
# body is re-encoded once with offending bytes replaced by '?' and the step is
# retried; a second failure is reported as a parser error.
#
# The whole operation, including the request, runs under a single
# Timeout.timeout whose length comes from +scraper_timeout+.
#
# @param url the address handed to ScrapeRequest.new
# @param read_timeout passed to scraper_timeout and ScrapeRequest.new
# @param open_timeout passed to scraper_timeout and ScrapeRequest.new
# @return [Object] whatever +grep_data+ returns for the UTF-8 body
# @raise [WebScraper::ParserError] when the body still cannot be processed
#   after the lossy re-encode, or on Encoding::CompatibilityError
# @raise [WebScraper::RequestError] on any other StandardError raised while
#   transcoding or extracting data
# @raise [WebScraper::TimeoutError] when the overall timeout elapses
def perform_scrape(url, read_timeout, open_timeout)
  timeout_in_sec = scraper_timeout(read_timeout, open_timeout)
  Timeout.timeout(timeout_in_sec) do
    # NOTE: the request itself sits outside the begin/rescue below, so errors
    # it raises propagate to the caller as-is.
    response = ScrapeRequest.new(url, read_timeout, open_timeout)
    retry_count = 0
    body = response.body
    begin
      body = body.tr("\000", '')
      encoding = body.detect_encoding[:encoding]
      body = body.encode('UTF-8', encoding)
      grep_data(body)
    rescue Encoding::UndefinedConversionError,
           Encoding::InvalidByteSequenceError,
           ArgumentError => e
      # InvalidByteSequenceError is raised when the body contains bytes that
      # are illegal in the detected source encoding. The fallback below asks
      # for `invalid: :replace`, so it must be retried here rather than
      # falling through to the generic handler and surfacing as a
      # RequestError.
      retry_count += 1
      raise WebScraper::ParserError, e.message if retry_count > 1
      body = body.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
      retry
    rescue Encoding::CompatibilityError => e
      raise WebScraper::ParserError, e.message
    rescue StandardError => e
      raise WebScraper::RequestError, e.message
    end
  end
rescue Timeout::Error => e
  raise WebScraper::TimeoutError, e.message
end