Module: HTTPHelper

Includes:
TextHelper
Included in:
TitleGrabber::MultiThreadedGrabber
Defined in:
lib/http_helper.rb

Constant Summary collapse

INVALID_BYTE_SEQ =
"invalid byte sequence".freeze
CONNECTION_ERRORS =
["SSL_connect", "Connection reset",
"execution expired"].freeze
REST_INTERVAL =
0.5..1
REDIR_FORBIDDEN =

Example of the error message this pattern matches: "redirection forbidden: t.co/blui5zKJjD -> bit.ly/2HXRYGw (RuntimeError)"

/redirection forbidden/
HTTP_REDIR =
%r(-> (http://\S+))
SRV_UNAVAILABLE =
503

Constants included from TextHelper

TextHelper::SINGLE_SPACE

Instance Method Summary collapse

Methods included from TextHelper

#clean_up_whitespace, #utf8_encode

Instance Method Details

#open_w_timeout(url, connect_to:, read_to:, max_retries:) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/http_helper.rb', line 19

# Opens +url+ through open-uri, following "redirection forbidden" hops by hand
# and retrying transient failures.
#
# The whole attempt is wrapped in Timeout.timeout(read_to), so +read_to+ also
# bounds the time spent connecting. SSL certificate verification is disabled
# (OpenSSL::SSL::VERIFY_NONE).
#
# @param url [String] the address to fetch
# @param connect_to [Numeric] passed to open-uri as +open_timeout+
# @param read_to [Numeric] passed to open-uri as +read_timeout+ and used as
#   the overall Timeout.timeout limit of a single attempt
# @param max_retries [Integer] how many times a transient failure (timeout,
#   HTTP 503, or a message starting with one of CONNECTION_ERRORS) is retried
# @return [Object, nil] whatever URI.open returned, or nil when the request
#   ultimately failed (the failure is logged, not raised)
def open_w_timeout(url, connect_to:, read_to:, max_retries:)
  logger.info "[#{Thread.current.name}] GET #{url}"
  retries = 0
  # Budget of manually followed redirects; without it a redirect cycle would
  # make the +retry+ below spin forever.
  redirects_left = 10

  begin
    Timeout.timeout(read_to) {
      # URI.open instead of Kernel#open: open-uri deprecated the Kernel#open
      # override in Ruby 2.7 and it no longer handles URLs since Ruby 3.0.
      URI.open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE,
                    open_timeout: connect_to, read_timeout: read_to)
    }
  rescue => err
    msg = err.message

    if REDIR_FORBIDDEN.match?(msg)
      target = msg[HTTP_REDIR, 1]
      if target && redirects_left > 0
        redirects_left -= 1
        url = target
        retry
      end

      # No usable target or redirect budget exhausted: give up, but say so.
      logger.error "[#{Thread.current.name}] URL: #{url} [#{msg}]"
      return
    end

    http_error = err.is_a?(OpenURI::HTTPError)
    transient = http_error || err.is_a?(Timeout::Error) ||
                CONNECTION_ERRORS.any? { |prefix| msg.start_with?(prefix) }

    will_retry = transient && retries < max_retries
    # Among HTTP errors only "service unavailable" is worth another attempt.
    will_retry &&= Integer(err.io.status.first) == SRV_UNAVAILABLE if http_error

    if will_retry
      retries += 1
      rest_time = rand(REST_INTERVAL)
      logger.warn "[#{Thread.current.name}] URL: #{url} [#{msg}] - Going to sleep for #{rest_time.round(1)} secs - Retry ##{retries}"
      sleep(rest_time)
      retry
    end

    logger.error "[#{Thread.current.name}] URL: #{url} [#{msg}]"
    nil
  end
end

#read_w_timeout(url, connect_to:, read_to:, max_retries:) ⇒ Object



60
61
62
63
64
65
# File 'lib/http_helper.rb', line 60

# Fetches +url+ with open_w_timeout and reads the whole response body.
#
# @param url [String] the address to fetch
# @param connect_to [Numeric] forwarded to open_w_timeout
# @param read_to [Numeric] forwarded to open_w_timeout
# @param max_retries [Integer] forwarded to open_w_timeout
# @return [Array(String, String), nil] the response's +base_uri+ as a string
#   and the body passed through utf8_encode, or nil when open_w_timeout
#   returned nothing
def read_w_timeout(url, connect_to:, read_to:, max_retries:)
  res = open_w_timeout(url, connect_to: connect_to, read_to: read_to,
                            max_retries: max_retries)
  return unless res

  begin
    [res.base_uri.to_s, utf8_encode(res.read)]
  ensure
    # Release the response IO right away instead of waiting for GC; prefer
    # close! when available so a Tempfile-backed body is also unlinked.
    if res.respond_to?(:close!)
      res.close!
    elsif res.respond_to?(:close)
      res.close
    end
  end
end