Class: TheMask::Socket

Inherits:
Object
  • Object
show all
Defined in:
lib/the_mask/socket.rb

Constant Summary collapse

DEFAULT_OPEN_TIMEOUT =

seconds. TODO: Move from Mechanize to native Sockets ;)

3
DEFAULT_READ_TIMEOUT =

seconds

3
GENERAL_TIMEOUT =

seconds

5
MAXIMUM_TRIES =

attempts (maximum number of tries per URL before giving up)

3
MINIMUM_PAGE_LENGTH =

bytes

100
FORCE_READ =
false
RESET_USER_AGENT =
true

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Socket

Returns a new instance of Socket.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/the_mask/socket.rb', line 12

# Builds a socket backed by a Mechanize agent.
#
# @param options [Hash] optional settings; every key falls back to the
#   matching class constant when it is absent
# @option options [Numeric] :timeout overall time limit for a single request
# @option options [Integer] :max_tries attempts allowed per URL
# @option options [Boolean] :force keep retrying without an attempt limit
# @option options [Integer] :min_page_length minimum accepted body length in force mode
# @option options [Boolean] :reset_ua pick a new random user agent before every request
# @option options [Numeric] :open_timeout passed to the agent's open_timeout
# @option options [Numeric] :read_timeout passed to the agent's read_timeout
# @option options [Hash] :proxy single proxy (:ip, :port, optional :username / :password)
# @option options [Object] :proxies handed to TheMask::ProxyList.new; takes
#   precedence over :proxy
def initialize(options = {})
  @proxies = nil
  @timeout = options[:timeout] || GENERAL_TIMEOUT
  @max_tries = options[:max_tries] || MAXIMUM_TRIES
  @force = options[:force] || FORCE_READ
  @min_page_length = options[:min_page_length] || MINIMUM_PAGE_LENGTH

  # `||` cannot be used here: the default is true, so an explicit `false`
  # from the caller would be silently replaced by the default.
  reset_ua = options[:reset_ua]
  @reset_user_agent = reset_ua.nil? ? RESET_USER_AGENT : reset_ua

  @agent = Mechanize.new

  @agent.open_timeout = options[:open_timeout] || DEFAULT_OPEN_TIMEOUT
  @agent.read_timeout = options[:read_timeout] || DEFAULT_READ_TIMEOUT

  if options[:proxies]
    @proxies = TheMask::ProxyList.new(options[:proxies])
  elsif options[:proxy]
    proxy = options[:proxy]

    # Credentials are only forwarded when both parts are present.
    if proxy[:username] && proxy[:password]
      @agent.set_proxy proxy[:ip], proxy[:port], proxy[:username], proxy[:password]
    else
      @agent.set_proxy proxy[:ip], proxy[:port]
    end
  end

  # When the user agent is not rotated per request, choose one random value now.
  @agent.user_agent = TheMask.get_random_user_agent_str unless @reset_user_agent
end

Instance Method Details

#open_url(url) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/the_mask/socket.rb', line 40

# Fetches +url+ through the agent and returns the response body.
#
# Before every attempt the user agent is re-randomized (when enabled) and, if
# a proxy list is configured, the next proxy from that list is applied. Any
# StandardError raised by an attempt causes another attempt.
#
# In force mode there is no attempt limit, and pages whose body is empty or
# shorter than @min_page_length are fetched again.
#
# @param url [String] address to fetch
# @return [Object] the +body+ of the fetched page
# @raise [RuntimeError] when force mode is off and @max_tries attempts failed
def open_url(url)
  fetch_page = lambda do
    attempts = 0

    loop do
      # Checked outside the begin/rescue below so that this error reaches the
      # caller instead of being caught and retried.
      if !@force && attempts >= @max_tries
        raise "TheMask: maximum tries reached for URL = #{url} after #{attempts} tries. Check the availability of the host or your proxy settings."
      end

      attempts += 1

      begin
        @agent.user_agent = TheMask.get_random_user_agent_str if @reset_user_agent

        unless @proxies.nil?
          proxy = @proxies.get_proxy

          if proxy.username && proxy.password
            @agent.set_proxy proxy.ip, proxy.port, proxy.username, proxy.password
          else
            @agent.set_proxy proxy.ip, proxy.port
          end
        end

        page = Timeout.timeout(@timeout) { @agent.get url }
        break page
      rescue StandardError
        # Timeouts, connection failures and HTTP errors are all StandardErrors;
        # fall through to the next attempt. Signals (e.g. Interrupt) are
        # deliberately not rescued so the process stays interruptible.
      end
    end
  end

  return fetch_page.call.body unless @force

  loop do
    page = fetch_page.call
    next if page.nil?

    body = page.body
    text = body.to_s
    return body unless text.empty? || text.length < @min_page_length
  end
end