Class: RHACK::Scout

Inherits:
Object show all
Defined in:
lib/rhack.rb,
lib/rhack/scout.rb

Direct Known Subclasses

Proxy::Interceptor

Defined Under Namespace

Classes: ProxyError

Constant Summary collapse

# Baseline request headers; #mkHeader starts from a copy of this hash
# and adds the per-request entries on top.
DefaultHeader = {
  'Expect'          => '',
  'Keep-Alive'      => '300',
  'Accept-Charset'  => 'windows-1251,utf-8;q=0.7,*;q=0.7',
  'Accept-Language' => 'ru,en-us;q=0.7,en;q=0.3',
  'Connection'      => 'keep-alive'
}
# Class-wide retry table; #retry? consults it (together with the per-instance
# @retry) as a mapping of site => list of error names.
# NOTE(review): `scout` is not defined in this view -- presumably a
# configuration object; confirm against lib/rhack.rb.
@@retry =
scout.retry.b || {}
# Class-wide default timeout; #initialize falls back to it when no :timeout
# option is given.
@@timeout =
scout.timeout.b || 60
# CA certificate bundle that #setup_curl assigns to every new Curl handle.
# Uses the configured path when present, otherwise config/cacert.pem resolved
# relative to this file.
@@cacert =
scout.cacert.b ? File.expand_path(scout.cacert) : File.expand_path('../../config/cacert.pem', __FILE__)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*argv) ⇒ Scout

Returns a new instance of Scout.

Raises:



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rhack/scout.rb', line 27

# Builds a scout for a target URI.
#
# Positional arguments (all optional, defaults in brackets):
# uri ['http://'], proxy [nil], user agent [:rand], refforge [1];
# a trailing options hash may carry :cp / :ck, :raise, :engine,
# :timeout and :retry.
#
# Raises ProxyError when the proxy argument has the wrong type.
def initialize(*argv)
  uri, proxy, @ua, @refforge, opts = argv.get_opts(['http://', nil, :rand, 1])

  if proxy
    # A web proxy is described by a String, a plain proxy by an Array.
    wrong_type = webproxy ? !proxy.is(String) : !proxy.is(Array)
    raise ProxyError, proxy if wrong_type
  end

  # Prepend a scheme (in place) when the caller passed a bare host/path.
  'http://' >> uri unless uri =~ /^\w+:\/\//

  if proxy && proxy[1] && proxy[1].to_i == 0
    # Second element is a name rather than a port number: web proxy.
    # NOTE(review): eval on a caller-supplied name executes arbitrary code;
    # consider a constant lookup if proxy data can come from untrusted input.
    @webproxy = eval("WebProxy::#{proxy[1]}")
    @proxy    = proxy[0].parse(:uri).root
  elsif proxy
    proxy[0] = proxy[0].to_ip if proxy[0].is(Integer)
    @proxy   = proxy
  end

  @cookies = {}
  @body    = {}
  @num     = []
  @cookies_enabled = opts[:cp] || opts[:ck]
  # Named @raise_err because a @raise reader would shadow Kernel#raise here.
  @raise_err = opts[:raise]
  @engine    = opts[:engine]
  @timeout   = opts[:timeout] || @@timeout || 60

  # Every HTTP verb starts with the shared no-op callback.
  noop = Proc::NULL
  @delete_proc = noop
  @put_proc    = noop
  @head_proc   = noop
  @get_proc    = noop
  @post_proc   = noop

  update(uri)

  @retry = opts[:retry] || {}
  # A bare list of error names applies to the current host only.
  @retry = {@uri.host => @retry} if @retry.is(Array)
end

Instance Attribute Details

#bodyObject (readonly)

Returns the value of attribute body.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @body.
def body() @body end

#cookiesObject (readonly)

Returns the value of attribute cookies.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Read-only accessor for @cookies.
def cookies() @cookies end

#cookies_enabledObject (readonly)

Returns the value of attribute cookies_enabled.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Read-only accessor for @cookies_enabled.
def cookies_enabled() @cookies_enabled end

#errorObject (readonly)

Returns the value of attribute error.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @error.
def error() @error end

#headersObject (readonly)

Returns the value of attribute headers.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @headers.
def headers() @headers end

#httpObject (readonly)

Returns the value of attribute http.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @http.
def http() @http end

#last_methodObject (readonly)

Returns the value of attribute last_method.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @last_method.
def last_method() @last_method end

#pathObject

Returns the value of attribute path.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @path.
def path() @path end

#proxyObject

Returns the value of attribute proxy.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @proxy.
def proxy() @proxy end

#proxystrObject (readonly)

Returns the value of attribute proxystr.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @proxystr.
def proxystr() @proxystr end

#raise_errObject

Returns the value of attribute raise_err.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @raise_err.
def raise_err() @raise_err end

#refforgeObject (readonly)

Returns the value of attribute refforge.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Read-only accessor for @refforge.
def refforge() @refforge end

#retryObject

Returns the value of attribute retry.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @retry (the per-instance retry table).
def retry
  return @retry
end

#rootObject

Returns the value of attribute root.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @root.
def root() @root end

#sldObject

Returns the value of attribute sld.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @sld.
def sld() @sld end

#timeoutObject

Returns the value of attribute timeout.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @timeout.
def timeout() @timeout end

#uaObject (readonly)

Returns the value of attribute ua.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Read-only accessor for @ua.
def ua() @ua end

#uriObject (readonly)

Returns the value of attribute uri.



8
9
10
# File 'lib/rhack/scout.rb', line 8

# Read-only accessor for @uri.
def uri() @uri end

#webproxyObject (readonly)

Returns the value of attribute webproxy.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Read-only accessor for @webproxy.
def webproxy() @webproxy end

Instance Method Details

#available?Boolean

A scout must not be reused until both the response has arrived and its callback has finished processing. Otherwise #retry! may not work as expected: if the scout received its callback as a block argument, it may re-run not the original callback but its copy bound to a different scope.

Returns:

  • (Boolean)


238
239
240
# File 'lib/rhack/scout.rb', line 238

# True only when the handle is not queued in the multi and no callback
# is still being processed.
def available?
  !(loaded? || @busy)
end

#cp_offObject



201
# File 'lib/rhack/scout.rb', line 201

# Switches cookie processing off.
def cp_off
  @cookies_enabled = false
end

#cp_onObject



200
# File 'lib/rhack/scout.rb', line 200

# Switches cookie processing on.
def cp_on
  @cookies_enabled = true
end

#dumpObject



110
111
112
113
114
115
# File 'lib/rhack/scout.rb', line 110

# Builds a human-readable report: proxy string, request line and headers,
# request body, the response, and a "Ready" marker when @ready is set.
def dump
  request = {"Action"=>@root+@path} + @http.headers
  report = "IP: #{@proxystr}\nRequest: "
  report << request.dump + @body.dump + "Response: #{res}"
  report << "\nReady" if @ready
  report
end

#expand(uri) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/rhack/scout.rb', line 128

# Resolves a dot-relative reference ("./x", "../x") against the directory of
# the last effective URL, or of @uri when nothing has been fetched yet.
#
# When a web proxy is in use and no effective URL is known, the reference is
# returned untouched.
#
# Fix: the base directory is now always joined with a "/" separator, so
# "./d.html" against "/a/b/c.html" yields "/a/b/d.html" (it used to yield
# "/a/bd.html"), and a base path that ends in "/" keeps its last segment.
def expand(uri)
  last_url = @http.last_effective_url
  return uri if @webproxy && !last_url

  path = (last_url ? last_url.parse(:uri) : @uri).path.to_s
  # Everything up to and including the last slash is the base directory.
  base = path[%r{\A.*/}m] || '/'
  # "../" climbs one level; the root directory is its own parent.
  base = base.chomp('/')[%r{\A.*/}m] || '/' if uri =~ /^\.\./
  # Block form keeps backslashes in the base path literal.
  uri.sub(/^(\.\.?\/)?/) { base }
end

#fix(path) ⇒ Object



117
118
119
120
121
122
123
124
125
126
# File 'lib/rhack/scout.rb', line 117

# Normalises a request path: spaces become "+", dot-relative references are
# expanded, the scout is re-pointed via #update, and the result is encoded
# for the web proxy when one is configured.
def fix(path)
  fixed = path.tr(' ', '+')
  fixed = expand(fixed) if fixed =~ /^\./
  # Truthy when #update switched roots or the parsed URI carries its own root.
  absolute = update(fixed) || @uri.root
  return fixed unless @webproxy

  @webproxy.encode(absolute ? fixed : @root + fixed)
end

#load(path = @path, headers = {}, not_redir = 1, relvl = 10, &callback) ⇒ Object



281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/rhack/scout.rb', line 281

# Prepares @http for a request to +path+ and queues it through #load!.
#
# path      - request path; passed through #fix before use.
# headers   - extra headers merged over the result of #mkHeader.
# not_redir - when truthy (per `.b`), 3xx responses are not followed.
# relvl     - number of redirects that may still be followed.
# callback  - block yielded with @http once a non-redirect response is in.
#
# The raw arguments are cached in @__* variables so #retry! can replay
# the same request.
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
  # Mark the scout as in use until the callback has run (see #available?).
  @busy = true
  # Cache the unprocessed arguments once so that #retry! can repeat the request.
  @__path = path
  @__headers = headers
  @__not_redir = not_redir
  @__relvl = relvl
  @__callback = callback
  
  @http.path = path = fix(path)
  @http.headers = mkHeader(path).merge!(headers)
  @http.timeout = @timeout

  @http.on_complete {|curl| # = @http
    # @http has already been removed from the queue when the request completed,
    # but this callback may occur anywhere in a serial queue of curl callbacks.
    @error = nil
    # While not outdated, Curl::Response here may contain pointers to freed
    # memory, thus throwing exceptions on #to_s and #inspect.
    @http.outdate!
    res = @http.res
    process_cookies res if @cookies_enabled
    # We cannot just cancel on_complete in an on_redirect block,
    # because loadGet should (and will) immediately reset on_complete back.
    # Follow the Location header with a GET while the redirect budget lasts.
    if res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = res.hash.location
      loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
    else
      yield @http if block_given?
      # Now we assume that the data of this @http has been copied or will not
      # be used anymore, thus the scout can be reused.
      @busy = false
      @http.on_failure &Proc::NULL
    end
  }
  # Curl::Err::* (TCP/IP level) exception callback.
  # It may have been set from outside; install the default only if absent.
  # NOTE(review): relies on @http.on_failure without a block acting as a
  # getter -- confirm against the Curl binding in use.
  @http.on_failure {|curl, error|
    process_failure(*error)
  } unless @http.on_failure
  
  load!
end

#load!Object



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/rhack/scout.rb', line 265

# Queues @http on the shared Curl multi handle (Curl.carier).
#
# If the multi refuses the handle, it is removed and added once more.
# On RuntimeError the handle is removed when still queued, and after a
# one-second pause the whole operation is retried.
# NOTE(review): the retry is unbounded; a persistent failure loops forever.
#
# Fix: the re-add call used a look-alike Cyrillic letter in `carier`, which
# raised NoMethodError (not caught by the rescue below) instead of re-adding.
def load!
  unless Curl.carier.add @http
    L.warn "#{self}##{object_id}: Failed to add Curl::Easy##{@http.object_id} to Curl::Multi##{Curl.carier.object_id}. Trying to remove it and re-add."
    Curl.carier.remove @http
    Curl.carier.add @http
  end
rescue RuntimeError => e
  # Non-curl runtime errors get a hint about the most likely cause.
  e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
  L.warn "#{self}##{object_id}: #{e.inspect}: #{e.message}"
  if loaded?
    Curl.carier.remove @http
  end
  sleep 1
  load!
end

#loadDelete(*argv, &callback) ⇒ Object



337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/rhack/scout.rb', line 337

# Issues a DELETE request.
#
# argv     - optional uri (defaults to @path) plus options
#            :headers ({}), :redir (false), :relvl (2).
# callback - stored as the DELETE callback when given; otherwise the
#            previously stored one is reused.
def loadDelete(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = true
  @last_method = :delete
  callback = block_given? ? (@delete_proc = callback) : @delete_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end

#loaded?Boolean

Returns:

  • (Boolean)


229
230
231
# File 'lib/rhack/scout.rb', line 229

# True while @http is among the requests held by the shared Curl multi.
def loaded?
  queued = Curl.carier.reqs
  queued.include?(@http)
end

#loadGet(*argv, &callback) ⇒ Object



324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/rhack/scout.rb', line 324

# Issues a GET request.
#
# argv     - optional uri (defaults to @path) plus options
#            :headers ({}), :redir (false), :relvl (2).
# callback - stored as the GET callback when given; otherwise the
#            previously stored one is reused.
def loadGet(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.get = true
  @last_method = :get
  if block_given?
    @get_proc = callback
  end
  callback = @get_proc unless block_given?
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end

#loadHead(*argv, &callback) ⇒ Object



380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# File 'lib/rhack/scout.rb', line 380

# Issues a HEAD request, optionally emulated with a GET that is aborted
# right after the header section.
#
# argv     - optional uri (defaults to @path), emulation mode
#            (:if_retry by default, or :always), and a trailing hash
#            that is forwarded to #load as headers.
# callback - stored as the HEAD callback when given; otherwise the
#            previously stored one is reused.
def loadHead(*argv, &callback)
  uri, emulate, headers = argv.get_opts([@path, :if_retry])
  always = emulate == :always
  @http.head = true unless always
  @last_method = :head
  if block_given?
    @head_proc = callback
  else
    callback = @head_proc
  end

  # GET-based fallback: collect header lines into @headers and stop the
  # transfer at the blank line that ends the header section.
  via_get = lambda do
    @headers = ''
    @http.on_header do |line|
      @headers << line
      line == "\r\n" ? 0 : line.size
    end
    @http.get = true
    load(uri, headers) do |curl|
      curl.on_header
      callback[curl]
    end
  end

  return via_get.call if always

  load(uri, headers) do |curl|
    if !@error and curl.res.code != 200 and emulate == :if_retry
      via_get.call
    else
      callback[curl]
    end
  end
end

#loadPost(*argv, &callback) ⇒ Object



350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/rhack/scout.rb', line 350

# Issues a POST request.
#
# argv     - optional body (defaults to @body), multipart flag (defaults to
#            the handle's current setting), uri (defaults to @path) and
#            options :headers ({}), :redir (false), :relvl (2).
# callback - stored as the POST callback when given; otherwise the
#            previously stored one is reused.
def loadPost(*argv, &callback)
  defaults = [@body, @http.multipart_form_post?, @path]
  hash, multipart, uri, opts = argv.get_opts(defaults, :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  # A non-Hash body is sent as-is, so label it as raw bytes unless told otherwise.
  opts[:headers] = opts[:headers].reverse_merge('Content-Type' => 'application/octet-stream') unless hash.is(Hash)
  mkBody(hash, multipart.b)
  @last_method = :post
  callback = block_given? ? (@post_proc = callback) : @post_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end

#loadPut(*argv, &callback) ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/rhack/scout.rb', line 366

# Issues a PUT request.
#
# argv     - optional payload (defaults to @body), uri (defaults to @path)
#            and options :headers ({}), :redir (false), :relvl (2).
# callback - stored as the PUT callback when given; otherwise the
#            previously stored one is reused.
def loadPut(*argv, &callback)
  body_or_file, uri, opts = argv.get_opts([@body, @path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  @body = body_or_file
  @http.put_data = @body
  @last_method = :put
  callback = block_given? ? (@put_proc = callback) : @put_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end

#main_cksObject



203
# File 'lib/rhack/scout.rb', line 203

# Cookie jar for the current host; created empty on first access.
def main_cks
  host = @uri.host
  @cookies[host] ||= {}
end

#main_cks=(cks) ⇒ Object



204
205
206
207
208
# File 'lib/rhack/scout.rb', line 204

# Replaces the cookie jar for the current host. With a web proxy the jar is
# produced by the proxy's encoder; otherwise each entry is passed through
# Cookie().
def main_cks=(cks)
  jar = if @webproxy
    @webproxy.ck_encode(@root, cks)
  else
    cks.map2 { |name, value| Cookie(name, value) }
  end
  @cookies[@uri.host] = jar
end

#mkBody(params, multipart = false) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/rhack/scout.rb', line 136

# Builds the request body on @http and mirrors it in @body.
#
# params    - Hash of fields, a String (a "file://path" String is replaced
#             by that file's content), or an IO (read fully, then closed).
# multipart - when truthy, +params+ is turned into multipart post fields.
#             A field value may be a Proc (called first), a "file://path"
#             String, or a Hash with :path and optional :name /
#             :content_type describing a file upload.
#
# Returns the body that was assigned.
#
# Fixes:
# * the file field was built with an undefined name (`type`) instead of the
#   computed content type;
# * a Hash without :content_type now falls back to the MIME lookup and then
#   to "application/octet-stream" (a blank string used to win);
# * a missing MIME match no longer calls #content_type on an empty Hash;
# * the file:// capture is taken from the current value only, so a Hash
#   value cannot pick up a stale match from an earlier field;
# * non-String, non-Hash values are sent as plain content.
def mkBody(params, multipart=false)
  if multipart
    @http.multipart_post_body = @body = params.map {|k, v|
      v = v.call if v.is Proc
      file_url = v.is(String) && v[%r(^file://(.+)), 1]
      if file_url or v.is Hash
        path = file_url || v[:path]
        name = v.is(Hash) && v[:name] || File.basename(path)
        content_type = v.is(Hash) && v[:content_type]
        content_type ||= (mime = MIME::Types.of(path)[0]) && mime.content_type
        content_type ||= "application/octet-stream"
        Curl::PostField.file(k, content_type.to_s, name, read(path))
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = case params
    when IO
      @body = params.read
      params.close
      @body
    when String
      file = params[%r(^file://(.+)), 1]
      @body = file ? read(file) : params
    else
      @body = params.urlencode
    end
  end
end

#mkHeader(uri) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/rhack/scout.rb', line 170

# Assembles the header hash for a request to +uri+, starting from a copy of
# DefaultHeader and adding Cookie, Referer and User-Agent as configured.
def mkHeader(uri)
  header = DefaultHeader.dup
  if @cookies_enabled
    jar = ''
    # Each cookie appends itself to the jar string; expired ones are dropped.
    main_cks.delete_if { |_name, cookie| cookie.use(jar, @uri) == :expired }
    # Trim the last two characters of the accumulated string.
    header['Cookie'] = jar[0..-3]
  end
  if @refforge
    referer = if @uri.root
      uri
    else
      (@webproxy ? @http.host : @root) + uri
    end
    header['Referer'] = referer.match(/(.+)[^\/]*$/)[1]
  end
  if @ua
    header['User-Agent'] = @ua == :rand ? RHACK.useragents.rand : @ua
  end
  header
end

#process_cookies(res) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/rhack/scout.rb', line 185

# Extracts cookies from a response and registers each one via Cookie().
#
# res - raw header text (String), or a Curl::Response whose 'cookies' entry
#       already holds the cookie lines.
#
# Fixes:
# * header names are compared with the non-destructive #downcase; the bang
#   variant returns nil for an already lower-case "set-cookie" name, so
#   such cookies were silently skipped;
# * the header line is split on the first ": " only, so a cookie value that
#   itself contains ": " is kept whole;
# * a Set-Cookie line without a value is ignored.
def process_cookies(res)
  ck = []
  case res
    when String
      res.split(/\r?\n/).each {|h|
        name, value = h.split(': ', 2)
        ck << value if name and value and name.downcase == 'set-cookie'
      }
    when Curl::Response
      ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
end

#process_failure(curl_err, message, &callback) ⇒ Object

  • if curl should retry request based on Curl::Err class only

    > false



244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/rhack/scout.rb', line 244

# Handles a transport-level failure reported by curl.
#
# curl_err - the error class; instantiated with +message+ into @error.
# message  - text for the error instance.
# callback - optional block run when the request is not retried.
#
# Retries the request when #retry? approves; otherwise raises @error if
# @raise_err is set, or yields and releases the scout.
def process_failure(curl_err, message, &callback)
  @error = curl_err.new(message)
  @http.outdate!
  # Clear on_complete, otherwise it would run right after this method
  # with broken data.
  @http.on_complete(&Proc::NULL)

  if retry?(curl_err)
    L.debug "#{curl_err} -> reloading scout"
    return retry!
  end

  L.debug "#{curl_err} -> not reloading scout"
  raise @error if @raise_err
  yield if block_given?
  # The data of this @http is assumed copied or no longer needed,
  # so the scout may be reused.
  @busy = false
end

#reqObject



106
107
108
# File 'lib/rhack/scout.rb', line 106

# Request object belonging to the current response.
def req() res.req end

#resObject



102
103
104
# File 'lib/rhack/scout.rb', line 102

# Response held by the current Curl handle.
def res() @http.res end

#retry!(path = @__path, headers = @__headers, not_redir = @__not_redir, relvl = @__relvl, callback = @__callback) ⇒ Object



222
223
224
225
226
227
# File 'lib/rhack/scout.rb', line 222

# Repeats the last request. Every argument defaults to the value cached by
# the previous #load call.
def retry!(path=@__path, headers=@__headers, not_redir=@__not_redir, relvl=@__relvl, callback=@__callback)
  # External parameters, including the post body, are still in place;
  # only the Curl handle itself is rebuilt.
  setup_curl()
  # With a fresh handle, #load installs on_complete again.
  load(path, headers, not_redir, relvl, &callback)
end

#retry?(curl_err) ⇒ Boolean

Returns:

  • (Boolean)


210
211
212
213
214
215
216
217
218
219
220
# File 'lib/rhack/scout.rb', line 210

# Decides whether a failed request should be retried, based only on the
# error class and the retry tables (class-wide @@retry plus per-instance
# @retry), both keyed by site.
def retry?(curl_err)
  # Keep only the configured sites that match the current root, e.g.
  # ['0chan.ru', 'www.nomer.org', 'nomer.org'] against 'http://www.nomer.org'
  # leaves ['www.nomer.org', 'nomer.org'].
  sites = (@@retry.keys + @retry.keys).select_in(@root)
  return false if sites.empty?

  errname = curl_err.self_name
  # Retry as soon as either table lists this error name for a matching site.
  sites.any? do |site|
    [@@retry, @retry].any? { |table| (table[site] || []).include?(errname) }
  end
end

#setup_curlObject



54
55
56
57
58
59
60
61
# File 'lib/rhack/scout.rb', line 54

# Creates a fresh Curl handle for this scout, first detaching the previous
# one from the shared multi if it is still queued. The handle targets the
# web proxy when one is configured, otherwise the site root.
def setup_curl
  Curl.carier.remove(@http) if loaded?
  endpoint = @webproxy ? @proxy : @root
  @http = Curl::Easy(endpoint)
  @http.base = self
  @http.cacert = @@cacert
end

#to_sObject Also known as: inspect



91
92
93
94
95
96
97
98
99
# File 'lib/rhack/scout.rb', line 91

# Short description: class name, the proxy in use (if any), and the root.
def to_s
  via = if @webproxy
    "#{@proxy} ~ "
  elsif @proxy
    @proxy*':'+" ~ "
  else
    ''
  end
  "<##{self.class.self_name} @ " << via << @root+'>'
end

#update(uri) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rhack/scout.rb', line 63

# Points the scout at +uri+.
#
# A reference without a scheme only replaces @uri (a leading "/" is added
# when missing) and returns nil. An absolute URI whose root equals the
# current one also returns nil. Otherwise the root, second-level domain,
# path, Curl handle, proxy settings and cookie jar are refreshed and self
# is returned.
def update(uri)
  absolute = uri[/^\w+:\/\//]
  uri = '/' + uri if !absolute && uri[0, 1] != '/'
  @uri = uri.parse(:uri)
  return unless absolute
  return if @uri.root == @root

  @root = @uri.root
  @sld  = @root[/[\w-]+\.[a-z]+$/]
  @path = @uri.fullpath

  if @http
    @http.url = @webproxy ? @proxy : @root
  else
    setup_curl
  end

  if !@proxy
    @proxystr = 'localhost'
  elsif @webproxy
    @proxystr = @proxy[0]
  else
    @http.proxy_url = @proxy*':'
    @proxystr = @http.proxy_url
  end

  # A Hash given as the cookie option seeds the jar for the new host once.
  if @cookies_enabled.is(Hash)
    self.main_cks = @cookies_enabled
    @cookies_enabled = true
  end
  self
end