Class: Pioneer::Request

Inherits:
Object
  • Object
show all
Defined in:
lib/pioneer/request.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, pioneer, counter = 0) ⇒ Request

Returns a new instance of Request.



6
7
8
9
10
11
# File 'lib/pioneer/request.rb', line 6

# Builds a request for +url+ on behalf of +pioneer+.
#
# @param url [String] raw address; it is stored after passing through parse_url
# @param pioneer [Object] owner of this request; must respond to request_opts
# @param counter [Integer] value stored in @counter (defaults to 0)
def initialize(url, pioneer, counter = 0)
  @counter      = counter
  @pioneer      = pioneer
  # Normalize the URL first, then snapshot the pioneer's request options.
  @url          = parse_url(url)
  @request_opts = pioneer.request_opts
end

Instance Attribute Details

#counterObject (readonly)

Returns the value of attribute counter.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @counter, which #initialize sets from its third argument
# (default 0) and #retry compares against its +count+ limit.
#
# @return [Object] the stored counter (0 when the default was used)
def counter
  @counter
end

#errorObject (readonly)

Returns the value of attribute error.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @error. The handle_* methods assign a message String to it when
# the request fails, the response carries an error, or the status is not 200;
# #initialize does not set it.
#
# @return [String, nil] the last recorded message, or nil if none was recorded
def error
  @error
end

#pioneerObject (readonly)

Returns the value of attribute pioneer.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @pioneer, the object passed as the second argument to
# #initialize. The other methods here use it for logging, HTTP options and
# the if_* / processing callbacks.
#
# @return [Object] the owning pioneer
def pioneer
  @pioneer
end

#request_optsObject (readonly)

Returns the value of attribute request_opts.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @request_opts, captured from pioneer.request_opts in #initialize
# and later passed to EM::HttpRequest.new when the request is sent.
#
# @return [Object] the stored request options
def request_opts
  @request_opts
end

#responseObject (readonly)

Returns the value of attribute response.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @response, which #handle_request_error_or_return_result assigns
# from EM::Synchrony.sync. It is nil until that assignment has happened.
#
# @return [Object, nil] the stored response object
def response
  @response
end

#resultObject (readonly)

Returns the value of attribute result.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @result, which #perform assigns from the return value of
# #handle_request_error_or_return_result.
#
# @return [Object, nil] the stored result; nil if #perform has not stored one
def result
  @result
end

#urlObject (readonly)

Returns the value of attribute url.



4
5
6
# File 'lib/pioneer/request.rb', line 4

# Reader for @url, the value parse_url returned for the address given to
# #initialize.
#
# @return [String] the normalized URL
def url
  @url
end

Instance Method Details

#handle_request_error_or_return_resultObject

Handles fatal request errors. If the connection fails (or anything else goes wrong while sending the request),

we either call "if_request_error" on the pioneer, if it is defined, or raise an exception.


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/pioneer/request.rb', line 26

# Sends the HTTP request and stores the outcome in @response.
#
# If anything raises while the request is built or awaited, the failure
# message is stored in @error and logged at fatal level. The pioneer's
# if_request_error hook is then called when it exists; otherwise
# Pioneer::HttpRequestError is raised.
#
# @return [Object] the hook's return value on failure, otherwise whatever
#   handle_response_error_or_return_result returns
# @raise [Pioneer::HttpRequestError] on failure when the pioneer has no
#   if_request_error hook
def handle_request_error_or_return_result
  # Only the sending step is guarded; the follow-up call below stays outside
  # the rescue so errors raised further down the chain are not caught here.
  begin
    request = EM::HttpRequest.new(url, @request_opts).aget(pioneer.http_opts)
    # NOTE(review): presumably em-http's "headers received" callback - confirm.
    request.headers { pioneer.headers.call(request) } if pioneer.headers
    @response = EM::Synchrony.sync(request)
  rescue => failure
    @error = "Request totaly failed. Url: #{url}, error: #{failure.message}"
    pioneer.logger.fatal(@error)
    raise Pioneer::HttpRequestError, @error unless pioneer.respond_to?(:if_request_error)

    return pioneer.if_request_error(self)
  end
  handle_response_error_or_return_result
end

#handle_response_error_or_return_resultObject

Handles HTTP response errors. If the response carries an error, we either call

"if_response_error" on the pioneer, if it is defined, or raise an exception.


52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/pioneer/request.rb', line 52

# Checks the received response for an error before looking at its status.
#
# When response.error is truthy the message is stored in @error and logged
# at error level. The pioneer's if_response_error hook is then called when it
# exists; otherwise Pioneer::HttpResponseError is raised.
#
# @return [Object] the hook's return value on error, otherwise whatever
#   handle_status_or_return_result returns
# @raise [Pioneer::HttpResponseError] on error when the pioneer has no
#   if_response_error hook
def handle_response_error_or_return_result
  # Happy path first: no error reported, move on to the status check.
  return handle_status_or_return_result unless response.error

  @error = "Response for #{url} get an error: #{response.error}"
  pioneer.logger.error(@error)
  raise Pioneer::HttpResponseError, error unless pioneer.respond_to?(:if_response_error)

  pioneer.if_response_error(self)
end

#handle_status_or_return_resultObject

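Handles the response status, or runs “processing”. If the status is 200 we call “processing”; otherwise we call

"if_status_XXX" (XXX being the status code) if it exists,
or else "if_status_not_200" if it exists; if neither exists, nothing happens and nil is returned.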


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/pioneer/request.rb', line 71

# Dispatches on the HTTP status of the received response.
#
# A 200 status hands the request to pioneer.processing. Any other status is
# recorded in @error, logged at error level, and passed to the first hook the
# pioneer responds to: if_status_<status>, then if_status_not_200.
#
# @return [Object, nil] the return value of processing or of the selected
#   hook; nil when the status is not 200 and no hook exists
def handle_status_or_return_result
  code = response.response_header.status
  return pioneer.processing(self) if code == 200

  @error = "This #{url} returns this http status: #{code}"
  pioneer.logger.error(@error)

  # The status-specific hook wins over the generic one.
  specific_hook = "if_status_#{code}"
  if pioneer.respond_to?(specific_hook.to_sym)
    pioneer.send(specific_hook, self)
  elsif pioneer.respond_to?(:if_status_not_200)
    pioneer.if_status_not_200(self)
  end
end

#parse_url(url) ⇒ Object

We should parse the URL before sending the request. The URL is URI-escaped. IMPORTANT: ampersands (&) in params are replaced with “%26”! Pluses (+) are replaced with “%2B”.



114
115
116
117
118
119
120
121
122
# File 'lib/pioneer/request.rb', line 114

# Normalizes a URL before it is requested.
#
# Steps, in order:
# 1. Prepend "http://" unless the string already starts with an http:// or
#    https:// scheme (checked case-insensitively, anchored at the start, so
#    "http" appearing later in a host or path no longer suppresses it).
# 2. Percent-escape unsafe characters with the RFC 2396 rules that
#    URI.escape used. URI.escape itself was removed in Ruby 3.0, so the
#    parser object is called directly.
# 3. Replace every "&" with "%26" and every "+" with "%2B".
#
# NOTE(review): step 3 also rewrites the "&" that separates query
# parameters, not only ampersands inside values - confirm this is intended.
#
# @param url [String] raw URL, with or without a scheme
# @return [String] the escaped URL
def parse_url(url)
  url = "http://" + url unless url =~ %r{\Ahttps?://}i
  escaped = URI::DEFAULT_PARSER.escape(url)
  escaped.gsub("&", "%26").gsub("+", "%2B")
end

#performObject

Request processing



16
17
18
19
# File 'lib/pioneer/request.rb', line 16

# Runs the request: logs the target URL at info level, then executes the
# request/response handling chain and remembers its outcome in @result.
#
# @return [Object] whatever handle_request_error_or_return_result returns
def perform
  log = pioneer.logger
  log.info("going to #{url}")
  @result = handle_request_error_or_return_result
end

#response_bodyObject

Shortcut for response.response



127
128
129
# File 'lib/pioneer/request.rb', line 127

# Shortcut for response.response.
# NOTE(review): presumably the body of the HTTP response - confirm against
# the em-http client API.
#
# @return [Object] whatever the stored response object returns for #response
# @raise [NoMethodError] if no response has been stored yet (response is nil)
def response_body
  response.response
end

#response_headerObject

Shortcut for response.response_header



134
135
136
# File 'lib/pioneer/request.rb', line 134

# Shortcut for response.response_header, the same object whose #status is
# read in #handle_status_or_return_result.
#
# @return [Object] the header object of the stored response
# @raise [NoMethodError] if no response has been stored yet (response is nil)
def response_header
  response.response_header
end

#retry(count = nil) ⇒ Object

We can call retry from the crawler like “req.retry”. If count is set, the request is retried no more than “count” times.



93
94
95
96
97
98
# File 'lib/pioneer/request.rb', line 93

# Asks for this request to be retried by raising Pioneer::HttpRetryRequest.
#
# When +count+ is given and @counter has already reached it, #skip is called
# instead, which raises Pioneer::HttpSkipRequest.
#
# @param count [Integer, nil] maximum value of @counter for which a retry is
#   still requested; nil means no limit
# @raise [Pioneer::HttpRetryRequest] when a retry is requested
# @raise [Pioneer::HttpSkipRequest] when the limit has been reached
def retry(count = nil)
  skip if count && @counter >= count
  raise Pioneer::HttpRetryRequest
end

#skipObject

We can skip a request from the crawler like “req.skip”, e.g. if response_body is blank or on a 404 error.



104
105
106
# File 'lib/pioneer/request.rb', line 104

# Abandons this request by raising Pioneer::HttpSkipRequest. #retry also
# calls it once its retry limit is reached.
# NOTE(review): presumably rescued by whatever drives #perform - the rescue
# site is not visible here.
#
# @raise [Pioneer::HttpSkipRequest] always
def skip
  raise Pioneer::HttpSkipRequest
end