Module: Tap::Mechanize::Utils

Included in:
Capture
Defined in:
lib/tap/mechanize/utils.rb

Constant Summary collapse

EOL =
Rack::Utils::Multipart::EOL

Class Method Summary collapse

Class Method Details

.each_member(obj) ⇒ Object

Yields each member of an input array to the block and collects the result. If obj is not an array, the value is simply yielded to the block.



161
162
163
164
165
166
167
# File 'lib/tap/mechanize/utils.rb', line 161

# Applies the given block to obj, member by member.
#
# When obj is an Array the block is called once per member and the block
# results are returned as a new array.  Any other object is passed to the
# block as-is and the block's result is returned directly.
def each_member(obj)
  return yield(obj) unless obj.kind_of?(Array)

  obj.map { |member| yield(member) }
end

.headerize(str) ⇒ Object

Headerizes an underscored string. The input is converted to a string using to_s.

headerize('SOME_STRING')   # => 'Some-String'
headerize('some string')   # => 'Some-String'
headerize('Some-String')   # => 'Some-String'


195
196
197
198
199
200
# File 'lib/tap/mechanize/utils.rb', line 195

# Converts an underscored, space-separated or dash-separated string into
# header form: each segment has its first character upcased and the rest
# downcased, and the segments are joined with '-'.
#
# The input is converted with to_s first, so symbols and other objects are
# accepted.  Empty segments (produced by leading or repeated separators) are
# skipped rather than raising an error.
#
#   headerize('SOME_STRING')    # => 'Some-String'
#   headerize('some string')    # => 'Some-String'
#   headerize('Some-String')    # => 'Some-String'
#   headerize('some__string')   # => 'Some-String'
#   headerize('')               # => ''
#
def headerize(str)
  segments = str.to_s.split(/[\s_-]/).reject { |segment| segment.empty? }
  segments.map { |segment| segment[0, 1].upcase + segment[1..-1].downcase }.join("-")
end

.parse_http_request(socket, keep_content = true) ⇒ Object

Parses a WEBrick::HTTPRequest from the input socket into a hash that may be resubmitted by Dispatch. Sockets can be any kind of IO (File, StringIO, etc..) and should be positioned such that the next line is the start of an HTTP request. Strings used as sockets are converted into StringIO objects.

parse_http_request("GET /path HTTP/1.1\n")
# => {
# :request_method => "GET",
# :uri => "/path",
# :version => "1.1",
# :headers => {},
# :params => {},
# }

WEBrick parsing of HTTP format

WEBrick will parse headers then the body of a request, and currently (1.8.6) considers an empty line as a break between the headers and body. In general header parsing is forgiving with end-line characters (i.e. "\r\n" and "\n" are both acceptable) but parsing of multipart/form data IS NOT.

Multipart/form data REQUIRES that the end-line characters are "\r\n". A boundary is always started with "--" and the last boundary is completed with "--". As always, the content-length must be correct.

# Notice an empty line between the last header 
# (in this case 'Content-Length') and the body.
msg = <<-_end_of_message_
POST /path HTTP/1.1
Host: localhost:8080
Content-Type: multipart/form-data; boundary=1234567890
Content-Length: 158

--1234567890
Content-Disposition: form-data; name="one"

value one
--1234567890
Content-Disposition: form-data; name="two"

value two
--1234567890--
_end_of_message_

# ensure the end of line characters are correct...
socket = StringIO.new msg.gsub(/\n/, "\r\n")

Tap::Net.parse_http_request(socket)
# => {
# :request_method => "POST",
# :uri => "http://localhost:8080/path",
# :version => "HTTP/1.1",
# :headers => {
#   "Host" => "localhost:8080",
#   "Content-Type" => "multipart/form-data; boundary=1234567890", 
#   "Content-Length" => "158"},
# :params => {
#   "one" => "value one", 
#   "two" => "value two"}}

– TODO: check if there are other headers to capture from a multipart/form file. Currently only ‘Filename’ and ‘Content-Type’ are added



76
77
78
79
80
81
82
83
# File 'lib/tap/mechanize/utils.rb', line 76

# Reads one HTTP request from socket and returns it as a hash, by parsing it
# with a WEBrick::HTTPRequest and handing the result to
# parse_webrick_request.
#
# socket may be any IO-like object accepted by WEBrick::HTTPRequest#parse; a
# String is wrapped in a StringIO first.  keep_content is passed through to
# parse_webrick_request unchanged.
def parse_http_request(socket, keep_content=true)
  io = socket.kind_of?(String) ? StringIO.new(socket) : socket

  request = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
  request.parse(io)
  parse_webrick_request(request, keep_content)
end

.parse_multipart(env) ⇒ Object

Lifted from Rack::Utils::Multipart, and modified to collect overloaded params and params with names suffixed by ‘[]’ as arrays.



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/tap/mechanize/utils.rb', line 206

# Parses a multipart/form-data request body out of a Rack-style env hash.
#
# Reads env['CONTENT_TYPE'] (to find the boundary), env['CONTENT_LENGTH'] and
# env['rack.input'] (the body, which is consumed through read).
#
# Returns nil when the content type is not multipart/form-data with a
# boundary.  Otherwise returns a hash of part names to part data, where the
# data for a part is either the accumulated body string or, for parts that
# declare a filename, a hash with :filename, :type, :name, :tempfile and
# :head.  A name ending in '[]' always maps to an array, and a name that
# occurs more than once has its values collected into an array.
#
# Raises EOFError ("bad content body") if the body does not start with the
# boundary followed by EOL, or if a read returns nothing before the headers
# and closing boundary of the current part have been found.
def parse_multipart(env)
  unless env['CONTENT_TYPE'] =~
    %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n
    nil
  else
    # Delimiter as it appears in the body: the declared boundary prefixed by "--".
    boundary = "--#{$1}"

    params = {}
    buf = ""
    content_length = env['CONTENT_LENGTH'].to_i
    input = env['rack.input']

    boundary_size = boundary.size + EOL.size
    bufsize = 16384

    # The opening boundary line is read (and verified) separately below.
    content_length -= boundary_size

    status = input.read(boundary_size)
    raise EOFError, "bad content body"  unless status == boundary + EOL

    # A boundary, optionally preceded by EOL, followed by either EOL (another
    # part follows) or "--" (this was the final boundary).
    rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/

    # One iteration per part.
    loop {
      head = nil
      body = ''
      filename = content_type = name = nil

      # Keep reading until the part headers have been extracted and the
      # buffer contains the boundary that ends this part.
      until head && buf =~ rx
        if !head && i = buf.index("\r\n\r\n")
          head = buf.slice!(0, i+2) # First \r\n
          buf.slice!(0, 2)          # Second \r\n

          filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
          content_type = head[/Content-Type: (.*)\r\n/ni, 1]
          name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]

          # Parts with a filename are accumulated in a Tempfile instead of a String.
          if filename
            body = Tempfile.new("RackMultipart")
            body.binmode  if body.respond_to?(:binmode)
          end

          # Re-evaluate the loop condition before reading more input.
          next
        end

        # Save the read body part.
        # The last boundary_size+4 characters are held back in buf
        # (presumably so that a boundary split across two reads is never
        # written into the body -- TODO confirm).
        if head && (boundary_size+4 < buf.size)
          body << buf.slice!(0, buf.size - (boundary_size+4))
        end

        # Read at most bufsize characters, never more than what remains.
        c = input.read(bufsize < content_length ? bufsize : content_length)
        raise EOFError, "bad content body"  if c.nil? || c.empty?
        buf << c
        content_length -= c.size
      end

      # Save the rest.
      if i = buf.index(rx)
        body << buf.slice!(0, i)
        # Drop the delimiter from the front of the buffer; boundary_size+2
        # equals EOL + boundary + (EOL or "--") assuming EOL is "\r\n".
        buf.slice!(0, boundary_size+2)

        # $1 is rx's capture from the index call above; "--" marks the final
        # boundary, signalled to the break below with a -1 sentinel.
        content_length = -1  if $1 == "--"
      end

      if filename
        body.rewind
        data = {:filename => filename, :type => content_type,
          :name => name, :tempfile => body, :head => head}
      else
        data = body
      end

      # Parts without a name are discarded.
      if name
        case current = params[name]
        when nil 
          # First value: names ending in '[]' start out as an array.
          params[name] = (name =~ /\[\]\z/ ? [data] : data)
        when Array 
          params[name] << data
        else 
          # Second value for a plain name: promote to an array.
          params[name] = [current, data]
        end
      end

      break  if buf.empty? || content_length == -1
    }

    params
  end
end

.parse_rack_request(request, request_params, keep_content = true) ⇒ Object

Parses a Rack::Request, with the same activity as parse_http_request.



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/tap/mechanize/utils.rb', line 123

# Converts a Rack-style request into a hash with the keys :uri,
# :request_method, :version, :headers and :params.
#
# request::        must respond to env (a hash) and request_method.
# request_params:: hash of parameter names to values.  A value may be a
#                  single object or an array of objects; hash members are
#                  treated as uploaded files (with :type, :filename and
#                  :tempfile entries) and converted into a hash with
#                  'Content-Type' and 'Filename' keys.
# keep_content::   when true the tempfile of each uploaded file is read and
#                  stored under 'Content'.
#
# Headers are taken from the env entries named 'CONTENT_TYPE' or prefixed by
# 'HTTP_' (except 'HTTP_VERSION'), with names converted by headerize.
#
# The :uri is "http://<Host>/<PATH_INFO>" when a Host header is present.
# When there is no Host header the bare PATH_INFO is used, as in
# parse_webrick_request, instead of raising a TypeError from File.join.
#
# The :version is a Float when HTTP_VERSION looks like "HTTP/x.y", otherwise
# the raw HTTP_VERSION value (possibly nil).
def parse_rack_request(request, request_params, keep_content=true)
  env = request.env

  headers = {}
  env.each_pair do |key, value|
    name = case key
    when "HTTP_VERSION" then next
    when /^HTTP_(.*)/ then $1
    when 'CONTENT_TYPE' then key
    else next
    end

    headers[headerize(name)] = value
  end

  params = {}
  request_params.each_pair do |key, value|
    params[key] = each_member(value) do |obj|
      if obj.kind_of?(Hash)
        file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
        file['Content'] = obj[:tempfile].read if keep_content
        file
      else
        obj
      end
    end
  end

  host = headers['Host']
  path = env['PATH_INFO']
  http_version = env['HTTP_VERSION']

  {
    :uri => host ? File.join("http://", host, path) : path,
    :request_method => request.request_method,
    :version => http_version =~ /^HTTP\/(.*)$/ ? $1.to_f : http_version,
    :headers => headers,
    :params => params
  }
end

.parse_webrick_request(req, keep_content = true) ⇒ Object

Parses a WEBrick::HTTPRequest, with the same activity as parse_http_request.



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/tap/mechanize/utils.rb', line 87

# Converts a WEBrick-style request into a hash with the keys :uri,
# :request_method, :version, :headers and :params.
#
# req::          must respond to header, query, path_info, request_method
#                and http_version.  header and query may be nil.
# keep_content:: when true the content of each uploaded file is stored under
#                'Content' in the hash describing that file.
#
# Header names are converted by headerize and both header and parameter
# values are simplified by splat.  Each query value is walked with each_data;
# data with a filename become a hash with 'Filename' and 'Content-Type' keys,
# all other data are converted with to_s.
#
# The :uri is "http://<Host>/<path_info>" when a Host header is present, or
# the bare path_info otherwise.
def parse_webrick_request(req, keep_content=true)
  headers = {}
  if req.header
    req.header.each_pair do |name, values|
      headers[headerize(name)] = splat(values)
    end
  end

  params = {}
  if req.query
    req.query.each_pair do |name, form_data|
      # In tests each value has (always?) been a WEBrick::HTTPUtils::FormData
      # whose data are FormData as well.  File uploads carry a filename, and
      # their content type has to be looked up with [].
      entries = []
      form_data.each_data do |part|
        if part.filename
          file = {'Filename' => part.filename, 'Content-Type' => part['Content-Type']}
          file['Content'] = part.to_a.join("\n") if keep_content
          entries << file
        else
          entries << part.to_s
        end
      end

      params[name] = splat(entries)
    end
  end

  host = headers['Host']
  uri = host ? File.join("http://", host, req.path_info) : req.path_info

  {
    :uri => uri,
    :request_method => req.request_method,
    :version => req.http_version.to_s,
    :headers => headers,
    :params => params
  }
end

.splat(array) ⇒ Object

Returns the first member of arrays with length <= 1 (nil for an empty array), or the array itself in all other cases. Splat is useful to simplify hashes of http headers and parameters that may have multiple values, but typically only have one.

splat([])                  # => nil
splat([:one])              # => :one
splat([:one, :two])        # => [:one, :two]


178
179
180
181
182
183
184
185
186
# File 'lib/tap/mechanize/utils.rb', line 178

# Simplifies arrays holding at most one member.
#
# An empty array becomes nil and a one-member array becomes that member.
# Longer arrays, and objects that are not arrays at all, are returned
# unchanged.
#
#   splat([])                  # => nil
#   splat([:one])              # => :one
#   splat([:one, :two])        # => [:one, :two]
#   splat(:one)                # => :one
#
def splat(array)
  if array.kind_of?(Array) && array.length <= 1
    array.first
  else
    array
  end
end