Class: HTTPTools::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/http_tools/parser.rb

Overview

HTTPTools::Parser is a pure Ruby HTTP request & response parser with an evented API.

The HTTP message can be fed into the parser piece by piece as it comes over the wire, and the parser will call its callbacks as it works its way through the message.

Example:

parser = HTTPTools::Parser.new
parser.on(:header) do
  puts parser.status_code.to_s + " " + parser.message
  puts parser.header.inspect
end
parser.on(:finish) {print parser.body}

parser << "HTTP/1.1 200 OK\r\n"
parser << "Content-Length: 20\r\n\r\n"
parser << "<h1>Hello world</h1>"

Prints:

200 OK
{"Content-Length" => "20"}
<h1>Hello world</h1>

Constant Summary collapse

# :stopdoc:
EMPTY =
"".freeze
# Separators.
COLON =
":".freeze
SPACE =
" ".freeze
KEY_TERMINATOR =
": ".freeze
# Header names and header values.
CONTENT_LENGTH =
"Content-Length".freeze
TRANSFER_ENCODING =
"Transfer-Encoding".freeze
TRAILER =
"Trailer".freeze
# CONNECTION and CLOSE are compared case-insensitively against the parsed
# header by #finish.
CONNECTION =
"Connection".freeze
CLOSE =
"close".freeze
CHUNKED =
"chunked".freeze
# Keys written into the hash returned by #env.
REQUEST_METHOD =
"REQUEST_METHOD".freeze
PATH_INFO =
"PATH_INFO".freeze
QUERY_STRING =
"QUERY_STRING".freeze
SERVER_NAME =
"SERVER_NAME".freeze
SERVER_PORT =
"SERVER_PORT".freeze
HTTP_HOST =
"HTTP_HOST".freeze
RACK_INPUT =
"rack.input".freeze
# Frozen template that #env duplicates before adding per-message entries.
PROTOTYPE_ENV =
{
"SCRIPT_NAME" => "".freeze,
"rack.version" => [1, 1].freeze,
"rack.url_scheme" => "http".freeze,
"rack.errors" => STDERR,
"rack.multithread" => false,
"rack.multiprocess" => false,
"rack.run_once" => false}.freeze
# Prefix #env puts in front of converted header names.
HTTP_ =
"HTTP_".freeze
# String#tr character sets: #env maps LOWERCASE onto UPPERCASE to turn a
# header name such as "Content-Type" into "CONTENT_TYPE".
LOWERCASE =
"a-z-".freeze
UPPERCASE =
"A-Z_".freeze
# Converted header names that #env stores without the HTTP_ prefix.
NO_HTTP_ =
{"CONTENT_LENGTH" => true, "CONTENT_TYPE" => true}.freeze
EVENTS =
%W{header stream trailer finish error}.map {|e| e.freeze}.freeze
# :startdoc:

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeParser

:call-seq: Parser.new -> parser

Create a new HTTPTools::Parser.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/http_tools/parser.rb', line 88

# Create a new HTTPTools::Parser, positioned at the start of a message.
#
# Every instance variable that the attribute readers expose or that #reset
# clears is given an explicit initial value, so a brand new parser never
# reads an unset variable.
def initialize
  @state = :start
  # Dup the literal: #concat appends to this string in place, which would
  # raise FrozenError if string literals are frozen.
  @buffer = @scanner = StringScanner.new("".dup)

  # Parsed message parts.
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  @message = nil
  @header = {}
  @trailer = {}
  @body = nil

  # User-settable options.
  @force_no_body = nil
  @allow_html_without_header = nil
  @force_trailer = nil
  @max_chunk_size = nil

  # Internal parse progress.
  @header_complete = nil
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil

  # Callbacks; the stream callback starts as the bound setup method, which
  # #env and #reset compare against to detect "no stream listener set".
  @header_callback = nil
  @stream_callback = method(:setup_stream_callback)
  @trailer_callback = nil
  @finish_callback = nil
  @error_callback = nil
end

Instance Attribute Details

#allow_html_without_headerObject

Allow responses with no status line or headers if it looks like HTML.



79
80
81
# File 'lib/http_tools/parser.rb', line 79

# Current value of the option that lets the parser accept a response with no
# status line or headers when it looks like HTML.
def allow_html_without_header; @allow_html_without_header; end

#bodyObject (readonly)

Returns the value of attribute body.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @body (nil on a new or reset parser).
def body; @body; end

#force_no_bodyObject

Skip parsing the body, e.g. with the response to a HEAD request.



76
77
78
# File 'lib/http_tools/parser.rb', line 76

# Current value of the option that makes the parser skip parsing the body,
# e.g. for the response to a HEAD request.
def force_no_body; @force_no_body; end

#force_trailerObject

Force parser to expect and parse a trailer when Trailer header missing.



73
74
75
# File 'lib/http_tools/parser.rb', line 73

# Current value of the option that forces the parser to expect and parse a
# trailer even when the Trailer header is missing.
def force_trailer; @force_trailer; end

#headerObject (readonly)

Returns the value of attribute header.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The hash held in @header (an empty Hash on a new or reset parser).
def header; @header; end

#max_chunk_sizeObject

Max size for a ‘Transfer-Encoding: chunked’ body chunk. nil for no limit.



82
83
84
# File 'lib/http_tools/parser.rb', line 82

def max_chunk_size
  @max_chunk_size
end

#messageObject (readonly)

Returns the value of attribute message.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @message -- presumably the text that follows the status code
# on a response's status line (e.g. "OK"); confirm against the parsing code.
def message; @message; end

#path_infoObject (readonly)

Returns the value of attribute path_info.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @path_info; #env stores it under "PATH_INFO".
def path_info; @path_info; end

#query_stringObject (readonly)

Returns the value of attribute query_string.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @query_string; #env stores it under "QUERY_STRING".
def query_string; @query_string; end

#request_methodObject (readonly)

Returns the value of attribute request_method.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @request_method; #env upcases it for "REQUEST_METHOD".
def request_method; @request_method; end

#request_uriObject (readonly)

Returns the value of attribute request_uri.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @request_uri (nil after #reset).
def request_uri; @request_uri; end

#stateObject (readonly)

:nodoc:



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# :nodoc:
# Name of the parser's current state as a Symbol (:start on a new parser).
def state; @state; end

#status_codeObject (readonly)

Returns the value of attribute status_code.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @status_code (nil on a new or reset parser).
def status_code; @status_code; end

#trailerObject (readonly)

Returns the value of attribute trailer.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The hash held in @trailer (an empty Hash on a new or reset parser).
def trailer; @trailer; end

#versionObject (readonly)

Returns the value of attribute version.



69
70
71
# File 'lib/http_tools/parser.rb', line 69

# The value of @version (nil after #reset).
def version; @version; end

Instance Method Details

#add_listener(event, proc = nil, &block) ⇒ Object Also known as: on

:call-seq: parser.add_listener(event) {|arg| block} -> parser parser.add_listener(event, proc) -> parser parser.on(event) {|arg| block} -> parser parser.on(event, proc) -> parser

Available events are :header, :stream, :trailer, :finish, and :error.

Adding a second callback for an event will overwrite the existing callback.

Events:

header

Called when headers are complete

stream

Supplied with one argument, the last chunk of body data fed in to the parser as a String, e.g. “<h1>Hello”. If no listener is set for this event the body can be retrieved with #body

trailer

Called on the completion of the trailer, if present

finish

Called on completion of the entire message. Any unconsumed data (such as the start of the next message with keepalive) can be retrieved with #rest

error

Supplied with one argument, an error encountered while parsing as a HTTPTools::ParseError. If a listener isn’t registered for this event, an exception will be raised when an error is encountered



293
294
295
296
# File 'lib/http_tools/parser.rb', line 293

# Register the callback for +event+, replacing any callback already stored
# for it. The callable is taken from +proc+ when given, otherwise from the
# block, and is stored in the instance variable "@<event>_callback".
#
# Returns the parser itself so calls can be chained.
def add_listener(event, proc=nil, &block)
  handler = proc || block
  slot = "@#{event}_callback".to_sym
  instance_variable_set(slot, handler)
  self
end

#concat(data) ⇒ Object Also known as: <<

:call-seq: parser.concat(data) -> parser parser << data -> parser

Feed data into the parser and trigger callbacks.

Will raise HTTPTools::ParseError on error, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



118
119
120
121
122
# File 'lib/http_tools/parser.rb', line 118

# Append +data+ to the buffer, then run the method named by the current
# state; whatever that method returns becomes the new state.
#
# Returns the parser itself.
def concat(data)
  @buffer << data
  next_state = __send__(@state)
  @state = next_state
  self
end

#envObject

:call-seq: parser.env -> hash or nil

Returns a Rack compatible environment hash. Will return nil if called before headers are complete.

“SERVER_NAME” and “SERVER_PORT” are only supplied if they can be determined from the request (e.g., they are present in the “Host” header).

“rack.input” is only supplied if #env is called after parsing the request has finished, and no listener is set for the stream event.

If not supplied, you must ensure “SERVER_NAME”, “SERVER_PORT”, and “rack.input” are present to make the environment hash fully Rack compliant



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/http_tools/parser.rb', line 139

# Build a Rack style environment hash from the parsed request.
#
# Returns nil until the header is complete. Otherwise returns a new Hash
# based on PROTOTYPE_ENV containing:
# * REQUEST_METHOD (upcased), PATH_INFO and QUERY_STRING;
# * one entry per header, with the name upcased, "-" turned into "_" and an
#   "HTTP_" prefix unless the converted name is listed in NO_HTTP_;
# * SERVER_NAME / SERVER_PORT when they can be read from the Host header;
# * one "HTTP_" prefixed entry per trailer field;
# * "rack.input" when a body was collected or no stream listener was set.
#
# NOTE(review): @request_method.upcase raises NoMethodError when
# @request_method is nil -- presumably #env is only meant for requests.
def env
  return unless @header_complete
  env = PROTOTYPE_ENV.dup
  env[REQUEST_METHOD] = @request_method.upcase
  env[PATH_INFO] = @path_info
  env[QUERY_STRING] = @query_string
  @header.each do |key, value|
    upper_key = key.tr(LOWERCASE, UPPERCASE)
    upper_key[0,0] = HTTP_ unless NO_HTTP_.key?(upper_key)
    env[upper_key.freeze] = value
  end
  if env.key?(HTTP_HOST)
    authority = env[HTTP_HOST]
    # A bracketed IPv6 literal ("[::1]:8080") contains colons of its own, so
    # it must not be split on every ":"; keep the brackets in the host part.
    ipv6 = /\A(\[[^\]]*\])(?::(\d*))?\z/.match(authority)
    if ipv6
      host = ipv6[1]
      port = ipv6[2]
      port = nil if port && port.empty?
    else
      host, port = authority.split(COLON)
    end
  end
  env[SERVER_NAME] = host if host
  env[SERVER_PORT] = port if port
  @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
  # The stream callback still being the setup method means no stream
  # listener was registered, so the (possibly empty) body is exposed.
  if @body || @stream_callback == method(:setup_stream_callback)
    env[RACK_INPUT] = StringIO.new(@body || "")
  end
  env
end

#finishObject

:call-seq: parser.finish -> parser

Used to notify the parser that the request has finished in a case where it can not be determined by the request itself.

For example, when a server does not set a content length, and instead relies on closing the connection to signify the body end.

until parser.finished?
  begin
    parser << socket.sysread(1024 * 16)
  rescue EOFError
    parser.finish
    break
  end
end

This method can not be used to interrupt parsing from within a callback.

Will raise HTTPTools::MessageIncompleteError if called too early, or HTTPTools::EndOfMessageError if the message has already finished, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/http_tools/parser.rb', line 183

# Tell the parser that no more data will arrive (e.g. the connection closed).
#
# Returns the parser itself. Calls raise with EmptyMessageError when nothing
# has been received yet, and with MessageIncompleteError in every other case
# where the end of the message cannot be confirmed.
#
# NOTE(review): the public documentation says an :error listener receives
# the error instead of it being raised, and mentions EndOfMessageError for an
# already finished message. Neither is visible in this method -- presumably
# `raise` is redefined elsewhere in the class and MessageIncompleteError
# descends from EndOfMessageError. Confirm before reordering these branches,
# because code after a `raise` call may still run if `raise` can return.
def finish
  if @state == :body_on_close
    # The body runs until the connection closes: point @buffer back at the
    # scanner before finishing the message.
    @buffer = @scanner
    @state = end_of_message
  elsif @state == :body_chunked && @buffer.eos? && !@trailer_expected &&
    @header.any? {|k,v| CONNECTION.casecmp(k) == 0 && CLOSE.casecmp(v) == 0}
    # Chunked body with nothing left unread, no trailer expected and a
    # "Connection: close" header (name and value compared ignoring case).
    @state = end_of_message
  elsif @state == :start && @buffer.string.length < 1
    # Still in the initial state with an empty buffer: no message at all.
    raise EmptyMessageError.new("Message empty")
  else
    raise MessageIncompleteError.new("Message ended early")
  end
  self
end

#finished?Boolean

:call-seq: parser.finished? -> bool

Returns true when the parser has come to the end of the message, false otherwise.

Some HTTP servers may not supply the necessary information in the response to determine the end of the message (e.g., no content length) and instead close the connection to signify the end of the message; see #finish for how to deal with this.

Returns:

  • (Boolean)


208
209
210
# File 'lib/http_tools/parser.rb', line 208

# True once the parser's state is :end_of_message, false in any other state.
def finished?
  :end_of_message == @state
end

#header?Boolean

:call-seq: parser.header? -> bool

Returns true when the parser has received the complete header, false otherwise.

Returns:

  • (Boolean)


217
218
219
# File 'lib/http_tools/parser.rb', line 217

# Truthy once the complete header has been received. Returns the raw
# @header_complete flag, so the "not yet" answer is nil on a new or reset
# parser rather than false.
def header?; @header_complete; end

#inspectObject

:nodoc:



299
300
301
# File 'lib/http_tools/parser.rb', line 299

# Replaces everything after the first space of the inherited inspect string
# with the result of posstr(false) and the current state.
def inspect # :nodoc:
  default = super
  default.sub(/ .*>$/, " #{posstr(false)} #{state}>")
end

#resetObject

:call-seq: parser.reset -> parser

Reset the parser so it can be used to process a new request. Callbacks will not be removed.



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/http_tools/parser.rb', line 242

# Reset the parser so it can process a new message. Registered callbacks are
# kept.
#
# Empties the buffer in place, returns to the :start state and clears every
# value parsed from the previous message, including @message so that
# #message does not keep reporting the previous message's value.
#
# Returns the parser itself.
def reset
  @state = :start
  @buffer.string.replace("")
  @buffer.reset
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  @message = nil
  @header_complete = nil
  @header = {}
  @trailer = {}
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil
  @body = nil
  # Only the parser's own stream_callback method is swapped back to the
  # setup method; any other stored stream callback is left untouched.
  if @stream_callback == method(:stream_callback)
    @stream_callback = method(:setup_stream_callback)
  end
  self
end

#restObject

:call-seq: parser.rest -> string

Returns unconsumed data in the parser’s buffer.



225
226
227
# File 'lib/http_tools/parser.rb', line 225

# The part of the buffer that has not been consumed yet, as reported by
# @buffer.rest.
def rest; @buffer.rest; end

#rest_sizeObject

:call-seq: parser.rest_size -> int

Returns the size in bytes of the unconsumed data in the parser’s buffer.



233
234
235
# File 'lib/http_tools/parser.rb', line 233

# Size in bytes of the unconsumed part of the buffer, as reported by
# @buffer.rest_size.
def rest_size; @buffer.rest_size; end