Class: Arachni::Parser

Inherits:
Object show all
Includes:
UI::Output, Utilities
Defined in:
lib/arachni/parser.rb

Overview

Analyzes HTML code, extracting input vectors and supporting information.

Defined Under Namespace

Modules: Extractors

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #bytes_to_kilobytes, #bytes_to_megabytes, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Methods included from UI::Output

#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on

Constructor Details

#initialize(response, options = Options) ⇒ Parser

Returns a new instance of Parser.

Parameters:

  • response (HTTP::Response, Array<HTTP::Response>)

    Response(s) to analyze and parse. By providing multiple responses the parser will be able to perform some preliminary differential analysis and identify nonce tokens in inputs.

  • options (Options) (defaults to: Options)


73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/arachni/parser.rb', line 73

# Sets up the parser for the given response(s).
#
# @param    [HTTP::Response, Array<HTTP::Response>]    response
#   Response(s) to analyze and parse. When given an `Array`, its first
#   element becomes the primary {#response} and the remaining non-`nil`
#   elements are kept as secondary responses, which {#forms} compares
#   against in order to identify nonce inputs.
#   The given `Array` itself is never modified.
# @param    [Options]   options
def initialize( response, options = Options )
    @options = options

    if response.is_a? Array
        # Destructure instead of calling `shift`, so that the caller's array
        # is left untouched.
        primary, *secondary = response

        @secondary_responses = secondary.compact
        response             = primary
    end

    @response = response
    self.url  = response.url
end

Instance Attribute Details

#response ⇒ HTTP::Response (readonly)

Returns:



65
66
67
# File 'lib/arachni/parser.rb', line 65

# @return   [HTTP::Response]
#   The response under analysis, as stored in `@response` by the constructor.
def response
  @response
end

#url ⇒ String

Returns:



62
63
64
# File 'lib/arachni/parser.rb', line 62

# @return   [String]
#   Current value of `@url`. The constructor assigns it through `url=`,
#   using the URL of the response.
def url
  @url
end

Instance Method Details

#base ⇒ String

Returns Base `href`, if there is one.

Returns:

  • (String)

    Base `href`, if there is one.



311
312
313
# File 'lib/arachni/parser.rb', line 311

# @return   [String, nil]
#   `href` attribute of the first `<base>` element of the {#document} which
#   has one, `nil` if there is no such element or the lookup failed.
def base
    return @base if @base

    doc  = document
    node = doc.search( '//base[@href]' ).first if doc

    @base = node['href'] if node
rescue StandardError
    # Best-effort lookup, any failure is treated as "no base".
    nil
end

#body ⇒ Object



127
128
129
# File 'lib/arachni/parser.rb', line 127

# @return   [String]
#   The body set via {#body=}, if any, otherwise the body of the response.
def body
    return @body if @body

    @response.body
end

#body=(string) ⇒ String

Returns Override the #response body for the parsing process.

Returns:



122
123
124
125
# File 'lib/arachni/parser.rb', line 122

# Overrides the {#response} body for the parsing process.
#
# Every cache which has been populated from the previous body is dropped, so
# that it gets rebuilt from the new body the next time it's requested.
#
# @param    [String]    string
#   Body to use instead of the one of the response.
#
# @return   [String]
def body=( string )
    @links = @forms = @cookies = @document = nil

    # These derive from the document too, either directly or via the
    # cookies, and would otherwise keep serving data from the old body.
    @paths = @link_templates = @base = nil
    @cookie_jar = @cookies_to_be_audited = nil

    @body = string
end

Returns Cookies with which to update the HTTP cookie-jar.

Returns:



286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/arachni/parser.rb', line 286

# @return   [Array<Element::Cookie>]
#   Cookies with which to update the HTTP cookie-jar: the {#cookies} of the
#   page, followed by the cookies the HTTP cookie-jar holds for the URL
#   whose names don't clash with them.
def cookie_jar
    return @cookie_jar.freeze if @cookie_jar

    # Names already provided by the response, these take precedence.
    seen_names = Set.new( cookies.map( &:name ) )

    jar_only = HTTP::Client.cookie_jar.for_url( @url ).reject do |jar_cookie|
        seen_names.include?( jar_cookie.name )
    end

    @cookie_jar = cookies | jar_only
end

#cookies ⇒ Array<Element::Cookie>

Returns Cookies from HTTP headers and response body.

Returns:



249
250
251
252
253
254
255
256
# File 'lib/arachni/parser.rb', line 249

# @return   [Array<Element::Cookie>]
#   Cookies from the HTTP response headers and, for text-based responses,
#   from the {#document} as well.
def cookies
    return @cookies.freeze if @cookies

    @cookies = Cookie.from_headers( @url, @response.headers )

    # Only text-based responses have a document to look into.
    @cookies |= Cookie.from_document( @url, document ) if text?
    @cookies
end

#cookies_to_be_audited ⇒ Array<Element::Cookie>

Returns Cookies to be audited.

Returns:



260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/arachni/parser.rb', line 260

# @return   [Array<Element::Cookie>]
#   Cookies to be audited: copies of the {#cookies} of the page and of every
#   differently named cookie in the HTTP cookie-jar, with their `action`
#   pointing to the URL of the page. Empty for non text-based responses.
def cookies_to_be_audited
    return @cookies_to_be_audited.freeze if @cookies_to_be_audited
    return [] unless text?

    # Names of the cookies set by the current page.
    page_cookie_names = Set.new( cookies.map( &:name ) )

    # Take everything else from the cookie-jar, giving preference to the
    # cookies specified by the current page, if there are any.
    jar_cookies = HTTP::Client.cookie_jar.cookies.reject do |jar_cookie|
        page_cookie_names.include?( jar_cookie.name )
    end

    # These cookies are to be audited, so the list deliberately includes
    # cookies which are completely irrelevant to the current page, i.e. every
    # cookie that has been observed since the beginning of the scan.
    #
    # Copies are handed out so that the originals keep their own action.
    @cookies_to_be_audited = (cookies | jar_cookies).map do |original|
        original.dup.tap { |copy| copy.action = @url }
    end
end

#document ⇒ Nokogiri::HTML?

Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.

Returns:

  • (Nokogiri::HTML, nil)

    Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.



135
136
137
138
# File 'lib/arachni/parser.rb', line 135

# @return   [Nokogiri::HTML, nil]
#   Parsed HTML document of the {#body}, `nil` if the data isn't text-based
#   or if parsing raised an error.
def document
    return @document.freeze if @document
    return unless text?

    @document = Nokogiri::HTML( body )
rescue StandardError
    # Unparsable bodies simply yield no document.
    nil
end

#forms ⇒ Array<Element::Form>

Returns Forms from #document.

Returns:



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/arachni/parser.rb', line 161

# Extracts the forms of the {#document}.
#
# When secondary responses are available, the forms of each of them are
# compared against the ones of the primary response: a hidden input whose
# value differs between two otherwise matching forms is flagged as that
# form's nonce (via `nonce_name=`).
#
# The extracted forms are cached regardless of whether secondary responses
# are available, so the document is only scanned once per body.
#
# @return   [Array<Element::Form>]
#   Forms from {#document}; an empty `Array` if the response isn't
#   text-based or its body doesn't contain any form tags.
def forms
    return @forms.freeze if @forms
    return [] if !text? || !(body =~ /<\s*form/i)

    f = Form.from_document( @url, document )

    # `Array()` turns a missing list of secondary responses into an empty
    # one, in which case there's nothing to compare against.
    Array( @secondary_responses ).each do |response|
        next if response.body.to_s.empty?

        Form.from_document( @url, response.body ).each do |form2|
            # Identifies the form across responses; computed once per
            # secondary form instead of once per comparison.
            id2 = "#{form2.coverage_id}:#{form2.name_or_id}"

            f.each do |form|
                next if "#{form.coverage_id}:#{form.name_or_id}" != id2

                form.inputs.each do |k, v|
                    # Only hidden inputs whose value changed between
                    # responses qualify as nonces.
                    next if !(v != form2.inputs[k] &&
                        form.field_type_for( k ) == :hidden)

                    form.nonce_name = k
                end
            end
        end
    end

    @forms = f
end

#headers ⇒ Hash

Note:

It will include common request headers as well as headers from the HTTP request.

Returns List of valid auditable HTTP header fields.

Returns:

  • (Hash)

    List of valid auditable HTTP header fields.



145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/arachni/parser.rb', line 145

# @note The list includes common request headers as well as the headers of
#   the HTTP request which resulted in the {#response}; the latter override
#   the former.
#
# @return   [Array<Header>]
#   Frozen list of auditable HTTP header fields, one element per field.
def headers
    return @headers if @headers

    common = {
        'Accept'          => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'From'            => @options.authorized_by  || '',
        'User-Agent'      => @options.http.user_agent || '',
        'Referer'         => @url,
        'Pragma'          => 'no-cache'
    }

    fields = common.merge( response.request.headers )

    @headers = fields.map do |name, value|
        Header.new( url: @url, inputs: { name => value } )
    end.freeze
end

#jsons ⇒ Array<Element::JSON>

Returns:



230
231
232
# File 'lib/arachni/parser.rb', line 230

# @return   [Array<Element::JSON>]
#   The element built by `JSON.from_request` for the request of the
#   {#response}, or an empty `Array` if that returned `nil`.
def jsons
    return @jsons if @jsons

    element = JSON.from_request( @url, response.request )
    @jsons  = element.nil? ? [] : [element]
end

Returns Link to the page.

Returns:



191
192
193
194
# File 'lib/arachni/parser.rb', line 191

# @return   [Element::Link, nil]
#   Link to the page, built from the URL and its parameters ({#link_vars});
#   `nil` if the URL has no parameters and the response isn't a redirection.
def link
    vars = link_vars

    auditable = !vars.empty? || @response.redirection?
    return unless auditable

    Link.new( url: @url, inputs: vars )
end

Returns LinkTemplate for the current page.

Returns:



198
199
200
201
202
203
204
205
206
207
208
# File 'lib/arachni/parser.rb', line 198

# @return   [Element::LinkTemplate, nil]
#   LinkTemplate for the current page, `nil` if
#   `LinkTemplate.extract_inputs` finds no template for the URL.
def link_template
    template, inputs = LinkTemplate.extract_inputs( @url )
    return unless template

    # The URL is frozen in place and serves as both the URL and the action.
    frozen_url = @url.freeze

    LinkTemplate.new(
        url:      frozen_url,
        action:   frozen_url,
        inputs:   inputs,
        template: template
    )
end

Returns Links matching OptionsGroups::Audit#link_templates in #document.

Returns:



221
222
223
224
225
226
227
# File 'lib/arachni/parser.rb', line 221

# @return   [Array<Element::LinkTemplate>]
#   The {#link_template} of the page itself (if any) plus, for text-based
#   responses, the link templates found in the {#document}.
def link_templates
    return @link_templates.freeze if @link_templates

    textual = text?

    templates = [link_template].compact
    templates |= LinkTemplate.from_document( @url, document ) if textual

    @link_templates = templates
end

Returns Parameters found in #url.

Returns:



241
242
243
244
245
# File 'lib/arachni/parser.rb', line 241

# @return   [Hash]
#   Parameters found in the URL (after it has been rewritten); an empty
#   `Hash` if the URL can't be parsed.
def link_vars
    parsed = uri_parse( @url )
    return {} unless parsed

    @link_vars ||= parsed.rewrite.query_parameters.freeze
end

Returns Links in #document.

Returns:



212
213
214
215
216
217
# File 'lib/arachni/parser.rb', line 212

# @return   [Array<Element::Link>]
#   The {#link} of the page itself (if any) plus the links found in the
#   {#document}. The document is only consulted for text-based responses
#   whose body contains something resembling a query string.
def links
    return @links.freeze if @links

    # Cheap pre-check, to avoid parsing bodies which can't contain links
    # with parameters.
    worth_parsing = text? && body =~ /\?.*=/

    found = [link].compact
    found |= Link.from_document( @url, document ) if worth_parsing

    @links = found
end

#page ⇒ Page

Returns:



110
111
112
# File 'lib/arachni/parser.rb', line 110

# @return   [Page]
#   Page built from this parser, created on first use and then reused.
def page
    return @page if @page

    @page = Page.new( parser: self )
end

#paths ⇒ Array<String>

Returns Distinct links to follow.

Returns:



301
302
303
304
305
306
307
# File 'lib/arachni/parser.rb', line 301

# @return   [Array<String>]
#   Distinct links to follow, as returned by `run_extractors`; an empty
#   `Array` if there's no {#document}. The result is frozen.
def paths
  return @paths if @paths

  # Set before the document gets accessed, the extractors only run if there
  # actually is one.
  @paths = []

  if document
    @paths = run_extractors.freeze
  else
    @paths.freeze
  end
end

#text? ⇒ Boolean

Returns `true` if the given HTTP response data are text based, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the given HTTP response data are text based, `false` otherwise.



116
117
118
# File 'lib/arachni/parser.rb', line 116

# @return   [Boolean]
#   `true` if a non-empty body override has been set (see {#body=}),
#   otherwise whatever the response reports via its own `text?`.
def text?
    return true unless @body.to_s.empty?

    @response.text?
end

#to_absolute(relative_url) ⇒ String

Converts a relative URL to an absolute one.

Parameters:

  • relative_url (String)

    URL to convert to absolute.

Returns:



99
100
101
102
103
104
105
106
107
# File 'lib/arachni/parser.rb', line 99

# Converts a relative URL to an absolute one, resolving it against the
# {#base} `href` of the document when there is one and against the URL of
# the page otherwise.
#
# @param    [String]    relative_url
#   URL to convert to absolute.
#
# @return   [String]
#   Result of the inherited `to_absolute` for the chosen base URL.
def to_absolute( relative_url )
    super( relative_url, base || @url )
end

#xmls ⇒ Array<Element::XML>

Returns:



235
236
237
# File 'lib/arachni/parser.rb', line 235

# @return   [Array<Element::XML>]
#   The element built by `XML.from_request` for the request of the
#   {#response}, or an empty `Array` if that returned `nil`.
def xmls
    return @xmls if @xmls

    element = XML.from_request( @url, response.request )
    @xmls   = element.nil? ? [] : [element]
end