Class: Arachni::Parser
- Includes:
- UI::Output, Utilities
- Defined in:
- lib/arachni/parser.rb
Overview
Analyzes HTML code, extracting input vectors and supporting information.
Defined Under Namespace
Modules: Extractors
Instance Attribute Summary collapse
- #response ⇒ HTTP::Response readonly
- #url ⇒ String
Instance Method Summary collapse
-
#base ⇒ String
Base `href`, if there is one.
- #body ⇒ Object
-
#body=(string) ⇒ String
Override the #response body for the parsing process.
-
#cookie_jar ⇒ Array<Element::Cookie>
Cookies with which to update the HTTP cookie-jar.
-
#cookies ⇒ Array<Element::Cookie>
Cookies from HTTP headers and response body.
-
#cookies_to_be_audited ⇒ Array<Element::Cookie>
Cookies to be audited.
-
#document ⇒ Nokogiri::HTML?
Returns a parsed HTML document from the body of the HTTP response, or `nil` if the response data wasn't text-based or the response couldn't be parsed.
-
#forms ⇒ Array<Element::Form>
Forms from #document.
-
#headers ⇒ Hash
List of valid auditable HTTP header fields.
-
#initialize(response, options = Options) ⇒ Parser
constructor
A new instance of Parser.
- #jsons ⇒ Array<Element::JSON>
-
#link ⇒ Element::Link
Link to the page.
-
#link_template ⇒ Element::LinkTemplate
LinkTemplate for the current page.
-
#link_templates ⇒ Array<Element::LinkTemplate>
Links matching OptionsGroups::Audit#link_templates in #document.
-
#link_vars ⇒ Hash
Parameters found in #url.
-
#links ⇒ Array<Element::Link>
Links in #document.
- #page ⇒ Page
-
#paths ⇒ Array<String>
Distinct links to follow.
-
#text? ⇒ Boolean
`true` if the given HTTP response data are text-based, `false` otherwise.
-
#to_absolute(relative_url) ⇒ String
Converts a relative URL to an absolute one.
- #xmls ⇒ Array<Element::XML>
Methods included from Utilities
#available_port, #bytes_to_kilobytes, #bytes_to_megabytes, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite
Methods included from UI::Output
#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on
Constructor Details
#initialize(response, options = Options) ⇒ Parser
Returns a new instance of Parser.
73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/arachni/parser.rb', line 73 def initialize( response, options = Options ) @options = options if response.is_a? Array @secondary_responses = response[1..-1] @secondary_responses.compact! if @secondary_responses response = response.shift end @response = response self.url = response.url end |
Instance Attribute Details
#response ⇒ HTTP::Response (readonly)
65 66 67 |
# File 'lib/arachni/parser.rb', line 65 def response @response end |
Instance Method Details
#base ⇒ String
Returns the base `href`, if there is one.
311 312 313 |
# File 'lib/arachni/parser.rb', line 311 def base @base ||= document.search( '//base[@href]' ).first['href'] rescue nil end |
#body ⇒ Object
127 128 129 |
# File 'lib/arachni/parser.rb', line 127 def body @body || @response.body end |
#body=(string) ⇒ String
Returns Override the #response body for the parsing process.
122 123 124 125 |
# File 'lib/arachni/parser.rb', line 122 def body=( string ) @links = @forms = @cookies = @document = nil @body = string end |
#cookie_jar ⇒ Array<Element::Cookie>
Returns Cookies with which to update the HTTP cookie-jar.
286 287 288 289 290 291 292 293 294 295 296 297 |
# File 'lib/arachni/parser.rb', line 286 def cookie_jar return @cookie_jar.freeze if @cookie_jar from_jar = [] # Make a list of the response cookie names. cookie_names = Set.new( cookies.map( &:name ) ) from_jar |= HTTP::Client.cookie_jar.for_url( @url ). reject { |cookie| cookie_names.include?( cookie.name ) } @cookie_jar = (cookies | from_jar) end |
#cookies ⇒ Array<Element::Cookie>
Returns Cookies from HTTP headers and response body.
249 250 251 252 253 254 255 256 |
# File 'lib/arachni/parser.rb', line 249 def cookies return @cookies.freeze if @cookies @cookies = Cookie.from_headers( @url, @response.headers ) return @cookies if !text? @cookies |= Cookie.from_document( @url, document ) end |
#cookies_to_be_audited ⇒ Array<Element::Cookie>
Returns Cookies to be audited.
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/arachni/parser.rb', line 260 def cookies_to_be_audited return @cookies_to_be_audited.freeze if @cookies_to_be_audited return [] if !text? # Make a list of the response cookie names. cookie_names = Set.new( cookies.map(&:name) ) # Grab all cookies from the cookiejar giving preference to the ones # specified by the current page, if there are any. from_http_jar = HTTP::Client.cookie_jar.cookies.reject do |c| cookie_names.include?( c.name ) end # These cookies are to be audited and thus are dirty and anarchistic, # so they have to contain even cookies completely irrelevant to the # current page. I.e. it contains all cookies that have been observed # since the beginning of the scan @cookies_to_be_audited = (cookies | from_http_jar).map do |c| dc = c.dup dc.action = @url dc end end |
#document ⇒ Nokogiri::HTML?
Returns a parsed HTML document from the body of the HTTP response, or `nil` if the response data wasn't text-based or the response couldn't be parsed.
135 136 137 138 |
# File 'lib/arachni/parser.rb', line 135 def document return @document.freeze if @document @document = Nokogiri::HTML( body ) if text? rescue nil end |
#forms ⇒ Array<Element::Form>
Returns Forms from #document.
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/arachni/parser.rb', line 161 def forms return @forms.freeze if @forms return [] if !text? || !(body =~ /<\s*form/i) f = Form.from_document( @url, document ) return f if !@secondary_responses @secondary_responses.each do |response| next if response.body.to_s.empty? Form.from_document( @url, response.body ).each do |form2| f.each do |form| next if "#{form.coverage_id}:#{form.name_or_id}" != "#{form2.coverage_id}:#{form2.name_or_id}" form.inputs.each do |k, v| next if !(v != form2.inputs[k] && form.field_type_for( k ) == :hidden) form.nonce_name = k end end end end @forms = f end |
#headers ⇒ Hash
It will include common request headers as well as headers from the HTTP request.
Returns List of valid auditable HTTP header fields.
145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/arachni/parser.rb', line 145 def headers @headers ||= { 'Accept' => 'text/html,application/xhtml+xml,application' + '/xml;q=0.9,*/*;q=0.8', 'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', 'From' => @options.authorized_by || '', 'User-Agent' => @options.http.user_agent || '', 'Referer' => @url, 'Pragma' => 'no-cache' }.merge( response.request.headers ). map { |k, v| Header.new( url: @url, inputs: { k => v } ) }.freeze end |
#jsons ⇒ Array<Element::JSON>
230 231 232 |
# File 'lib/arachni/parser.rb', line 230 def jsons @jsons ||= [JSON.from_request( @url, response.request )].compact end |
#link ⇒ Element::Link
Returns Link to the page.
191 192 193 194 |
# File 'lib/arachni/parser.rb', line 191 def link return if link_vars.empty? && !@response.redirection? Link.new( url: @url, inputs: link_vars ) end |
#link_template ⇒ Element::LinkTemplate
Returns LinkTemplate for the current page.
198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/arachni/parser.rb', line 198 def link_template template, inputs = LinkTemplate.extract_inputs( @url ) return if !template LinkTemplate.new( url: @url.freeze, action: @url.freeze, inputs: inputs, template: template ) end |
#link_templates ⇒ Array<Element::LinkTemplate>
Returns Links matching OptionsGroups::Audit#link_templates in #document.
221 222 223 224 225 226 227 |
# File 'lib/arachni/parser.rb', line 221 def link_templates return @link_templates.freeze if @link_templates return @link_templates = [link_template].compact if !text? @link_templates = [link_template].compact | LinkTemplate.from_document( @url, document ) end |
#link_vars ⇒ Hash
Returns Parameters found in #url.
241 242 243 244 245 |
# File 'lib/arachni/parser.rb', line 241 def link_vars return {} if !(parsed = uri_parse( @url )) @link_vars ||= parsed.rewrite.query_parameters.freeze end |
#links ⇒ Array<Element::Link>
Returns Links in #document.
212 213 214 215 216 217 |
# File 'lib/arachni/parser.rb', line 212 def links return @links.freeze if @links return @links = [link].compact if !text? || !(body =~ /\?.*=/) @links = [link].compact | Link.from_document( @url, document ) end |
#page ⇒ Page
110 111 112 |
# File 'lib/arachni/parser.rb', line 110 def page @page ||= Page.new( parser: self ) end |
#paths ⇒ Array<String>
Returns Distinct links to follow.
301 302 303 304 305 306 307 |
# File 'lib/arachni/parser.rb', line 301 def paths return @paths if @paths @paths = [] return @paths.freeze if !document @paths = run_extractors.freeze end |
#text? ⇒ Boolean
Returns `true` if the given HTTP response data are text-based, `false` otherwise.
116 117 118 |
# File 'lib/arachni/parser.rb', line 116 def text? !@body.to_s.empty? || @response.text? end |
#to_absolute(relative_url) ⇒ String
Converts a relative URL to an absolute one.
99 100 101 102 103 104 105 106 107 |
# File 'lib/arachni/parser.rb', line 99 def to_absolute( relative_url ) if (url = base) base_url = url else base_url = @url end super( relative_url, base_url ) end |
#xmls ⇒ Array<Element::XML>
235 236 237 |
# File 'lib/arachni/parser.rb', line 235 def xmls @xmls ||= [XML.from_request( @url, response.request )].compact end |