Class: Arachni::Page

Inherits:
Object show all
Includes:
Utilities
Defined in:
lib/arachni/page.rb,
lib/arachni/page/dom.rb,
lib/arachni/page/scope.rb,
lib/arachni/page/dom/transition.rb

Overview

It holds page data like elements, cookies, headers, etc...

Author:

Defined Under Namespace

Classes: DOM, Error, Scope

Constant Summary collapse

# Element types a page can hold. The list is iterated whenever the per-type
# element collections are gathered, cleared from cache or serialized.
ELEMENTS = [
    :links,
    :forms,
    :cookies,
    :headers,
    :link_templates,
    :jsons,
    :xmls,
    :ui_inputs,
    :ui_forms
]
# Metadata names used as the default `metas` argument of #import_metadata.
METADATA = [
    :nonce_name,
    :skip_dom
]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, available_port_mutex, #bytes_to_kilobytes, #bytes_to_megabytes, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_file, #cookies_from_parser, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_parser, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_parser, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Constructor Details

#initialize(options) ⇒ Page

Needs either a :parser or a :response or user provided data.

Parameters:

  • options (Hash)

    Hash from which to set instance attributes.

Options Hash (options):


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/arachni/page.rb', line 143

# Builds a page from the given options.
#
# `:do_not_audit_elements` and `:parser` are consumed directly; every other
# option is assigned through its matching `#{key}=` writer.
#
# @param    [Hash]  options
#   Hash from which to set instance attributes.
#
# @raise    [ArgumentError]
#   If `options` is empty or if no URL can be determined.
def initialize( options )
    fail ArgumentError, 'Options cannot be empty.' if options.empty?
    # Shallow copy, so that the deletions below do not alter the caller's Hash.
    options = options.dup

    @cache = {}

    @do_not_audit_elements = options.delete(:do_not_audit_elements)

    # A supplied parser doubles as the source of the response.
    @cache[:parser] = options.delete(:parser)
    @response = @cache[:parser].response if @cache[:parser]

    # We need to know whether or not the page has been dynamically updated
    # with elements, in order to optimize #dup and #hash operations.
    @has_custom_elements = Set.new

    @metadata ||= {}

    # Remaining options are applied via their writers, each value being passed
    # through #try_dup first.
    options.each do |k, v|
        send( "#{k}=", try_dup( v ) )
    end

    # The DOM is always (re)built here with a reference back to this page.
    @dom = DOM.new( (options[:dom] || {}).merge( page: self ) )

    fail ArgumentError, 'No URL given!' if !url

    Platform::Manager.fingerprint( self )

    # Normalize the whitelist to a Set, whether or not one was provided.
    @element_audit_whitelist ||= []
    @element_audit_whitelist   = Set.new( @element_audit_whitelist )
end

Instance Attribute Details

#domDOM

Returns DOM snapshot.

Returns:

  • (DOM)

    DOM snapshot.


110
111
112
# File 'lib/arachni/page.rb', line 110

# @return   [DOM]
#   DOM snapshot associated with this page.
def dom
    @dom
end

#element_audit_whitelistSet<Integer> (readonly)

Returns Audit whitelist based on Element::Capabilities::Auditable#coverage_hash.


132
133
134
# File 'lib/arachni/page.rb', line 132

# @return   [Set<Integer>]
#   Audit whitelist, consulted by #audit_element?.
def element_audit_whitelist
    @element_audit_whitelist
end

#metadataHash (readonly)

Returns Holds page data that will need to persist between #clear_cache calls and other utility data.

Returns:

  • (Hash)

    Holds page data that will need to persist between #clear_cache calls and other utility data.


124
125
126
# File 'lib/arachni/page.rb', line 124

# @return   [Hash]
#   Holds page data that will need to persist between #clear_cache calls and
#   other utility data.
def metadata
    @metadata
end

#responseHTTP::Response (readonly)

Returns HTTP response.

Returns:


114
115
116
# File 'lib/arachni/page.rb', line 114

# @return   [HTTP::Response]
#   HTTP response this page was built from, if any.
def response
    @response
end

Class Method Details

._load(data) ⇒ Object


605
606
607
# File 'lib/arachni/page.rb', line 605

# Rebuilds a page from the String produced by #_dump.
#
# @param    [String]    data
#   Marshalled initialization options.
#
# @return   [Page]
def self._load( data )
    options = Marshal.load( data )
    new( options )
end

.from_data(data) ⇒ Object

Parameters:

  • data (Hash)

    a customizable set of options


82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/arachni/page.rb', line 82

# Builds a page from plain user-provided data, filling in defaults for the
# response (code 200, empty body) and its request.
#
# The nested `:response` and `:request` hashes are copied before defaults are
# written into them, so the caller's hashes are left untouched.
#
# @param    [Hash]  data
#   Page options; `:url` and `:body` are moved under `:response` unless the
#   response already provides them.
#
# @return   [Page]
def self.from_data( data )
    data = data.dup

    # Assign the copy right away so that the position of the `:response` key
    # within `data` is the same as when it was defaulted in place.
    response = data[:response] = (data[:response] || {}).dup

    response[:code] ||= 200
    response[:url]  ||= data.delete( :url )
    response[:body] ||= data.delete( :body ) || ''

    request = (response[:request] || {}).dup
    request[:url] ||= response[:url]

    data[:cookie_jar] ||= []

    response[:request] = Arachni::HTTP::Request.new( request )
    data[:response]    = Arachni::HTTP::Response.new( response )

    new data
end

.from_response(response) ⇒ Page

Parameters:

Returns:


59
60
61
# File 'lib/arachni/page.rb', line 59

# Builds a page by running the given response through a Parser.
#
# @param    [HTTP::Response]    response
#
# @return   [Page]
def self.from_response( response )
    parser = Parser.new( response )
    parser.page
end

.from_rpc_data(data) ⇒ Page

Parameters:

Returns:


573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
# File 'lib/arachni/page.rb', line 573

# Restores a page from its RPC representation.
#
# Note that the `'dom'` entry is removed from the given `data` Hash.
#
# @param    [Hash]  data
#   String-keyed data; `'response'` and the per-type element lists are
#   converted back via their own `from_rpc_data`, everything else is passed
#   through as-is.
#
# @return   [Page]
def self.from_rpc_data( data )
    dom_data      = data.delete( 'dom' )
    element_names = ELEMENTS.map( &:to_s )

    options = data.each_with_object( {} ) do |(name, value), normalized|
        normalized[name.to_sym] =
            if name == 'response'
                HTTP::Response.from_rpc_data( value )
            elsif element_names.include?( name )
                # The element type is the singular form of the collection
                # name, i.e. the name without its trailing character.
                value.map do |element_data|
                    Element.type_to_class( name[0...-1].to_sym ).
                        from_rpc_data( element_data )
                end.to_a
            else
                value
            end
    end

    page = new( options )

    # The DOM needs a reference to the page, so it can only be restored after
    # the page itself has been instantiated.
    page.instance_variable_set(
        '@dom', DOM.from_rpc_data( dom_data.merge( page: page ) )
    )
    page
end

.from_url(url, opts = {}, &block) ⇒ Page

Parameters:

  • url (String)

    URL to fetch.

  • opts (Hash) (defaults to: {})
  • block (Block)

    Block to which to pass the page object. If given, the request will be performed asynchronously. If no block is given, the page will be fetched synchronously and be returned by this method.

Options Hash (opts):

  • :precision (Integer) — default: 2

    How many times to request the page and examine changes between requests. Used to identify nonce tokens etc.

  • :http (Hash)

    HTTP request options.

Returns:


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/arachni/page.rb', line 37

# Fetches a URL `opts[:precision]` times and builds a page out of the
# collected responses.
#
# @param    [String]    url
#   URL to fetch.
# @param    [Hash]  opts
# @option opts  [Integer]   :precision  (2)
#   How many times to request the page.
# @option opts  [Hash]  :http
#   HTTP request options.
# @param    [Block] block
#   Block to which to pass the page object. If given, the requests are only
#   queued and the page is passed to the block once all responses are in;
#   otherwise the requests are run and the page is returned.
#
# @return   [Page, nil]
#   The page when no block has been given, `nil` otherwise.
def self.from_url( url, opts = {}, &block )
    opts[:precision] ||= 2
    collected = []

    opts[:precision].times do
        HTTP::Client.get( url, opts[:http] || {} ) do |res|
            collected << res

            # Only act once the last expected response has arrived.
            if collected.size == opts[:precision] && block_given?
                block.call( from_response( collected ) )
            end
        end
    end

    return if block_given?

    HTTP::Client.run
    from_response( collected )
end

Instance Method Details

#==(other) ⇒ Object


467
468
469
# File 'lib/arachni/page.rb', line 467

def ==( other )
    hash == other.hash
end

#_dump(_) ⇒ Object


601
602
603
# File 'lib/arachni/page.rb', line 601

def _dump( _ )
    Marshal.dump( to_initialization_options( false ) )
end

#audit_element?(element) ⇒ Bool

Returns true if the element should be audited, false otherwise.

Parameters:

Returns:

  • (Bool)

    true if the element should be audited, false otherwise.

See Also:


229
230
231
232
233
234
235
# File 'lib/arachni/page.rb', line 229

# @param    [Integer, #coverage_hash]   element
#   Either a coverage hash or an object providing one via `#coverage_hash`.
#
# @return   [Bool]
#   `false` if auditing has been disabled via #do_not_audit_elements, `true`
#   if the whitelist is empty (nothing is restricted), otherwise whether the
#   element's coverage hash is whitelisted.
def audit_element?( element )
    # Always answer with a proper boolean, as documented, rather than `nil`.
    return false if @do_not_audit_elements
    return true  if @element_audit_whitelist.empty?

    @element_audit_whitelist.include?(
        element.is_a?( Integer ) ? element : element.coverage_hash
    )
end

#bodyString

Returns HTTP response body.

Returns:

  • (String)

    HTTP response body.


269
270
271
272
# File 'lib/arachni/page.rb', line 269

# @return   [String]
#   The explicitly assigned body if there is one, otherwise the body of the
#   HTTP response (which is then remembered); an empty String when neither
#   is available.
def body
    return @body if @body
    return '' unless @response

    @body = response.body
end

#body=(string) ⇒ Object

Parameters:

  • string (String)

    Page body.


276
277
278
279
280
281
# File 'lib/arachni/page.rb', line 276

# Assigns a new body, invalidating everything derived from the old one.
#
# @param    [String]    string
#   Page body; it is converted with `#to_s` and stored frozen.
def body=( string )
    # The JavaScript check and the caches depend on the body.
    @has_javascript = nil
    clear_cache

    frozen_body = string.to_s.freeze
    @body = frozen_body
end

#clear_cachePage

Note:

Will preserve caches for elements which have been externally modified.

Returns self with caches cleared.

Returns:

  • (Page)

    self with caches cleared.


352
353
354
355
356
357
358
359
360
361
362
# File 'lib/arachni/page.rb', line 352

# Clears all cached data, except for element types which have been flagged as
# custom (tracked in `@has_custom_elements`).
#
# @return   [Page]
#   `self` with caches cleared.
def clear_cache
    ELEMENTS.each do |type|
        next if @has_custom_elements.include?( type )

        cached = @cache[type]
        next unless cached

        # Remove the association to this page before clearing the elements
        # from cache to make it easier on the GC.
        cached.each { |element| element.page = nil }
    end

    @cache.keep_if { |key, _| @has_custom_elements.include?( key ) }
    self
end

#codeInteger

Returns HTTP response status code (0 when no response is available).

Returns:

  • (Integer)

    HTTP response status code (0 when no response is available).


256
257
258
259
# File 'lib/arachni/page.rb', line 256

# @return   [Integer]
#   The explicitly assigned code if there is one, otherwise the code of the
#   HTTP response (which is then remembered); `0` when neither is available.
def code
    return @code if @code
    return 0 unless response

    @code = response.code
end

Returns Cookies extracted from the supplied cookie-jar.

Returns:


300
301
302
# File 'lib/arachni/page.rb', line 300

# @return   [Array]
#   The assigned cookie-jar if there is one, otherwise the one provided by
#   the #parser (or an empty Array when there is no parser); the result is
#   remembered.
def cookie_jar
    return @cookie_jar if @cookie_jar

    @cookie_jar =
        if parser
            parser.cookie_jar
        else
            []
        end
end

#do_not_audit_elementsObject

It forces #audit_element? to always return false.


238
239
240
# File 'lib/arachni/page.rb', line 238

# Raises the flag which makes #audit_element? reject every element.
#
# @return   [true]
def do_not_audit_elements
    @do_not_audit_elements = true
end

#documentArachni::Parser::Document

Returns Parsed HTML document.

Returns:


342
343
344
345
346
# File 'lib/arachni/page.rb', line 342

# @return   [Arachni::Parser::Document]
#   Parsed HTML document (cached); taken from the #parser when there is one,
#   otherwise parsed straight from the #body.
def document
    @cache[:document] ||=
        if parser.nil?
            Arachni::Parser.parse( body )
        else
            parser.document
        end
end

#dupObject


475
476
477
# File 'lib/arachni/page.rb', line 475

# @return   [Page]
#   New instance of the same class, built from #to_initialization_options.
def dup
    options = to_initialization_options
    self.class.new( options )
end

#elementsArray<Element::Base>

Returns All page elements.

Returns:


320
321
322
# File 'lib/arachni/page.rb', line 320

# @return   [Array<Element::Base>]
#   All page elements, i.e. the flattened result of calling the reader of
#   every type listed in ELEMENTS.
def elements
    per_type = ELEMENTS.each_with_object( [] ) do |type, collected|
        collected << send( type )
    end

    per_type.flatten
end

#elements_within_scopeArray<Element::Base>

Returns All page elements that are within the scope of the scan.

Returns:


326
327
328
329
330
331
# File 'lib/arachni/page.rb', line 326

# @return   [Array<Element::Base>]
#   All page elements that are within the scope of the scan; element types
#   which are not enabled in `Options.audit` are not even queried.
def elements_within_scope
    in_scope = []

    ELEMENTS.each do |type|
        next unless Options.audit.element?( type )

        in_scope << send( type ).select { |element| element.scope.in? }
    end

    in_scope.flatten.compact
end

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)

471
472
473
# File 'lib/arachni/page.rb', line 471

# Delegates to #==.
#
# @param    [Object]    other
#
# @return   [Boolean]
def eql?( other )
    same = (self == other)
    same
end

#has_elements?(*tags) ⇒ Boolean

Returns true if the page contains any of the given elements, false otherwise.

Parameters:

Returns:

  • (Boolean)

    true if the page contains any of the given elements, false otherwise.


412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/arachni/page.rb', line 412

# @param    [Array<#to_s>]  tags
#   Element tag names; nested Arrays are flattened.
#
# @return   [Boolean]
#   `true` if the page contains any of the given elements, `false` otherwise
#   (including when the page body is not text).
def has_elements?( *tags )
    # Always answer with a proper boolean, as documented, rather than `nil`.
    return false if !text?

    tags.flatten.each do |tag|
        tag = tag.to_s

        # Cheap check on the raw body first, to avoid parsing when we can.
        next if !body.has_html_tag?( tag )

        return false if !document
        return true  if document.nodes_by_name( tag ).any?
    end

    false
end

#has_script?Boolean

Returns true if the page contains client-side code, false otherwise.

Returns:

  • (Boolean)

    true if the page contains client-side code, false otherwise.


384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/arachni/page.rb', line 384

# @return   [Boolean]
#   `true` if the page contains client-side code, `false` otherwise. The
#   result is remembered in `@has_javascript`.
def has_script?
    return @has_javascript if !@has_javascript.nil?

    # #text? has to be checked first: it is false when there is no response,
    # in which case the response headers cannot be consulted.
    if !text? || !response.headers.content_type.to_s.start_with?( 'text/html' )
        return @has_javascript = false
    end

    dbody = body.downcase

    # First check, quick and simple.
    if dbody.include?( '<script' ) || dbody.include?( 'javascript:' )
        return @has_javascript = true
    end

    # Check for event attributes, if there are any then there's JS to be
    # executed.
    @has_javascript = Browser::Javascript.events.flatten.any? do |event|
        dbody.include?( "#{event}=" )
    end
end

#hashObject


463
464
465
# File 'lib/arachni/page.rb', line 463

# @return   [Integer]
#   Hash of the page `digest`.
def hash
    page_digest = digest
    page_digest.hash
end

#import_metadata(other, metas = METADATA) ⇒ Object


495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/arachni/page.rb', line 495

# Merges metadata of another page into this one.
#
# @param    [Page]  other
#   Page whose #metadata to import.
# @param    [Symbol, Array<Symbol>]  metas
#   Names of the metadata entries to import.
#
# @return   [Page]
#   `self`
def import_metadata( other, metas = METADATA )
    [metas].flatten.each do |meta|
        other.metadata.each do |element_type, data|
            @metadata[element_type] ||= {}
            @metadata[element_type][meta.to_s] ||= {}

            # The other page may hold no data for this entry; merge nothing
            # in that case instead of raising on `nil`.
            @metadata[element_type][meta.to_s].merge!( data[meta.to_s] || {} )
        end
    end

    # NOTE(review): this statement was blank in the reviewed listing and has
    # been reconstructed as a call to #reload_metadata -- confirm against the
    # original lib/arachni/page.rb.
    reload_metadata

    self
end

#method(*args) ⇒ String

Returns The request method that returned the page.

Returns:

  • (String)

    The request method that returned the page


335
336
337
338
# File 'lib/arachni/page.rb', line 335

# Without arguments it reports the HTTP request method; with arguments it
# behaves like the regular `Object#method`.
#
# @return   [String]
#   The request method that returned the page.
def method( *args )
    return response.request.method unless args.any?

    super( *args )
end

#parsed_urlArachni::URI

Returns:


186
187
188
# File 'lib/arachni/page.rb', line 186

# @return   [Arachni::URI]
#   The page #url passed through `Arachni::URI()`.
def parsed_url
    page_url = url
    Arachni::URI( page_url )
end

#parserParser

Returns:


191
192
193
194
195
196
197
198
199
200
# File 'lib/arachni/page.rb', line 191

# @return   [Parser, nil]
#   Cached parser for the page response, created on demand; `nil` when the
#   page has no response.
def parser
    return unless @response

    cached = @cache[:parser]
    return cached if cached

    @cache[:parser] = Parser.new( @response )

    # The page may have a browser-assigned body, set it as the one to parse.
    @cache[:parser].body = body
    @cache[:parser]
end

#parser=(p) ⇒ Object


202
203
204
# File 'lib/arachni/page.rb', line 202

# Stores the given parser in the page cache.
#
# @param    [Parser]    p
def parser=( p )
    @cache.store( :parser, p )
end

#pathsArray<String>

Returns Paths contained in this page.

Returns:

See Also:


308
309
310
# File 'lib/arachni/page.rb', line 308

# @return   [Array<String>]
#   Paths contained in this page (cached), as reported by the #parser; an
#   empty Array when there is no parser.
def paths
    @cache[:paths] ||=
        if parser
            parser.paths
        else
            []
        end
end

#performerObject

Returns Object which performed the #request which led to this page.

Returns:

  • (Object)

    Object which performed the #request which led to this page.


181
182
183
# File 'lib/arachni/page.rb', line 181

# @return   [Object]
#   Performer of the #request of this page.
def performer
    page_request = request
    page_request.performer
end

#persistent_hashObject


459
460
461
# File 'lib/arachni/page.rb', line 459

# @return   [Object]
#   `persistent_hash` of the page `digest`.
def persistent_hash
    page_digest = digest
    page_digest.persistent_hash
end

#platformsPlatform

Returns Applicable platforms for the page.

Returns:

  • (Platform)

    Applicable platforms for the page.


314
315
316
# File 'lib/arachni/page.rb', line 314

# @return   [Platform]
#   Applicable platforms for the page, looked up in `Platform::Manager` by
#   the page #url.
def platforms
    page_url = url
    Platform::Manager[page_url]
end

#prepare_for_reportObject


364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/arachni/page.rb', line 364

# Strips the page of data which need not be stored: all caches, non-text
# bodies, the cookie-jar and the DOM `digest` and `skip_states`.
#
# @return   [Page]
#   `self`
def prepare_for_report
    # We want a hard clear, that's why we don't call #clear_cache.
    @cache.clear

    # If we're dealing with binary data remove it before storing.
    if !text?
        # #text? is also false when there is no response at all, in which
        # case there is no response body to remove.
        response.body = nil if response
        self.body     = nil
    end

    @cookie_jar.clear if @cookie_jar

    @dom.digest      = nil
    @dom.skip_states = nil

    self
end

#query_varsHash

Returns URL query parameters.

Returns:


263
264
265
# File 'lib/arachni/page.rb', line 263

# @return   [Hash]
#   URL query parameters (cached), as parsed by `uri_parse_query` from the
#   page #url.
def query_vars
    cached = @cache[:query_vars]
    return cached if cached

    @cache[:query_vars] = uri_parse_query( url )
end

#reload_metadataObject


487
488
489
490
491
492
493
# File 'lib/arachni/page.rb', line 487

# Walks every cached element, type by type, handing each one to the metadata
# restoring helper.
#
# NOTE(review): both the method name and the per-element call were blanked
# out of the reviewed listing. The name comes from the documentation heading;
# the `restore_from_metadata` call is a reconstruction -- confirm against the
# original lib/arachni/page.rb.
def reload_metadata
    ELEMENTS.each do |type|
        next if !@cache[type]

        @cache[type].each { |e| restore_from_metadata e }
    end
end

#requestHTTP::Request

Returns HTTP request.

Returns:


244
245
246
# File 'lib/arachni/page.rb', line 244

# @return   [HTTP::Request]
#   HTTP request of the page #response.
def request
    page_response = response
    page_response.request
end

#scopeScope

Returns:


175
176
177
# File 'lib/arachni/page.rb', line 175

# @return   [Scope]
#   Scope for this page; it is created on first use and then reused, instead
#   of being rebuilt on every call.
def scope
    @scope ||= Scope.new( self )
end

#text?Boolean

Returns true if the body of the page is text-based, false otherwise.

Returns:

  • (Boolean)

    true if the body of the page is text-based, false otherwise.


429
430
431
432
# File 'lib/arachni/page.rb', line 429

# @return   [Boolean]
#   Whatever the #response reports via `#text?`; `false` when there is no
#   response.
def text?
    if response
        response.text?
    else
        false
    end
end

#titleString

Returns Title of the page.

Returns:

  • (String)

    Title of the page.


436
437
438
# File 'lib/arachni/page.rb', line 436

# @return   [String, nil]
#   Text of the first `title` node of the #document; `nil` if it cannot be
#   retrieved for any reason (errors are deliberately swallowed).
def title
    begin
        document.nodes_by_name( 'title' ).first.text
    rescue StandardError
        nil
    end
end

#to_hHash Also known as: to_hash

Returns Converts the page data to a hash.

Returns:

  • (Hash)

    Converts the page data to a hash.


442
443
444
445
446
447
448
449
450
451
# File 'lib/arachni/page.rb', line 442

# @return   [Hash]
#   The page data as a Hash: every instance variable (bar a few internal
#   ones) keyed by its name without the `@`, merged with the cache contents,
#   minus the cached parser.
def to_h
    ignored = [:@document, :@do_not_audit_elements, :@has_custom_elements, :@scope]

    data = {}
    instance_variables.each do |ivar|
        next if ignored.include?( ivar )

        key       = ivar.to_s.delete( '@' ).to_sym
        data[key] = try_dup( instance_variable_get( ivar ) )
    end

    data = data.merge( @cache )
    data.delete( :parser )
    data
end

#to_initialization_options(deep = true) ⇒ Object


509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
# File 'lib/arachni/page.rb', line 509

# Collects the options needed to re-create this page via `.new`.
#
# @param    [Bool]  deep
#   When `true`, the cookie-jar, audit whitelist, metadata and DOM data are
#   passed through #try_dup instead of being shared with the result.
#
# @return   [Hash]
def to_initialization_options( deep = true )
    options = {}
    options[:body] = @body if @body

    [:cookie_jar, :element_audit_whitelist, :metadata].each do |name|
        value = instance_variable_get( "@#{name}".to_sym )
        value = try_dup( value ) if deep

        # Unset attributes are left out altogether.
        options[name] = value if value
    end

    # Only elements flagged as custom need to be carried over.
    ELEMENTS.each do |type|
        next unless @has_custom_elements.include?( type )

        cached = @cache[type]
        next if !cached || cached.empty?

        # Hand over copies which are not associated with this page.
        options[type] = cached.map do |element|
            copy      = element.dup
            copy.page = nil
            copy
        end
    end

    options[:response]              = response
    options[:do_not_audit_elements] = @do_not_audit_elements

    options[:dom] = dom.to_h.keys.each_with_object( {} ) do |key, dom_data|
        value         = dom.send( key )
        dom_data[key] = deep ? try_dup( value ) : value
    end

    options
end

#to_rpc_dataHash

Returns Data representing this instance that are suitable for RPC transmission.

Returns:

  • (Hash)

    Data representing this instance that are suitable for RPC transmission.


553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
# File 'lib/arachni/page.rb', line 553

# @return   [Hash]
#   String-keyed data representing this instance: the (shallow)
#   initialization options with the DOM, response and elements converted to
#   their own RPC data, the whitelist as an Array and without the cookie-jar.
def to_rpc_data
    rpc = to_initialization_options( false ).my_stringify_keys( false )

    rpc['dom']                     = dom.to_rpc_data
    rpc['element_audit_whitelist'] = element_audit_whitelist.to_a
    rpc['response']                = rpc['response'].to_rpc_data

    ELEMENTS.each do |type|
        # Headers are left as they are.
        next if type == :headers

        name = type.to_s
        next if !rpc[name]

        rpc[name] = send( name ).map( &:to_rpc_data )
    end

    rpc.delete 'cookie_jar'
    rpc
end

#to_sObject Also known as: inspect


454
455
456
# File 'lib/arachni/page.rb', line 454

# @return   [String]
#   Short description including the class, object ID, URL and DOM.
def to_s
    format(
        '#<%s:%s @url=%s @dom=%s>',
        self.class, object_id, @url.inspect, @dom
    )
end

#update_element_audit_whitelist(list) ⇒ Set


214
215
216
217
218
219
# File 'lib/arachni/page.rb', line 214

# Adds entries to the audit whitelist.
#
# @param    [Integer, #coverage_hash, Array]    list
#   One or more coverage hashes, or objects providing one via
#   `#coverage_hash`; Arrays are flattened.
def update_element_audit_whitelist( list )
    [list].flatten.each do |entry|
        coverage = entry.is_a?( Integer ) ? entry : entry.coverage_hash
        @element_audit_whitelist << coverage
    end
end

#update_metadataObject


479
480
481
482
483
484
485
# File 'lib/arachni/page.rb', line 479

# Walks every cached element, type by type, handing each one to the metadata
# storing helper.
#
# NOTE(review): both the method name and the per-element call were blanked
# out of the reviewed listing. The name comes from the documentation heading;
# the `store_to_metadata` call is a reconstruction -- confirm against the
# original lib/arachni/page.rb.
def update_metadata
    ELEMENTS.each do |type|
        next if !@cache[type]

        @cache[type].each { |e| store_to_metadata e }
    end
end

#urlString

Returns URL of the page.

Returns:

  • (String)

    URL of the page.


250
251
252
# File 'lib/arachni/page.rb', line 250

# @return   [String, nil]
#   The assigned URL if there is one, otherwise the URL of the response
#   (which is then remembered); `nil` when neither is available, which lets
#   #initialize fail with its own 'No URL given!' error instead of a
#   `NoMethodError`.
def url
    @url ||= (@response.url if @response)
end