Class: Arachni::BrowserCluster

Inherits:
Object
  • Object
show all
Includes:
UI::Output, Utilities
Defined in:
lib/arachni/browser_cluster.rb,
lib/arachni/browser_cluster/job.rb,
lib/arachni/browser_cluster/worker.rb,
lib/arachni/browser_cluster/job/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace.rb,
lib/arachni/browser_cluster/jobs/browser_provider.rb,
lib/arachni/browser_cluster/jobs/taint_trace/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger/result.rb

Overview

Real browser driver providing DOM/JS/AJAX support.

Author:

Defined Under Namespace

Modules: Jobs Classes: Error, Job, Worker

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Methods included from UI::Output

#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on

Constructor Details

#initialize(options = {}) ⇒ BrowserCluster

Returns a new instance of BrowserCluster.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :pool_size (Integer) — default: 5

    Amount of browsers to add to the pool.

  • :time_to_live (Integer) — default: 10

    Restricts each browser’s lifetime to the given amount of pages. When that number is exceeded the current process is killed and a new one is pushed to the pool. Helps prevent memory leak issues.

Raises:

  • ArgumentError On missing `:handler` option.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/arachni/browser_cluster.rb', line 89

# Applies the given options, resets all job bookkeeping and spawns the workers.
#
# @param    [Hash]  options
#   Merged over a default `:pool_size` taken from
#   `Options.browser_cluster.pool_size`. Each pair is handed to the matching
#   `name=` writer (after `try_dup`); if that raises `NoMethodError` the raw
#   value is stored in the `@name` instance variable instead.
def initialize( options = {} )
    defaults = { pool_size: Options.browser_cluster.pool_size }

    defaults.merge( options ).each do |name, value|
        begin
            send( "#{name}=", try_dup( value ) )
        rescue NoMethodError
            instance_variable_set( "@#{name}".to_sym, value )
        end
    end

    # Per-Job#id skip states, used to sync operations between workers.
    @skip_states_per_job = {}

    # Per-Job#id callbacks. Kept here because jobs get serialized and
    # off-loaded to disk, so they can't carry Block or Proc objects themselves.
    @job_callbacks = {}

    # Amount of pending jobs spread across the cluster, keyed by Job#id.
    # A job whose count drops to 0 gets passed to #job_done.
    @pending_jobs        = Hash.new( 0 )
    @pending_job_counter = 0

    # Disk-backed job queue.
    @jobs = Support::Database::Queue.new

    # Pool of BrowserCluster::Worker instances.
    @workers = []

    # Resources visited by all workers.
    @sitemap     = {}
    @mutex       = Monitor.new
    @done_signal = Queue.new

    # Javascript token shared by all workers.
    @javascript_token = Utilities.generate_token

    @consumed_pids = []
    initialize_workers
end

Instance Attribute Details

#consumed_pids ⇒ Object (readonly)

Returns the value of attribute consumed_pids.



78
79
80
# File 'lib/arachni/browser_cluster.rb', line 78

# Plain reader for the `@consumed_pids` instance variable.
#
# @return   [Object]
#   Current value of `@consumed_pids` (the constructor initializes it to an
#   empty Array).
def consumed_pids
  @consumed_pids
end

#javascript_token ⇒ String (readonly)

Returns Javascript token used to namespace the custom JS environment.

Returns:

  • (String)

    Javascript token used to namespace the custom JS environment.



68
69
70
# File 'lib/arachni/browser_cluster.rb', line 68

# Plain reader for the `@javascript_token` instance variable.
#
# @return   [String]
#   Javascript token used to namespace the custom JS environment; the
#   constructor generates it once so that all workers share the same value.
def javascript_token
  @javascript_token
end

#pending_job_counter ⇒ Integer (readonly)

Returns Number of pending jobs.

Returns:

  • (Integer)

    Number of pending jobs.



76
77
78
# File 'lib/arachni/browser_cluster.rb', line 76

# Plain reader for the `@pending_job_counter` instance variable.
#
# @return   [Integer]
#   Number of pending jobs.
def pending_job_counter
  @pending_job_counter
end

#pool_size ⇒ Integer (readonly)

Returns Amount of browser instances in the pool.

Returns:

  • (Integer)

    Amount of browser instances in the pool.



60
61
62
# File 'lib/arachni/browser_cluster.rb', line 60

# Plain reader for the `@pool_size` instance variable.
#
# @return   [Integer]
#   Amount of browser instances in the pool.
def pool_size
  @pool_size
end

#sitemap ⇒ Hash<String, Integer> (readonly)

Returns List of crawled URLs with their HTTP codes.

Returns:

  • (Hash<String, Integer>)

    List of crawled URLs with their HTTP codes.



64
65
66
# File 'lib/arachni/browser_cluster.rb', line 64

# Plain reader for the `@sitemap` instance variable.
#
# @return   [Hash<String, Integer>]
#   List of crawled URLs with their HTTP codes.
def sitemap
  @sitemap
end

#workers ⇒ Array<Worker> (readonly)

Returns Worker pool.

Returns:



72
73
74
# File 'lib/arachni/browser_cluster.rb', line 72

# Plain reader for the `@workers` instance variable.
#
# @return   [Array<Worker>]
#   Worker pool.
def workers
  @workers
end

Instance Method Details

#callback_for(job) ⇒ Object



360
361
362
# File 'lib/arachni/browser_cluster.rb', line 360

# Looks up the callback stored for the given job.
#
# @param    [Job]  job
#   Job whose `#id` is used as the lookup key.
#
# @return   [Object, nil]
#   Entry of `@job_callbacks` filed under `job.id`.
def callback_for( job )
    job_id = job.id
    @job_callbacks[job_id]
end

#decrease_pending_job(job) ⇒ Object



351
352
353
354
355
356
357
# File 'lib/arachni/browser_cluster.rb', line 351

# Takes one unit off both the global and the per-job pending counters and
# hands the job to #job_done once its own counter is no longer positive.
#
# @param    [Job]  job
def decrease_pending_job( job )
    synchronize do
        @pending_job_counter -= 1

        remaining = ( @pending_jobs[job.id] -= 1 )
        job_done( job ) unless remaining > 0
    end
end

#done? ⇒ Bool

Returns `true` if there are no resources to analyze and no running workers.

Returns:

  • (Bool)

    `true` if there are no resources to analyze and no running workers.



260
261
262
263
# File 'lib/arachni/browser_cluster.rb', line 260

# Tells whether the cluster has run out of pending jobs.
#
# Calls `fail_if_shutdown` first.
#
# @return   [Bool]
#   `true` when the pending job counter is 0, `false` otherwise.
def done?
    fail_if_shutdown
    @pending_job_counter.zero?
end

#explore(resource, options = {}, &block) ⇒ Object

Parameters:

See Also:



179
180
181
182
183
184
# File 'lib/arachni/browser_cluster.rb', line 179

# Queues a `Jobs::ResourceExploration` job for the given resource.
#
# @param    [Object]  resource
#   Stored in the job options under the `:resource` key.
# @param    [Hash]  options
#   Extra options for `Jobs::ResourceExploration.new`.
# @param    [Block] block
#   Callback forwarded to {#queue}.
def explore( resource, options = {}, &block )
    exploration = Jobs::ResourceExploration.new( options.merge( resource: resource ) )
    queue( exploration, &block )
end

#handle_job_result(result) ⇒ Object

Parameters:



243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/arachni/browser_cluster.rb', line 243

# Passes a job result to the callback registered for its job.
#
# Does nothing once the cluster has been shut down or when the job has
# already been marked as done. The callback is looked up and called inside
# `exception_jail( false )`.
#
# @param    [Job::Result]  result
#
# @return   [nil]
def handle_job_result( result )
    return if @shutdown || job_done?( result.job )

    synchronize do
        print_debug "Got job result: #{result}"

        exception_jail( false ) do
            callback = @job_callbacks[result.job.id]
            callback.call( result )
        end
    end

    nil
end

#job_done(job) ⇒ Object

Parameters:

  • job (Job)

    Job to mark as done. Will remove any callbacks and associated Worker states.



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/arachni/browser_cluster.rb', line 203

# Marks a job as done.
#
# Unless the job is never-ending, its callback and skip-states are dropped.
# Whatever is still pending for the job is subtracted from the global
# counter and, once nothing is pending at all, the counter is reset to 0 and
# the done-signal queue is pushed to.
#
# @param    [Job]  job
#   Job to mark as done.
#
# @return   [true]
def job_done( job )
    synchronize do
        print_debug "Job done: #{job}"

        unless job.never_ending?
            @skip_states_per_job.delete( job.id )
            @job_callbacks.delete( job.id )
        end

        outstanding = @pending_jobs[job.id]
        @pending_job_counter -= outstanding
        @pending_jobs[job.id] = 0

        unless @pending_job_counter > 0
            @pending_job_counter = 0
            @done_signal << nil
        end
    end

    true
end

#job_done?(job, fail_if_not_found = true) ⇒ Bool

Returns `true` if the `job` has been marked as finished, `false` otherwise.

Parameters:

Returns:

  • (Bool)

    `true` if the `job` has been marked as finished, `false` otherwise.

Raises:



230
231
232
233
234
235
236
237
238
# File 'lib/arachni/browser_cluster.rb', line 230

# Checks whether a job has been marked as finished.
#
# Never-ending jobs are never reported as done. Otherwise the job counts as
# done when it is tracked in `@pending_jobs` with a count of 0.
#
# @param    [Job]  job
# @param    [Bool] fail_if_not_found
#   When true, `fail_if_job_not_found` is called with the job first.
#
# @return   [Bool]
#   `true` if the `job` has been marked as finished, `false` otherwise.
def job_done?( job, fail_if_not_found = true )
    return false if job.never_ending?

    synchronize do
        fail_if_job_not_found( job ) if fail_if_not_found

        @pending_jobs.include?( job.id ) && @pending_jobs[job.id] == 0
    end
end

#pop ⇒ Job

Returns Pops a job from the queue.

Returns:

  • (Job)

    Pops a job from the queue.

See Also:



298
299
300
301
# File 'lib/arachni/browser_cluster.rb', line 298

# Pops jobs from the queue, discarding those already marked as done.
#
# @return   [Job]
#   First popped job for which {#job_done?} is not true.
def pop
    candidate = @jobs.pop
    candidate = @jobs.pop while job_done?( candidate )
    candidate
end

#push_to_sitemap(url, code) ⇒ Object



333
334
335
# File 'lib/arachni/browser_cluster.rb', line 333

# Records the given URL and its code in the sitemap.
#
# @param    [String]  url
# @param    [Integer] code
#
# @return   [Integer]
#   The stored `code`.
def push_to_sitemap( url, code )
    synchronize do
        @sitemap.store( url, code )
    end
end

#queue(job, &block) ⇒ Object

Parameters:

Raises:



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/arachni/browser_cluster.rb', line 146

# Queues a job for the workers to pick up.
#
# The callback is validated before any bookkeeping is touched, so a rejected
# call leaves the pending counters and the job queue untouched. Otherwise the
# cluster would account for a job which was never queued and never report
# itself as done.
#
# @param    [Job]  job
# @param    [Block] block
#   Callback to register for `job.id`. May be omitted when a callback has
#   already been registered for that ID.
#
# @return   [nil]
#
# @raise    [ArgumentError]
#   When no block was given and no callback is registered for `job.id`.
def queue( job, &block )
    fail_if_shutdown
    fail_if_job_done job

    @done_signal.clear

    synchronize do
        print_debug "Queueing: #{job}"

        # Check for a usable callback first, nothing has been mutated yet.
        if !( block || @job_callbacks[job.id] )
            fail ArgumentError, "No callback set for job ID #{job.id}."
        end

        @job_callbacks[job.id] = block if block

        @pending_job_counter  += 1
        @pending_jobs[job.id] += 1

        @jobs << job
    end

    nil
end

#shutdown(wait = true) ⇒ Object

Shuts the cluster down.



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/arachni/browser_cluster.rb', line 273

# Shuts the cluster down.
#
# @param    [Bool]  wait
#   Forwarded to each worker's `#shutdown`.
#
# @return   [true]
def shutdown( wait = true )
    @shutdown = true

    # Clearing the job queue matters, it is also what removes the disk files
    # of the items it holds.
    @jobs.clear

    # Take down the browsers, each one jailed.
    @workers.each do |worker|
        exception_jail( false ) { worker.shutdown( wait ) }
    end
    @workers.clear

    # These must go last, they may hold data needed to cleanly handle
    # interrupted jobs.
    [@job_callbacks, @skip_states_per_job, @pending_jobs].each( &:clear )

    true
end

#skip_state(job_id, state) ⇒ Object

Used to sync operations between browser workers.

Parameters:

  • job_id (Integer)

    Job ID.

  • state (String)

    State to skip in the future.



328
329
330
# File 'lib/arachni/browser_cluster.rb', line 328

# Adds a state to the skip-states of the given job; used to sync operations
# between browser workers.
#
# @param    [Integer]  job_id
#   Job ID.
# @param    [String]  state
#   State to skip in the future.
def skip_state( job_id, state )
    synchronize do
        states = skip_states( job_id )
        states << state
    end
end

#skip_state?(job_id, state) ⇒ Boolean

Used to sync operations between browser workers.

Parameters:

  • job_id (Integer)

    Job ID.

  • state (String)

    Should the given state be skipped?

Returns:

  • (Boolean)

Raises:



314
315
316
317
318
# File 'lib/arachni/browser_cluster.rb', line 314

# Checks the skip-states of the given job for a state; used to sync
# operations between browser workers.
#
# @param    [Integer]  job_id
#   Job ID.
# @param    [String]  state
#   State to look for.
#
# @return   [Boolean]
#   Whether the job's skip-states include `state`.
def skip_state?( job_id, state )
    synchronize { skip_states( job_id ).include?( state ) }
end

#skip_states(id) ⇒ Object



343
344
345
346
347
348
# File 'lib/arachni/browser_cluster.rb', line 343

# Returns the skip-states set of the given job, creating and storing a new
# `Support::LookUp::HashSet` when the job has none yet.
#
# @param    [Integer]  id
#   Job ID.
#
# @return   [Support::LookUp::HashSet]
def skip_states( id )
    synchronize do
        known = @skip_states_per_job[id]
        next known if known

        @skip_states_per_job[id] =
            Support::LookUp::HashSet.new( hasher: :persistent_hash )
    end
end

#trace_taint(resource, options = {}, &block) ⇒ Object

Parameters:

See Also:



196
197
198
# File 'lib/arachni/browser_cluster.rb', line 196

# Queues a `Jobs::TaintTrace` job for the given resource.
#
# @param    [Object]  resource
#   Stored in the job options under the `:resource` key.
# @param    [Hash]  options
#   Extra options for `Jobs::TaintTrace.new`.
# @param    [Block] block
#   Callback forwarded to {#queue}.
def trace_taint( resource, options = {}, &block )
    trace_job = Jobs::TaintTrace.new( options.merge( resource: resource ) )
    queue( trace_job, &block )
end

#update_skip_states(id, lookups) ⇒ Object



338
339
340
# File 'lib/arachni/browser_cluster.rb', line 338

# Merges the given lookups into the skip-states of a job.
#
# @param    [Integer]  id
#   Job ID.
# @param    [Object]  lookups
#   Passed as-is to `#merge` of the job's skip-states.
def update_skip_states( id, lookups )
    synchronize do
        skip_states( id ).merge( lookups )
    end
end

#wait ⇒ Object

Blocks until all resources have been analyzed.



266
267
268
269
270
# File 'lib/arachni/browser_cluster.rb', line 266

# Blocks until all resources have been analyzed.
#
# Calls `fail_if_shutdown` first. When the cluster is not done yet, it pops
# from the done-signal queue.
#
# @return   [BrowserCluster]
#   `self`
def wait
    fail_if_shutdown
    @done_signal.pop unless done?
    self
end

#with_browser(&block) ⇒ Object

Note:

Operates in non-blocking mode.

Parameters:

  • block (Block)

    Block to which to pass a Worker as soon as one is available.



136
137
138
# File 'lib/arachni/browser_cluster.rb', line 136

# Queues a `Jobs::BrowserProvider` job with the given block as its callback.
#
# @param    [Block] block
#   Block to which to pass a Worker as soon as one is available.
def with_browser( &block )
    provider_job = Jobs::BrowserProvider.new
    queue( provider_job, &block )
end