Class: Arachni::BrowserCluster

Inherits:
Object
  • Object
show all
Includes:
UI::Output, Utilities
Defined in:
lib/arachni/browser_cluster.rb,
lib/arachni/browser_cluster/job.rb,
lib/arachni/browser_cluster/worker.rb,
lib/arachni/browser_cluster/job/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace.rb,
lib/arachni/browser_cluster/jobs/browser_provider.rb,
lib/arachni/browser_cluster/jobs/taint_trace/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger/result.rb

Overview

Real browser driver providing DOM/JS/AJAX support.

Author:

Defined Under Namespace

Modules: Jobs Classes: Error, Job, Worker

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #bytes_to_kilobytes, #bytes_to_megabytes, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Methods included from UI::Output

#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on

Constructor Details

#initialize(options = {}) ⇒ BrowserCluster

Returns a new instance of BrowserCluster.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :pool_size (Integer) — default: 5

    Amount of browsers to add to the pool.

  • :time_to_live (Integer) — default: 10

    Restricts each browser’s lifetime to the given number of pages. When that number is exceeded, its process is killed and a new one is pushed to the pool. Helps prevent memory leaks.

Raises:

  • ArgumentError — On missing `:handler` option.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/arachni/browser_cluster.rb', line 83

# Builds the cluster's bookkeeping state and then starts the worker pool.
#
# @param options [Hash]
#   Per-instance settings, merged over the defaults. The only default set
#   here is `:pool_size`, taken from `Options.browser_cluster.pool_size`.
#   Each key is applied through its `key=` writer when one responds,
#   otherwise it is stored directly in the matching instance variable.
def initialize( options = {} )
    super()

    # Caller-supplied options win over the defaults because they are merged
    # on top of them.
    {
        pool_size: Options.browser_cluster.pool_size
    }.merge( options ).each do |k, v|
        begin
            # Writers receive a `try_dup` copy of the value.
            send( "#{k}=", try_dup( v ) )
        rescue NoMethodError
            # No writer for this option: fall back to a plain instance
            # variable. Note that this path stores `v` itself, not a copy.
            # NOTE(review): this rescue also catches a NoMethodError raised
            # *inside* a writer or inside `try_dup` -- confirm that is intended.
            instance_variable_set( "@#{k}".to_sym, v )
        end
    end

    # Used to sync operations between workers per Job#id.
    @skip_states_per_job = {}

    # Callbacks for each job per Job#id. We need to keep track of this
    # here because jobs are serialized and off-loaded to disk and thus can't
    # contain Block or Proc objects.
    @job_callbacks = {}

    # Keeps track of the amount of pending jobs distributed across the
    # cluster, by Job#id. Once a job's count reaches 0, it's passed to
    # #job_done.
    @pending_jobs = Hash.new(0)
    # Total of pending jobs across all IDs; #done? compares it against 0.
    @pending_job_counter = 0

    # Jobs are off-loaded to disk.
    @jobs = Support::Database::Queue.new

    # Worker pool holding BrowserCluster::Worker instances.
    @workers     = []

    # Stores visited resources from all workers.
    @sitemap     = {}
    # Presumably the lock behind #synchronize (defined outside this view) --
    # TODO confirm.
    @mutex       = Monitor.new
    # #job_done pushes onto this queue once the pending counter drops to 0
    # and #wait blocks popping from it.
    @done_signal = Queue.new

    @consumed_pids = []
    # Kept last so that all of the state above exists before workers start.
    # NOTE(review): `initialize_workers` is not visible here -- confirm.
    initialize_workers
end

Instance Attribute Details

#consumed_pids ⇒ Object (readonly)

Returns the value of attribute consumed_pids.



72
73
74
# File 'lib/arachni/browser_cluster.rb', line 72

# Read-only accessor.
#
# @return [Array]
#   Current value of `@consumed_pids` (starts out as an empty Array).
def consumed_pids
    @consumed_pids
end

#pending_job_counter ⇒ Integer (readonly)

Returns the number of pending jobs.

Returns:

  • (Integer)

    Number of pending jobs.



70
71
72
# File 'lib/arachni/browser_cluster.rb', line 70

# Read-only accessor.
#
# @return [Integer]
#   Number of jobs that have been queued but not yet marked as done.
def pending_job_counter
    @pending_job_counter
end

#pool_size ⇒ Integer (readonly)

Returns the number of browser instances in the pool.

Returns:

  • (Integer)

    Amount of browser instances in the pool.



58
59
60
# File 'lib/arachni/browser_cluster.rb', line 58

# Read-only accessor.
#
# @return [Integer]
#   Configured amount of browser instances in the pool.
def pool_size
    @pool_size
end

#sitemap ⇒ Hash<String, Integer> (readonly)

Returns the list of crawled URLs with their HTTP codes.

Returns:

  • (Hash<String, Integer>)

    List of crawled URLs with their HTTP codes.



62
63
64
# File 'lib/arachni/browser_cluster.rb', line 62

# Read-only accessor.
#
# @return [Hash<String, Integer>]
#   URLs recorded via #push_to_sitemap, mapped to their HTTP codes.
def sitemap
    @sitemap
end

#workers ⇒ Array<Worker> (readonly)

Returns the worker pool.

Returns:



66
67
68
# File 'lib/arachni/browser_cluster.rb', line 66

# Read-only accessor.
#
# @return [Array<Worker>]
#   The worker pool.
def workers
    @workers
end

Instance Method Details

#callback_for(job) ⇒ Object



372
373
374
# File 'lib/arachni/browser_cluster.rb', line 372

# Looks up the callback registered for the given job.
#
# @param job [Job]
#   Job whose `#id` is used as the lookup key.
#
# @return [Proc, nil]
#   The stored callback, or `nil` when none is registered for that ID.
def callback_for( job )
    job_id = job.id
    @job_callbacks[job_id]
end

#decrease_pending_job(job) ⇒ Object



363
364
365
366
367
368
369
# File 'lib/arachni/browser_cluster.rb', line 363

# Records that one pending unit of work for `job` has completed.
#
# Decrements both the global counter and the per-job counter and, once the
# per-job counter is no longer positive, hands the job over to #job_done.
#
# @param job [Job]
#
# @return [true, nil]
#   Whatever #job_done returned when it was invoked, `nil` otherwise.
def decrease_pending_job( job )
    synchronize do
        @pending_job_counter -= 1
        remaining = ( @pending_jobs[job.id] -= 1 )

        # Still work outstanding for this job, nothing more to do.
        next unless remaining <= 0

        job_done( job )
    end
end

#done? ⇒ Bool

Returns `true` if there are no resources to analyze and no running workers.

Returns:

  • (Bool)

    `true` if there are no resources to analyze and no running workers.



271
272
273
274
# File 'lib/arachni/browser_cluster.rb', line 271

# Tells whether the cluster has run out of pending jobs.
#
# Runs `fail_if_shutdown` first (presumably raising once the cluster has
# been shut down -- that helper is not visible here).
#
# @return [Bool]
#   `true` when the pending job counter is 0, `false` otherwise.
def done?
    fail_if_shutdown
    @pending_job_counter.zero?
end

#explore(resource, options = {}, &block) ⇒ Object

Parameters:

See Also:



188
189
190
191
192
193
# File 'lib/arachni/browser_cluster.rb', line 188

# Queues a resource exploration job for the given resource.
#
# @param resource [Object]
#   Resource to explore; stored under the `:resource` job option
#   (overriding any `:resource` key already in `options`).
# @param options [Hash]
#   Extra options for `Jobs::ResourceExploration.new`.
# @param block [Block]
#   Callback forwarded to #queue.
#
# @return [Object]
#   Whatever #queue returns.
def explore( resource, options = {}, &block )
    job_options = options.merge( resource: resource )
    job         = Jobs::ResourceExploration.new( job_options )

    queue( job, &block )
end

#handle_job_result(result) ⇒ Object

Parameters:



254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/arachni/browser_cluster.rb', line 254

# Passes a job result on to the callback registered for its job.
#
# Nothing happens once the cluster has been shut down or when the job has
# already been marked as done.
#
# @param result [Job::Result]
#   Result to dispatch; its `#job` identifies the callback.
#
# @return [nil]
def handle_job_result( result )
    return if @shutdown || job_done?( result.job )

    synchronize do
        print_debug "Got job result: #{result}"

        # The lookup and the call both happen inside the jail, as before.
        # NOTE(review): `exception_jail( false )` presumably contains
        # callback errors without re-raising -- confirm in Utilities.
        exception_jail( false ) do
            callback = @job_callbacks[result.job.id]
            callback.call( result )
        end
    end

    nil
end

#javascript_token ⇒ String

Returns the Javascript token used to namespace the custom JS environment.

Returns:

  • (String)

    Javascript token used to namespace the custom JS environment.



127
128
129
# File 'lib/arachni/browser_cluster.rb', line 127

# Exposes the shared Javascript token.
#
# @return [String]
#   Value of `Browser::Javascript::TOKEN`.
def javascript_token
    Browser::Javascript::TOKEN
end

#job_done(job) ⇒ Object

Parameters:

  • job (Job)

    Job to mark as done. Will remove any callbacks and associated Worker states.



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/arachni/browser_cluster.rb', line 212

# Marks the given job as finished.
#
# Notifies `on_job_done` observers, drops the job's callback and skip
# states (unless the job is never-ending), removes its outstanding count
# from the global counter and pushes onto the done signal once nothing is
# pending any more.
#
# @param job [Job]
#   Job to mark as done.
#
# @return [true]
def job_done( job )
    synchronize do
        print_debug "Job done: #{job}"

        notify_on_job_done job

        # Never-ending jobs keep their callback and skip states.
        unless job.never_ending?
            @skip_states_per_job.delete( job.id )
            @job_callbacks.delete( job.id )
        end

        # Whatever was still pending for this job no longer counts.
        @pending_job_counter -= @pending_jobs[job.id]
        @pending_jobs[job.id] = 0

        unless @pending_job_counter > 0
            @pending_job_counter = 0
            @done_signal.push( nil )
        end
    end

    true
end

#job_done?(job, fail_if_not_found = true) ⇒ Bool

Returns `true` if the `job` has been marked as finished, `false` otherwise.

Parameters:

Returns:

  • (Bool)

    `true` if the `job` has been marked as finished, `false` otherwise.

Raises:



241
242
243
244
245
246
247
248
249
# File 'lib/arachni/browser_cluster.rb', line 241

# Checks whether the given job has been marked as finished.
#
# @param job [Job]
# @param fail_if_not_found [Bool]
#   When truthy, `fail_if_job_not_found` is run for the job first
#   (presumably raising for unknown jobs -- helper not visible here).
#
# @return [Bool]
#   `false` for never-ending jobs and for jobs without a pending-count
#   entry; otherwise `true` exactly when the job's pending count is 0.
def job_done?( job, fail_if_not_found = true )
    return false if job.never_ending?

    synchronize do
        fail_if_job_not_found( job ) if fail_if_not_found

        @pending_jobs.key?( job.id ) && @pending_jobs[job.id] == 0
    end
end

#pop ⇒ Job

Pops a job from the queue and returns it.

Returns:

  • (Job)

    Pops a job from the queue.

See Also:



309
310
311
312
313
# File 'lib/arachni/browser_cluster.rb', line 309

# Pops the next job that has not already been marked as done.
#
# Jobs for which #job_done? is true are discarded. `on_pop` observers are
# notified with the job that is finally returned.
#
# @return [Job]
def pop
    job = @jobs.pop
    # Keep popping for as long as we get jobs which are already finished.
    job = @jobs.pop while job_done?( job )

    notify_on_pop job
    job
end

#push_to_sitemap(url, code) ⇒ Object



345
346
347
# File 'lib/arachni/browser_cluster.rb', line 345

# Records (or overwrites) the HTTP code seen for a URL in the sitemap.
#
# @param url [String]
# @param code [Integer]
#
# @return [Integer]
#   The stored `code`.
def push_to_sitemap( url, code )
    synchronize do
        @sitemap.store( url, code )
    end
end

#queue(job, &block) ⇒ Object

Parameters:

Raises:



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/arachni/browser_cluster.rb', line 145

# Queues a job to be run by the workers.
#
# A callback must be available for the job: either the given `block` or one
# already registered for the same `Job#id` by an earlier call.
#
# @param job [Job]
#   Job to queue.
# @param block [Block]
#   Callback to register for the job's ID; replaces any existing one.
#
# @return [nil]
#
# @raise [ArgumentError]
#   When no callback is available for the job. In that case no state is
#   changed: nothing is counted as pending, no observer is notified and
#   the job is not queued.
def queue( job, &block )
    fail_if_shutdown
    fail_if_job_done job

    synchronize do
        # Validate before touching any state. Counting the job as pending
        # and then raising would leave the counters permanently inflated,
        # so #done? could never become true again.
        if !block && !@job_callbacks[job.id]
            fail ArgumentError, "No callback set for job ID #{job.id}."
        end

        # New work is coming in, so any earlier "all done" signal is stale.
        # Cleared under the lock so it is ordered against #job_done's push.
        @done_signal.clear

        print_debug "Queueing: #{job}"

        notify_on_queue job

        @pending_job_counter  += 1
        @pending_jobs[job.id] += 1
        @job_callbacks[job.id] = block if block

        @jobs << job
    end

    nil
end

#shutdown(wait = true) ⇒ Object

Shuts the cluster down.



284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/arachni/browser_cluster.rb', line 284

# Shuts the cluster down.
#
# @param wait [Bool]
#   Passed through to each worker's `#shutdown`.
#
# @return [true]
def shutdown( wait = true )
    @shutdown = true

    # Clearing the job queue also removes the disk files backing its
    # items -- don't skip this.
    @jobs.clear

    # Kill the browsers; each shutdown call is wrapped in its own jail.
    @workers.each do |worker|
        exception_jail( false ) { worker.shutdown( wait ) }
    end
    @workers.clear

    # These have to go last, they may hold data needed to cleanly handle
    # jobs which got interrupted by the shutdown.
    [@job_callbacks, @skip_states_per_job, @pending_jobs].each( &:clear )

    true
end

#skip_state(job_id, state) ⇒ Object

Used to sync operations between browser workers.

Parameters:

  • job_id (Integer)

    Job ID.

  • state (String)

    State to skip in the future.



340
341
342
# File 'lib/arachni/browser_cluster.rb', line 340

# Used to sync operations between browser workers: registers a state which
# should be skipped from now on for the given job.
#
# @param job_id [Integer]
#   Job ID.
# @param state [String]
#   State to skip in the future.
#
# @return [Object]
#   Result of appending `state` to the job's skip-state collection.
def skip_state( job_id, state )
    synchronize do
        seen = skip_states( job_id )
        seen << state
    end
end

#skip_state?(job_id, state) ⇒ Boolean

Used to sync operations between browser workers.

Parameters:

  • job_id (Integer)

    Job ID.

  • state (String)

    Should the given state be skipped?

Returns:

  • (Boolean)

Raises:



326
327
328
329
330
# File 'lib/arachni/browser_cluster.rb', line 326

# Used to sync operations between browser workers: tells whether a state
# has been registered as skippable for the given job.
#
# @param job_id [Integer]
#   Job ID.
# @param state [String]
#   State to check.
#
# @return [Boolean]
#   Result of `include?` on the job's skip-state collection.
def skip_state?( job_id, state )
    synchronize { skip_states( job_id ).include?( state ) }
end

#skip_states(id) ⇒ Object



355
356
357
358
359
360
# File 'lib/arachni/browser_cluster.rb', line 355

# Returns the skip-state collection of a job, creating it on first access.
#
# @param id [Integer]
#   Job ID.
#
# @return [Support::LookUp::HashSet]
#   The collection stored for `id`.
def skip_states( id )
    synchronize do
        existing = @skip_states_per_job[id]
        next existing if existing

        @skip_states_per_job[id] =
            Support::LookUp::HashSet.new( hasher: :persistent_hash )
    end
end

#trace_taint(resource, options = {}, &block) ⇒ Object

Parameters:

See Also:



205
206
207
# File 'lib/arachni/browser_cluster.rb', line 205

# Queues a taint-trace job for the given resource.
#
# @param resource [Object]
#   Resource to trace; stored under the `:resource` job option
#   (overriding any `:resource` key already in `options`).
# @param options [Hash]
#   Extra options for `Jobs::TaintTrace.new`.
# @param block [Block]
#   Callback forwarded to #queue.
#
# @return [Object]
#   Whatever #queue returns.
def trace_taint( resource, options = {}, &block )
    job_options = options.merge( resource: resource )

    queue( Jobs::TaintTrace.new( job_options ), &block )
end

#update_skip_states(id, lookups) ⇒ Object



350
351
352
# File 'lib/arachni/browser_cluster.rb', line 350

# Merges a batch of states into the skip-state collection of a job.
#
# @param id [Integer]
#   Job ID.
# @param lookups [Object]
#   States to merge; handed as-is to the collection's `#merge`.
#
# @return [Object]
#   Whatever the collection's `#merge` returns.
def update_skip_states( id, lookups )
    synchronize do
        skip_states( id ).merge( lookups )
    end
end

#wait ⇒ Object

Blocks until all resources have been analyzed.



277
278
279
280
281
# File 'lib/arachni/browser_cluster.rb', line 277

# Blocks until all resources have been analyzed.
#
# Returns right away when #done? is already true, otherwise blocks popping
# from the done signal, which #job_done pushes to.
#
# @return [BrowserCluster]
#   `self`
def wait
    fail_if_shutdown

    @done_signal.pop unless done?

    self
end

#with_browser(&block) ⇒ Object

Note:

Operates in non-blocking mode.

Parameters:

  • block (Block)

    Block to which to pass a Worker as soon as one is available.



135
136
137
# File 'lib/arachni/browser_cluster.rb', line 135

# Queues a `Jobs::BrowserProvider` job with the given block as its callback.
#
# @note Operates in non-blocking mode.
#
# @param block [Block]
#   Block to which to pass a Worker as soon as one is available.
#
# @return [Object]
#   Whatever #queue returns.
def with_browser( &block )
    provider = Jobs::BrowserProvider.new

    queue( provider, &block )
end