Class: Arachni::BrowserCluster
- Includes:
- UI::Output, Utilities
- Defined in:
- lib/arachni/browser_cluster.rb,
lib/arachni/browser_cluster/job.rb,
lib/arachni/browser_cluster/worker.rb,
lib/arachni/browser_cluster/job/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace.rb,
lib/arachni/browser_cluster/jobs/browser_provider.rb,
lib/arachni/browser_cluster/jobs/taint_trace/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger/result.rb
Overview
Real browser driver providing DOM/JS/AJAX support.
Defined Under Namespace
Modules: Jobs Classes: Error, Job, Worker
Instance Attribute Summary collapse
-
#consumed_pids ⇒ Object
readonly
Returns the value of attribute consumed_pids.
-
#pending_job_counter ⇒ Integer
readonly
Number of pending jobs.
-
#pool_size ⇒ Integer
readonly
Amount of browser instances in the pool.
-
#sitemap ⇒ Hash<String, Integer>
readonly
List of crawled URLs with their HTTP codes.
-
#workers ⇒ Array<Worker>
readonly
Worker pool.
Instance Method Summary collapse
- #callback_for(job) ⇒ Object
- #decrease_pending_job(job) ⇒ Object
-
#done? ⇒ Bool
`true` if there are no resources to analyze and no running workers.
- #explore(resource, options = {}, &block) ⇒ Object
- #handle_job_result(result) ⇒ Object
-
#initialize(options = {}) ⇒ BrowserCluster
constructor
A new instance of BrowserCluster.
-
#javascript_token ⇒ String
Javascript token used to namespace the custom JS environment.
- #job_done(job) ⇒ Object
-
#job_done?(job, fail_if_not_found = true) ⇒ Bool
`true` if the `job` has been marked as finished, `false` otherwise.
-
#pop ⇒ Job
Pops a job from the queue.
- #push_to_sitemap(url, code) ⇒ Object
- #queue(job, &block) ⇒ Object
-
#shutdown(wait = true) ⇒ Object
Shuts the cluster down.
-
#skip_state(job_id, state) ⇒ Object
Used to sync operations between browser workers.
-
#skip_state?(job_id, state) ⇒ Boolean
Used to sync operations between browser workers.
- #skip_states(id) ⇒ Object
- #trace_taint(resource, options = {}, &block) ⇒ Object
- #update_skip_states(id, lookups) ⇒ Object
-
#wait ⇒ Object
Blocks until all resources have been analyzed.
- #with_browser(&block) ⇒ Object
Methods included from Utilities
#available_port, #bytes_to_kilobytes, #bytes_to_megabytes, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite
Methods included from UI::Output
#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on
Constructor Details
#initialize(options = {}) ⇒ BrowserCluster
Returns a new instance of BrowserCluster.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/arachni/browser_cluster.rb', line 83
#
# Prepares the cluster's bookkeeping structures and then calls
# `initialize_workers`.
#
# @param options [Hash]
#   Settings merged over the default
#   `pool_size: Options.browser_cluster.pool_size`. Each pair is applied
#   through a `<key>=` writer when one exists, otherwise it is stored in the
#   instance variable of the same name.
def initialize( options = {} )
    super()

    {
        pool_size: Options.browser_cluster.pool_size
    }.merge( options ).each do |k, v|
        begin
            send( "#{k}=", try_dup( v ) )
        rescue NoMethodError
            # No writer available for this option, store the value as-is.
            instance_variable_set( "@#{k}".to_sym, v )
        end
    end

    # Used to sync operations between workers per Job#id.
    @skip_states_per_job = {}

    # Callbacks for each job per Job#id. We need to keep track of this
    # here because jobs are serialized and off-loaded to disk and thus can't
    # contain Block or Proc objects.
    @job_callbacks = {}

    # Keeps track of the amount of pending jobs distributed across the
    # cluster, by Job#id. Once a job's count reaches 0, it's passed to
    # #job_done.
    @pending_jobs = Hash.new(0)
    @pending_job_counter = 0

    # Jobs are off-loaded to disk.
    @jobs = Support::Database::Queue.new

    # Worker pool holding BrowserCluster::Worker instances.
    @workers = []

    # Stores visited resources from all workers.
    @sitemap = {}

    @mutex       = Monitor.new
    @done_signal = Queue.new

    @consumed_pids = []

    initialize_workers
end
Instance Attribute Details
#consumed_pids ⇒ Object (readonly)
Returns the value of attribute consumed_pids.
72 73 74 |
# File 'lib/arachni/browser_cluster.rb', line 72
#
# @return [Object] the value of the `consumed_pids` attribute.
def consumed_pids
    @consumed_pids
end
#pending_job_counter ⇒ Integer (readonly)
Returns the number of pending jobs.
70 71 72 |
# File 'lib/arachni/browser_cluster.rb', line 70
#
# @return [Integer] the number of pending jobs.
def pending_job_counter
    @pending_job_counter
end
#pool_size ⇒ Integer (readonly)
Returns the number of browser instances in the pool.
58 59 60 |
# File 'lib/arachni/browser_cluster.rb', line 58
#
# @return [Integer] the number of browser instances in the pool.
def pool_size
    @pool_size
end
Instance Method Details
#callback_for(job) ⇒ Object
372 373 374 |
# File 'lib/arachni/browser_cluster.rb', line 372
#
# Looks up the callback registered for the given job.
#
# @param job [Job] job whose `id` is used as the lookup key.
# @return [Object, nil] the stored callback, `nil` when none is stored.
def callback_for( job )
    @job_callbacks[job.id]
end
#decrease_pending_job(job) ⇒ Object
363 364 365 366 367 368 369 |
# File 'lib/arachni/browser_cluster.rb', line 363
#
# Decrements both the cluster-wide pending counter and the per-job pending
# count, and hands the job to #job_done once its own count drops to zero
# (or below).
#
# @param job [Job] job whose `id` identifies the per-job count.
def decrease_pending_job( job )
    synchronize do
        @pending_job_counter  -= 1
        @pending_jobs[job.id] -= 1

        job_done( job ) if @pending_jobs[job.id] <= 0
    end
end
#done? ⇒ Bool
Returns `true` if there are no resources to analyze and no running workers.
271 272 273 274 |
# File 'lib/arachni/browser_cluster.rb', line 271
#
# Calls `fail_if_shutdown` first, then reports whether the pending job
# counter is at zero.
#
# @return [Bool] `true` when the pending job counter is `0`.
def done?
    fail_if_shutdown
    @pending_job_counter == 0
end
#explore(resource, options = {}, &block) ⇒ Object
188 189 190 191 192 193 |
# File 'lib/arachni/browser_cluster.rb', line 188
#
# Queues a Jobs::ResourceExploration job for the given resource.
#
# @param resource [Object] stored under the `:resource` key of the job options.
# @param options [Hash] extra options for Jobs::ResourceExploration; a
#   `:resource` key in here is overridden by the `resource` argument.
# @param block [Block] callback forwarded to #queue.
def explore( resource, options = {}, &block )
    queue(
        Jobs::ResourceExploration.new( options.merge( resource: resource ) ),
        &block
    )
end
#handle_job_result(result) ⇒ Object
254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
# File 'lib/arachni/browser_cluster.rb', line 254
#
# Passes a job result to the callback registered for the result's job.
# Does nothing once the cluster has been shut down or when the job has
# already been marked as done.
#
# @param result [Job::Result] result whose `job.id` selects the callback.
# @return [nil]
def handle_job_result( result )
    return if @shutdown
    return if job_done? result.job

    synchronize do
        print_debug "Got job result: #{result}"

        # Keep callback failures contained.
        exception_jail( false ) do
            @job_callbacks[result.job.id].call result
        end
    end

    nil
end
#javascript_token ⇒ String
Returns the JavaScript token used to namespace the custom JS environment.
127 128 129 |
# File 'lib/arachni/browser_cluster.rb', line 127
#
# @return [String] the value of `Browser::Javascript::TOKEN`.
def javascript_token
    Browser::Javascript::TOKEN
end
#job_done(job) ⇒ Object
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/arachni/browser_cluster.rb', line 212
#
# Marks a job as finished: drops its remaining pending count from the
# cluster-wide counter and, unless the job is never-ending, forgets its
# skip-states and callback. Pushes onto the done-signal queue when no
# pending jobs are left.
#
# @param job [Job]
# @return [true]
def job_done( job )
    synchronize do
        print_debug "Job done: #{job}"

        notify_on_job_done job

        # Never-ending jobs keep their callback and skip-states.
        unless job.never_ending?
            @skip_states_per_job.delete job.id
            @job_callbacks.delete job.id
        end

        @pending_job_counter -= @pending_jobs[job.id]
        @pending_jobs[job.id] = 0

        if @pending_job_counter <= 0
            @pending_job_counter = 0
            # Wakes up a caller blocked on the done-signal queue.
            @done_signal << nil
        end
    end

    true
end
#job_done?(job, fail_if_not_found = true) ⇒ Bool
Returns `true` if the `job` has been marked as finished, `false` otherwise.
241 242 243 244 245 246 247 248 249 |
# File 'lib/arachni/browser_cluster.rb', line 241
#
# @param job [Job]
# @param fail_if_not_found [Bool]
#   When true, `fail_if_job_not_found` is called for the job first.
# @return [Bool]
#   `true` if the job is tracked and its pending count is `0`; `false`
#   otherwise, and always `false` for never-ending jobs.
def job_done?( job, fail_if_not_found = true )
    return false if job.never_ending?

    synchronize do
        fail_if_job_not_found job if fail_if_not_found

        # @pending_jobs defaults to 0 for unknown keys, so an untracked job
        # must be told apart from a finished one explicitly.
        @pending_jobs.key?( job.id ) && @pending_jobs[job.id] == 0
    end
end
#pop ⇒ Job
Returns the job popped from the queue.
309 310 311 312 313 |
# File 'lib/arachni/browser_cluster.rb', line 309
#
# Pops jobs from the queue, discarding those already marked as done, and
# returns the first one that is still pending.
#
# @return [Job] the popped job, after `notify_on_pop` has been called for it.
def pop
    job = @jobs.pop
    # Skip over jobs that finished while they were still queued.
    job = @jobs.pop while job_done?( job )

    notify_on_pop job
    job
end
#push_to_sitemap(url, code) ⇒ Object
345 346 347 |
# File 'lib/arachni/browser_cluster.rb', line 345
#
# Records a URL and its code in the sitemap, replacing any previous entry
# for that URL.
#
# @param url [String]
# @param code [Integer]
def push_to_sitemap( url, code )
    synchronize { @sitemap[url] = code }
end
#queue(job, &block) ⇒ Object
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/arachni/browser_cluster.rb', line 145
#
# Queues a job, registering `block` as the callback for its ID when given.
#
# @param job [Job]
# @param block [Block]
#   Callback stored under the job's ID. It may be omitted when a callback
#   for that ID has already been registered.
# @return [nil]
# @raise [ArgumentError] if no callback is available for the job's ID.
def queue( job, &block )
    fail_if_shutdown
    fail_if_job_done job

    @done_signal.clear

    synchronize do
        # Validate before touching any bookkeeping: bumping the pending
        # counters for a job that never gets queued would leave
        # @pending_job_counter above 0 with nothing left to decrement it.
        if !block && !@job_callbacks[job.id]
            fail ArgumentError, "No callback set for job ID #{job.id}."
        end

        print_debug "Queueing: #{job}"

        notify_on_queue job

        @job_callbacks[job.id] = block if block

        @pending_job_counter  += 1
        @pending_jobs[job.id] += 1

        @jobs << job
    end

    nil
end
#shutdown(wait = true) ⇒ Object
Shuts the cluster down.
284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 |
# File 'lib/arachni/browser_cluster.rb', line 284
#
# Shuts the cluster down: flags it as shut down, empties the job queue,
# shuts down and forgets every worker, then clears the per-job bookkeeping.
#
# @param wait [Bool] forwarded to each worker's `shutdown`.
# @return [true]
def shutdown( wait = true )
    @shutdown = true

    # Clear the jobs -- don't forget this, it also removes the disk files for
    # the contained items.
    @jobs.clear

    # Kill the browsers.
    @workers.each { |b| exception_jail( false ) { b.shutdown wait } }
    @workers.clear

    # Very important to leave these for last, they may contain data
    # necessary to cleanly handle interrupted jobs.
    @job_callbacks.clear
    @skip_states_per_job.clear
    @pending_jobs.clear

    true
end
#skip_state(job_id, state) ⇒ Object
Used to sync operations between browser workers.
340 341 342 |
# File 'lib/arachni/browser_cluster.rb', line 340
#
# Used to sync operations between browser workers: adds `state` to the
# skip-states collection of the given job.
#
# @param job_id [Object] Job#id.
# @param state [Object] state to add.
def skip_state( job_id, state )
    synchronize { skip_states( job_id ) << state }
end
#skip_state?(job_id, state) ⇒ Boolean
Used to sync operations between browser workers.
326 327 328 329 330 |
# File 'lib/arachni/browser_cluster.rb', line 326
#
# Used to sync operations between browser workers: checks whether `state`
# is in the skip-states collection of the given job.
#
# @param job_id [Object] Job#id.
# @param state [Object] state to look for.
# @return [Boolean]
def skip_state?( job_id, state )
    synchronize do
        skip_states( job_id ).include? state
    end
end
#skip_states(id) ⇒ Object
355 356 357 358 359 360 |
# File 'lib/arachni/browser_cluster.rb', line 355
#
# Returns the skip-states collection of the given job, creating an empty
# `Support::LookUp::HashSet` on first access.
#
# @param id [Object] Job#id.
# @return [Support::LookUp::HashSet]
def skip_states( id )
    synchronize do
        @skip_states_per_job[id] ||=
            Support::LookUp::HashSet.new( hasher: :persistent_hash )
    end
end
#trace_taint(resource, options = {}, &block) ⇒ Object
205 206 207 |
# File 'lib/arachni/browser_cluster.rb', line 205
#
# Queues a Jobs::TaintTrace job for the given resource.
#
# @param resource [Object] stored under the `:resource` key of the job options.
# @param options [Hash] extra options for Jobs::TaintTrace; a `:resource`
#   key in here is overridden by the `resource` argument.
# @param block [Block] callback forwarded to #queue.
def trace_taint( resource, options = {}, &block )
    queue( Jobs::TaintTrace.new( options.merge( resource: resource ) ), &block )
end
#update_skip_states(id, lookups) ⇒ Object
350 351 352 |
# File 'lib/arachni/browser_cluster.rb', line 350
#
# Merges `lookups` into the skip-states collection of the given job.
#
# @param id [Object] Job#id.
# @param lookups [Object] passed to the collection's `merge`.
def update_skip_states( id, lookups )
    synchronize { skip_states( id ).merge lookups }
end
#wait ⇒ Object
Blocks until all resources have been analyzed.
277 278 279 280 281 |
# File 'lib/arachni/browser_cluster.rb', line 277
#
# Blocks until all resources have been analyzed, by popping from the
# done-signal queue unless the cluster is already done.
#
# @return [BrowserCluster] `self`
def wait
    fail_if_shutdown
    @done_signal.pop unless done?
    self
end
#with_browser(&block) ⇒ Object
Operates in non-blocking mode.
135 136 137 |
# File 'lib/arachni/browser_cluster.rb', line 135
#
# Queues a Jobs::BrowserProvider job with the given block as its callback.
#
# @param block [Block] callback forwarded to #queue.
def with_browser( &block )
    queue( Jobs::BrowserProvider.new, &block )
end