Class: Arachni::RPC::Server::Spider
- Defined in:
- lib/arachni/rpc/server/spider.rb
Overview
Extends the regular Spider with high-performance distributed capabilities.
Constant Summary collapse
- BUFFER_SIZE =
Number of URLs to buffer before distributing.
1000
- FILLUP_ATTEMPTS =
How many times to try to fill the buffer before distributing its contents.
200
Constants inherited from Spider
Instance Attribute Summary
Attributes inherited from Spider
Instance Method Summary collapse
- #after_each_run(&block) ⇒ Object
- #clear_distribution_filter ⇒ Object
-
#initialize(framework) ⇒ Spider
constructor
A new instance of Spider.
-
#local_sitemap ⇒ Hash<String, Integer>
URLs crawled by this Instance, along with their HTTP status codes.
- #on_first_run(&block) ⇒ Object
-
#peer_done(url) ⇒ Object
Sets a peer crawler’s state to finished.
- #run(*args) ⇒ Object
-
#signal_if_done(master) ⇒ Object
Signals the `master` Instance that this crawler has finished.
-
#sitemap ⇒ Array<String>
Crawled URLs.
-
#update_peers(peers, &block) ⇒ Object
Updates the list of Instances to assist in the crawl.
Methods inherited from Spider
#done?, #fancy_sitemap, #idle?, #on_complete, #on_each_page, #on_each_response, #paths, #pause, #paused?, #push, #resume, #running?, #url
Methods included from Utilities
#available_port, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #extract_domain, #follow_protocol?, #form_decode, #form_encode, #form_parse_request_body, #forms_from_document, #forms_from_response, #generate_token, #get_path, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_query, #parse_set_cookie, #parse_url_vars, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #redundant_path?, #remove_constants, #seed, #skip_page?, #skip_path?, #skip_resource?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parser, #url_sanitize
Methods included from UI::Output
#debug?, #debug_off, #debug_on, #disable_only_positives, #error_logfile, #flush_buffer, #log_error, #mute, #muted?, old_reset_output_options, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_pp, #print_error, #print_error_backtrace, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #set_buffer_cap, #set_error_logfile, #uncap_buffer, #unmute, #verbose, #verbose?
Constructor Details
#initialize(framework) ⇒ Spider
Returns a new instance of Spider.
39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/arachni/rpc/server/spider.rb', line 39 def initialize( framework ) super( framework.opts ) @framework = framework @peers = {} @done_signals = Hash.new( true ) @distribution_filter = Support::LookUp::Moolb.new @after_each_run_blocks = [] @on_first_run_blocks = [] end |
Instance Method Details
#after_each_run(&block) ⇒ Object
58 59 60 |
# File 'lib/arachni/rpc/server/spider.rb', line 58 def after_each_run( &block ) @after_each_run_blocks << block end |
#clear_distribution_filter ⇒ Object
52 53 54 |
# File 'lib/arachni/rpc/server/spider.rb', line 52 def clear_distribution_filter @distribution_filter.clear end |
#local_sitemap ⇒ Hash<String, Integer>
Returns URLs crawled by this Instance, along with their HTTP status codes.
137 138 139 |
# File 'lib/arachni/rpc/server/spider.rb', line 137 def local_sitemap @sitemap end |
#on_first_run(&block) ⇒ Object
64 65 66 |
# File 'lib/arachni/rpc/server/spider.rb', line 64 def on_first_run( &block ) @on_first_run_blocks << block end |
#peer_done(url) ⇒ Object
Sets a peer crawler’s state to finished. Exposed so that peers can signal the master once they’re done.
152 153 154 155 156 |
# File 'lib/arachni/rpc/server/spider.rb', line 152 def peer_done( url ) @done_signals[url] = true master_done_handler true end |
#run(*args) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/arachni/rpc/server/spider.rb', line 69 def run( *args ) @first_run_blocks ||= call_on_first_run if !solo? on_complete_blocks = @on_complete_blocks.dup @on_complete_blocks.clear end super( *args ) flush_url_distribution_buffer master_done_handler if slave? call_after_each_run end if !solo? @on_complete_blocks = on_complete_blocks.dup end sitemap end |
#signal_if_done(master) ⇒ Object
Signals the `master` Instance that this crawler has finished.
163 164 165 166 |
# File 'lib/arachni/rpc/server/spider.rb', line 163 def signal_if_done( master ) return if !done? master.spider.peer_done( framework.multi_self_url ){} end |
#sitemap ⇒ Array<String>
Returns Crawled URLs.
142 143 144 |
# File 'lib/arachni/rpc/server/spider.rb', line 142 def sitemap @distributed_sitemap || super end |
#update_peers(peers, &block) ⇒ Object
Updates the list of Instances to assist in the crawl.
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/arachni/rpc/server/spider.rb', line 103 def update_peers( peers, &block ) @peers_array = peers sorted_peers = @peers_array.inject( {} ) do |h, p| h[p[:url]] = framework.connect_to_instance( p ) h end.sort @peers = Hash[sorted_peers] @peers[framework.multi_self_url] = framework @peers = Hash[@peers.sort] @peer_urls = @peers.keys @peer_clients = @peers.values if !master? block.call if block_given? return true end each = proc do |peer, iter| peer.spider.update_peers( @peers_array | [self_instance_info] ) { iter.return } end map_peers( each, proc { block.call if block_given? } ) true end |