Module: Msf::Auxiliary::HttpCrawler
- Includes: Report
- Defined in: lib/msf/core/auxiliary/http_crawler.rb
Overview
This module provides methods for implementing a web crawler.
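A minimal sketch of how a module might build on this mixin is shown below. The class name, metadata, and output are illustrative only and are not taken from any shipped module.

# Illustrative sketch: a bare-bones auxiliary module built on the crawler mixin.
class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::HttpCrawler

  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Example HTTP Crawler',                       # hypothetical metadata
      'Description' => 'Crawls a site and prints every visited URL', # hypothetical metadata
      'Author'      => [ 'example' ],
      'License'     => MSF_LICENSE
    ))
  end

  # Redefine the per-page hook with whatever is meaningful to the module.
  def crawler_process_page(t, page, cnt)
    return if page.nil?
    print_status("[#{cnt}] #{page.code} #{page.url}")
  end
end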
Defined Under Namespace
Classes: MaximumPageCount, WebTarget
Instance Attribute Summary collapse
- #form_count ⇒ Object
  Returns the value of attribute form_count.
- #request_count ⇒ Object
  Returns the value of attribute request_count.
- #targets ⇒ Object
  Some accessors for stat tracking.
- #url_count ⇒ Object
  Returns the value of attribute url_count.
- #url_total ⇒ Object
  Returns the value of attribute url_total.
Instance Method Summary collapse
- #cleanup ⇒ Object
- #crawl_target(t) ⇒ Object
- #crawler_options(t) ⇒ Object
- #crawler_process_page(t, page, cnt) ⇒ Object
  Specific module implementations should redefine this method with whatever is meaningful to them.
- #dirbust? ⇒ Boolean
- #focus_crawl(page) ⇒ Object
- #get_connection_timeout ⇒ Object
- #get_link_filter ⇒ Object
  Scrub links that end in these extensions.
- #initialize(info = {}) ⇒ Object
- #max_crawl_threads ⇒ Object
- #max_crawl_time ⇒ Object
- #max_page_count ⇒ Object
- #proxies ⇒ Object
  Returns the configured proxy list.
- #rhost ⇒ Object
  Returns the target host.
- #rport ⇒ Object
  Returns the remote port.
- #run ⇒ Object
  Entry point for the crawler code.
- #setup ⇒ Object
- #ssl ⇒ Object
  Returns the boolean indicating SSL.
- #ssl_version ⇒ Object
  Returns the string indicating SSL version.
- #vhost ⇒ Object
  Returns the VHOST of the HTTP server.
Methods included from Report
#active_db?, #create_cracked_credential, #create_credential, #create_credential_and_login, #create_credential_login, #db, #db_warning_given?, #get_client, #get_host, #inside_workspace_boundary?, #invalidate_login, #mytask, #myworkspace, #myworkspace_id, #report_auth_info, #report_client, #report_exploit, #report_host, #report_loot, #report_note, #report_service, #report_vuln, #report_web_form, #report_web_page, #report_web_site, #report_web_vuln, #store_cred, #store_local, #store_loot
Methods included from Metasploit::Framework::Require
optionally, optionally_active_record_railtie, optionally_include_metasploit_credential_creation, #optionally_include_metasploit_credential_creation, optionally_require_metasploit_db_gem_engines
Instance Attribute Details
#form_count ⇒ Object
Returns the value of attribute form_count.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 100

def form_count
  @form_count
end
#request_count ⇒ Object
Returns the value of attribute request_count.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 100

def request_count
  @request_count
end
#targets ⇒ Object
Some accessors for stat tracking.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 99

def targets
  @targets
end
#url_count ⇒ Object
Returns the value of attribute url_count.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 100

def url_count
  @url_count
end
#url_total ⇒ Object
Returns the value of attribute url_total.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 100

def url_total
  @url_total
end
Instance Method Details
#cleanup ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 68

def cleanup
  if @crawler
    @crawler.shutdown rescue nil
    @crawler = nil
  end
  super
end
#crawl_target(t) ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 191

def crawl_target(t)
  cnt  = 0
  opts = crawler_options(t)
  url  = t.to_url

  @crawler = ::Anemone::Core.new([url], opts)
  @crawler.on_every_page do |page|
    cnt += 1
    self.request_count += 1

    # Extract any interesting data from the page
    crawler_process_page(t, page, cnt)

    # Blow up if we hit our maximum page count
    if cnt >= max_page_count
      print_error("Maximum page count reached for #{url}")
      raise MaximumPageCount, "Maximum page count reached"
    end
  end

  # Skip link processing based on a regular expression
  @crawler.skip_links_like(
    get_link_filter
  )

  # Focus our crawling on interesting, but not over-crawled links
  @crawler.focus_crawl do |page|
    focus_crawl(page)
  end

  begin
    @crawler.run
  rescue MaximumPageCount
    # No need to print anything else
  rescue ::Timeout::Error
    # Bubble this up to the top-level handler
    raise $!
  rescue ::Exception => e
    # Anonymous timeout exceptions of unclear origin; treat
    # "execution expired" as an ordinary timeout.
    if e.to_s =~ /execution expired/
      raise ::Timeout::Error
    else
      print_error("Crawler Exception: #{url} #{e} #{e.backtrace}")
    end
  ensure
    @crawler.shutdown rescue nil
    @crawler = nil
  end
end
#crawler_options(t) ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 269

def crawler_options(t)
  opts = {}
  opts[:user_agent]          = datastore['UserAgent']
  opts[:verbose]             = false
  opts[:threads]             = max_crawl_threads
  opts[:obey_robots_txt]     = false
  opts[:redirect_limit]      = datastore['RedirectLimit']
  opts[:retry_limit]         = datastore['RetryLimit']
  opts[:accept_cookies]      = true
  opts[:depth_limit]         = false
  opts[:skip_query_strings]  = false
  opts[:discard_page_bodies] = true
  opts[:framework]           = framework
  opts[:module]              = self
  opts[:timeout]             = get_connection_timeout
  opts[:dirbust]             = dirbust?

  if t[:headers] and t[:headers].length > 0
    opts[:inject_headers] = t[:headers]
  end

  if t[:cookies]
    opts[:cookies] = t[:cookies]
  end

  opts[:username] = t[:username] || ''
  opts[:password] = t[:password] || ''
  opts[:domain]   = t[:domain]   || 'WORKSTATION'

  if ssl
    opts[:ssl_version] = ssl_version
  end

  opts
end
#crawler_process_page(t, page, cnt) ⇒ Object
Specific module implementations should redefine this method with whatever is meaningful to them.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 245

def crawler_process_page(t, page, cnt)
  # Skip over pages that carry no data (page is nil); there is nothing to process.
  return if page.nil?

  msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page ? page.code || "ERR" : "ERR"} - #{@current_site.vhost} - #{page.url}"

  case page.code
  when 301,302
    if page.headers and page.headers["location"]
      print_status(msg + " -> " + page.headers["location"].to_s)
    else
      print_status(msg)
    end
  when 500...599
    # XXX: Log the fact that we hit an error page
    print_good(msg)
  when 401,403
    print_good(msg)
  when 200
    print_status(msg)
  when 404
    print_error(msg)
  else
    print_error(msg)
  end
end
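For instance, a module interested in HTML forms could redefine the hook along these lines. This is only a sketch: it assumes the page was parsed by Anemone (page.doc is the Nokogiri document, nil for non-HTML responses) and reuses the mixin's form_count accessor.

# Hypothetical override: count and report HTML forms on each fetched page.
def crawler_process_page(t, page, cnt)
  return if page.nil? or page.doc.nil?

  page.doc.css('form').each do |form|
    self.form_count += 1
    print_good("Form found on #{page.url}: action=#{form['action'].inspect}")
  end
end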
#dirbust? ⇒ Boolean
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 177

def dirbust?
  datastore['DirBust']
end
#focus_crawl(page) ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 187

def focus_crawl(page)
  page.links
end
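The default simply returns every link on the page. A module that wants tighter scoping can override it; the sketch below assumes page.links yields absolute URIs, as Anemone normally provides.

# Hypothetical override: only follow links that stay on the same host.
def focus_crawl(page)
  page.links.select { |link| link.host == page.url.host }
end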
#get_connection_timeout ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 161

def get_connection_timeout
  datastore['RequestTimeout']
end
#get_link_filter ⇒ Object
Scrub links that end in these extensions. If more or less is desired by a particular module, this should get redefined.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 183

def get_link_filter
  /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i
end
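A module that needs a different scrub list only has to redefine the method. The sketch below extends the stock pattern with stylesheets, icons, and logout-style paths; the exact additions are illustrative.

# Hypothetical override: extend the default filter with a few extra patterns.
def get_link_filter
  /\.(js|css|ico|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$|log(out|off)/i
end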
#initialize(info = {}) ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 13

def initialize(info = {})
  super

  register_options(
    [
      Opt::RHOST,
      Opt::RPORT(80),
      OptString.new('VHOST', [ false, "HTTP server virtual host" ]),
      OptString.new('URI', [ true, "The starting page to crawl", "/"]),
      Opt::Proxies,
      OptInt.new('MAX_PAGES', [ true, 'The maximum number of pages to crawl per URL', 500]),
      OptInt.new('MAX_MINUTES', [ true, 'The maximum number of minutes to spend on each URL', 5]),
      OptInt.new('MAX_THREADS', [ true, 'The maximum number of concurrent requests', 4]),
      OptString.new('HttpUsername', [false, 'The HTTP username to specify for authentication']),
      OptString.new('HttpPassword', [false, 'The HTTP password to specify for authentication']),
      OptString.new('DOMAIN', [ true, 'The domain to use for windows authentication', 'WORKSTATION']),
      OptBool.new('SSL', [ false, 'Negotiate SSL/TLS for outgoing connections', false])
    ], self.class
  )

  register_advanced_options(
    [
      OptBool.new('DirBust', [ false, 'Bruteforce common URL paths', true]),
      OptInt.new('RequestTimeout', [false, 'The maximum number of seconds to wait for a reply', 15]),
      OptInt.new('RedirectLimit', [false, 'The maximum number of redirects for a single request', 5]),
      OptInt.new('RetryLimit', [false, 'The maximum number of attempts for a single request', 5]),
      OptString.new('UserAgent', [true, 'The User-Agent header to use for all requests',
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
      ]),
      OptString.new('BasicAuthUser', [false, 'The HTTP username to specify for basic authentication']),
      OptString.new('BasicAuthPass', [false, 'The HTTP password to specify for basic authentication']),
      OptString.new('HTTPAdditionalHeaders', [false, "A list of additional headers to send (separated by \\x01)"]),
      OptString.new('HTTPCookie', [false, "A HTTP cookie header to send with each request"]),
      Opt::SSLVersion
    ], self.class
  )

  register_autofilter_ports([ 80, 8080, 443, 8000, 8888, 8880, 8008, 3000, 8443 ])
  register_autofilter_services(%W{ http https })

  begin
    require 'anemone'
    @anemone_loaded = true
  rescue ::Exception => e
    @anemone_loaded = false
    @anemone_error  = e
  end
end
#max_crawl_threads ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 173

def max_crawl_threads
  datastore['MAX_THREADS']
end
#max_crawl_time ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 169

def max_crawl_time
  datastore['MAX_MINUTES'] * 60.0
end
#max_page_count ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 165

def max_page_count
  datastore['MAX_PAGES']
end
#proxies ⇒ Object
Returns the configured proxy list.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 349

def proxies
  datastore['Proxies']
end
#rhost ⇒ Object
Returns the target host.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 314

def rhost
  datastore['RHOST']
end
#rport ⇒ Object
Returns the remote port.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 321

def rport
  datastore['RPORT']
end
#run ⇒ Object
Entry point for the crawler code.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 104

def run
  self.request_count = 0
  self.form_count    = 0
  self.url_count     = 0
  self.url_total     = 1

  path, query = datastore['URI'].split('?', 2)
  query ||= ""

  t = WebTarget.new

  t.merge!({
    :vhost => vhost,
    :host  => rhost,
    :port  => rport,
    :ssl   => ssl,
    :path  => path,
    :query => query,
    :info  => ""
  })

  if datastore['HttpUsername'] and datastore['HttpUsername'] != ''
    t[:username] = datastore['HttpUsername'].to_s
    t[:password] = datastore['HttpPassword'].to_s
    t[:domain]   = datastore['DOMAIN'].to_s
  end

  if datastore['HTTPCookie']
    t[:cookies] = {}
    datastore['HTTPCookie'].to_s.split(';').each do |pair|
      k, v = pair.strip.split('=', 2)
      next if not v
      t[:cookies][k] = v
    end
  end

  if datastore['HTTPAdditionalHeaders']
    t[:headers] = datastore['HTTPAdditionalHeaders'].to_s.split("\x01").select { |x| x.to_s.length > 0 }
  end

  t[:site] = report_web_site(:wait => true, :host => t[:host], :port => t[:port], :vhost => t[:vhost], :ssl => t[:ssl])

  print_status("Crawling #{t.to_url}...")

  begin
    @current_vhost = t[:vhost]
    @current_site  = t[:site]
    ::Timeout.timeout(max_crawl_time) { crawl_target(t) }
  rescue ::Timeout::Error
    print_error("Crawl of #{t.to_url} has reached the configured timeout")
  ensure
    @current_vhost = nil
  end

  print_status("Crawl of #{t.to_url} complete")
end
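As the code above shows, HTTPCookie is split on ';' and then '=', while HTTPAdditionalHeaders is split on the \x01 byte. The snippet below is a standalone illustration of those value formats with made-up cookie and header values; it mirrors the parsing in #run but is not part of the module.

# Illustrative values only, showing the formats #run expects.
cookie_value  = 'PHPSESSID=abc123; tracking=1'   # "name=value" pairs separated by ';'
headers_value = "X-Api-Key: test\x01X-Debug: 1"  # whole header lines separated by "\x01"

cookies = cookie_value.split(';').map { |pair| pair.strip.split('=', 2) }.to_h
headers = headers_value.split("\x01").reject { |h| h.to_s.empty? }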
#setup ⇒ Object
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 63

def setup
  raise RuntimeError, "Could not load Anemone/Nokogiri: #{@anemone_error}" if not @anemone_loaded
  super
end
#ssl ⇒ Object
Returns the boolean indicating SSL.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 335

def ssl
  ((datastore.default?('SSL') and rport.to_i == 443) or datastore['SSL'])
end
#ssl_version ⇒ Object
Returns the string indicating SSL version.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 342

def ssl_version
  datastore['SSLVersion']
end
#vhost ⇒ Object
Returns the VHOST of the HTTP server.
# File 'lib/msf/core/auxiliary/http_crawler.rb', line 328

def vhost
  datastore['VHOST'] || datastore['RHOST']
end