Class: Wmap::UrlChecker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/url_checker.rb

Overview

A quick checker class to identify / finger-print a URL / site

Constant Summary

Constants included from Wmap::Utils::UrlMagic

Wmap::Utils::UrlMagic::Max_http_timeout, Wmap::Utils::UrlMagic::User_agent

Constants included from Wmap::Utils::DomainRoot

Wmap::Utils::DomainRoot::File_ccsld, Wmap::Utils::DomainRoot::File_cctld, Wmap::Utils::DomainRoot::File_gtld, Wmap::Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Wmap::Utils::Logger

#wlog

Methods included from Wmap::Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #landing_location, #make_absolute, #normalize_url, #open_page, #redirect_location, #response_code, #response_headers, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Wmap::Utils::DomainRoot

#get_domain_root, #get_domain_root_by_ccsld, #get_domain_root_by_cctld, #get_domain_root_by_tlds, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ UrlChecker

Returns a new instance of UrlChecker.



20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/wmap/url_checker.rb', line 20

# Build a new checker. Every option in +params+ is optional:
#   :verbose      - print progress / error messages when true (default false)
#   :data_dir     - data directory path; created if it does not exist yet
#   :http_timeout - HTTP timeout in milliseconds (default 5000)
#   :max_parallel - default number of parallel worker processes (default 40)
def initialize(params = {})
	# Caller-tunable settings, falling back to the built-in defaults
	fallback_dir=File.dirname(__FILE__)+'/../../data/'
	@verbose=params.fetch(:verbose, false)
	@data_dir=params.fetch(:data_dir, fallback_dir)
	Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
	@http_timeout=params.fetch(:http_timeout, 5000)
	@max_parallel=params.fetch(:max_parallel, 40)
	# No SSL/TLS protocol version is pinned by default
	@ssl_version=nil
	# Per-URL lookup tables, all starting out empty
	@url_server={}
	@url_finger_print={}
	@url_redirection={}
	@url_code={}
end

Instance Attribute Details

#data_dir ⇒ Object

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the data directory path held in @data_dir.
def data_dir
  @data_dir
end

#http_timeout ⇒ Object

Returns the value of attribute http_timeout.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the HTTP timeout setting held in @http_timeout.
def http_timeout
  @http_timeout
end

#max_parallel ⇒ Object

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the parallel worker count held in @max_parallel.
def max_parallel
  @max_parallel
end

#verbose ⇒ Object

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the verbosity flag held in @verbose.
def verbose
  @verbose
end

Instance Method Details

#get_cert_cn(url) ⇒ Object Also known as: get_cn

Retrieve the X509 certificate in clear text from the remote web server, then extract and return the common name field within the certificate



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/wmap/url_checker.rb', line 197

# Extract the common name (CN) from the X509 certificate of a remote web server.
#
# The certificate text comes from get_certificate(url). The "Subject:" line is
# located first, then the CN attribute is read out of that line only, so the
# issuer's CN is never picked up by mistake.
#
# url - URL of the SSL site to inspect.
#
# Returns the CN as a String, or nil when the certificate could not be
# retrieved, has no Subject line, or the Subject carries no CN attribute.
def get_cert_cn(url)
	# Log the url: the certificate has not been fetched yet at this point.
	puts "Extract the common name field from a X509 cert: #{url}" if @verbose
	cert=get_certificate(url)
	subject=cert.to_s[/\n.+Subject\:(.+)\n/i, 1]
	return nil if subject.nil?
	# Accept both "CN=value" and "CN = value" (some OpenSSL versions pad the
	# equal sign), and stop at the next attribute separator so that a trailing
	# ", O=..." or "/emailAddress=..." is not returned as part of the name.
	cn=subject[/\bCN\s*=\s*([^,\/]+)/i, 1]
	return cn.nil? ? nil : cn.strip
rescue StandardError => ee
	# Best effort: report the problem in verbose mode and signal failure with nil.
	puts "Error on method #{__method__} from #{url}: #{ee}" if @verbose
	return nil
end

#get_certificate(url) ⇒ Object Also known as: get_cert

Retrieve the remote web server's certificate, open it, and return the certificate content as a string



180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/wmap/url_checker.rb', line 180

# Fetch the SSL certificate presented by the remote web server and return it
# rendered as human-readable text.
#
# url - URL to connect to; it must pass the is_ssl? check after stripping
#       surrounding whitespace.
#
# Returns the certificate text as a String, or nil on any failure. The failure
# message is printed regardless of the verbose setting.
def get_certificate(url)
	puts "Retrieve the remote web server SSL certificate in clear text: #{url}" if @verbose
	url=url.strip
	unless is_ssl?(url)
		raise "Invalid URL string: #{url}"
	end
	# Peer verification is switched off so that the certificate can still be
	# read from servers whose certificate would not validate.
	web_client = HTTPClient.new
	web_client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
	peer = web_client.get(url).peer_cert
	x509 = OpenSSL::X509::Certificate.new(peer)
	return x509.to_text
rescue Exception => err
	puts "Exception on method #{__method__} from #{url}: #{err}"
	return nil
end

#get_server_header(url) ⇒ Object

Test the URL / site and return the web server type from the HTTP header “server” field



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/wmap/url_checker.rb', line 123

# Test the URL / site and return the web server type taken from the HTTP
# "server" response header.
#
# url - URL to probe; it must pass the is_url? check. It is stripped and
#       downcased before use, and that normalized form is the cache key.
#
# Returns the header value with commas replaced by spaces, nil when the
# response carries no "server" header, or the empty String when the lookup
# fails before a response is received. The outcome is stored in @url_server
# in every case, so later lookups can reuse it.
def get_server_header(url)
	puts "Retrieve the server header field from the url: #{url}" if @verbose
	server=String.new
	raise "Invalid url: #{url}" unless is_url?(url)
	url=url.strip.downcase
	# @http_timeout is in milliseconds; Net::HTTP expects seconds
	timeo = @http_timeout/1000.0
	uri = URI.parse(url)
	# NOTE(review): the status code is not used here; the call is kept in case
	# response_code has side effects this method relies on - confirm and drop.
	response_code(url)
	http = Net::HTTP.new(uri.host, uri.port)
	http.open_timeout = timeo
	http.read_timeout = timeo
	if url =~ /https\:/i
		http.use_ssl = true
		# Bypass the remote web server cert validation test
		http.verify_mode = OpenSSL::SSL::VERIFY_NONE
		http.ssl_version = @ssl_version
	end
	request = Net::HTTP::Get.new(uri.request_uri)
	response = http.request(request)
	server=response["server"]
	# A missing header is a normal outcome, not an error: leave it as nil
	server=server.tr(',', ' ') unless server.nil?
	# Remember the successful lookup as well, not only the failed ones
	@url_server[url]=server
	return server
rescue StandardError => ee
	puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
	@url_server[url]=server
	return server
end

#response_body_md5(url) ⇒ Object Also known as: md5

Use MD5 algorithm to fingerprint the URL / site response payload (web page content)



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/wmap/url_checker.rb', line 152

# Fingerprint the page served at the URL by taking the MD5 hex digest of the
# response body.
#
# url - URL to fetch; it must pass the is_url? check. It is stripped and
#       downcased before use, and that normalized form is the cache key.
#
# Returns the hex digest String and records it in @url_finger_print.
# Returns nil when anything goes wrong; nothing is recorded in that case.
def response_body_md5(url)
	puts "MD5 finger print page body content: #{url}" if @verbose
	unless is_url?(url)
		raise "Invalid url: #{url}"
	end
	url=url.strip.downcase
	# @http_timeout is in milliseconds; Net::HTTP expects seconds
	seconds = @http_timeout/1000.0
	target = URI.parse(url)
	conn = Net::HTTP.new(target.host, target.port)
	conn.open_timeout = seconds
	conn.read_timeout = seconds
	if url =~ /https\:/i
		# Talk SSL, skipping the remote web server cert validation
		conn.use_ssl = true
		conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
		conn.ssl_version = @ssl_version
	end
	get_request = Net::HTTP::Get.new(target.request_uri)
	# to_s turns a missing body into "", so there is always something to hash
	page = conn.request(get_request).body.to_s
	digest = Digest::MD5.hexdigest(page)
	@url_finger_print[url] = digest
	return digest
rescue Exception => err
	puts "Exception on method #{__method__}: #{err}" if @verbose
end

#url_worker(url) ⇒ Object Also known as: check

Main worker method to perform various checks on the URL / site



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/wmap/url_checker.rb', line 35

# Main worker method to perform the various checks on one URL / site.
#
# url - URL to check; it is stripped and downcased, then must pass is_url?.
#
# Returns a Hash with the String keys 'ip', 'port', 'url', 'code',
# 'redirection', 'md5', 'server', 'timestamp' and 'status'. 'status' is
# "int_hosted" when Wmap::CidrTracker reports the IP as trusted and
# "ext_hosted" otherwise. When an OpenSSL::SSL::SSLError is raised along the
# way, a reduced Hash is returned instead: 'code' is 20000, 'server' carries
# the error text, 'md5' and 'redirection' are nil, and there is no 'status'.
#
# Raises RuntimeError on an invalid URL. Other errors also propagate, because
# the catch-all rescue at the bottom is disabled.
def url_worker(url)
	puts "Checking out an unknown URL: #{url}" if @verbose
	url=url.strip.downcase
	raise "Invalid URL format: #{url}" unless is_url?(url)
	timestamp=Time.now
	host=url_2_host(url)
	ip=host_2_ip(host)
	port=url_2_port(url)
	# Reuse previously recorded answers where available, probe otherwise
	code=@url_code.fetch(url) { response_code(url) }
	# Only 3xx answers have a redirection target worth following
	loc=(code>=300 && code<400) ? landing_location(4,url) : nil
	fp=@url_finger_print.fetch(url) { response_body_md5(url) }
	server=@url_server.fetch(url) { get_server_header(url) }
	# save the data
	checker={
		'ip'=>ip,
		'port'=>port,
		'url'=>url,
		'code'=>code,
		'redirection'=>loc,
		'md5'=>fp,
		'server'=>server,
		'timestamp'=>timestamp
	}
	trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
	checker['status']=trusted ? "int_hosted" : "ext_hosted"
	return checker
rescue OpenSSL::SSL::SSLError => es  # handler to temporarily hold the openssl bug at bay:  SSL_set_session: unable to find ssl method
	# Use the same 'md5' key as the regular result so that consumers find the
	# fingerprint field under one name in both cases.
	return {
		'ip'=>ip,
		'port'=>port,
		'url'=>url,
		'code'=>20000,
		'server'=>"Unknown SSL error: #{es}",
		'md5'=>nil,
		'redirection'=>nil,
		'timestamp'=>timestamp
	}
#rescue Exception => ee
#	puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
#	return nil
end

#url_workers(targets, num = @max_parallel) ⇒ Object Also known as: checks

Parallel scanner - uses the fork manager ‘parallel’ to spawn a number of child processes that check multiple URLs simultaneously



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/wmap/url_checker.rb', line 98

# Parallel scanner: run url_worker over many URLs through Parallel.map and
# collect the non-empty results.
#
# targets - Array of URL strings; "" and nil entries are dropped first.
# num     - number of worker processes handed to Parallel
#           (defaults to @max_parallel).
#
# A target whose check raises is skipped (and reported in verbose mode), so
# one bad URL no longer discards the results of the whole batch.
#
# Returns an Array of the result Hashes produced by url_worker, or nil when
# the batch itself cannot be run (for example when targets is not an Array).
def url_workers(targets, num=@max_parallel)
	results=Array.new
	targets -= ["", nil]
	if targets.size > 0
		puts "Start the url checker on the targets:\n #{targets}"
		outcomes = Parallel.map(targets, :in_processes => num) { |target|
			begin
				url_worker(target)
			rescue StandardError => ee
				# Contain the failure to this one target
				puts "Exception on method url_workers for #{target}: #{ee}" if @verbose
				nil
			end
		}
		# Keep only the checks that actually produced data
		results = outcomes.reject { |outcome| outcome.nil? || outcome.empty? }
	end
	return results
rescue StandardError => ee
	puts "Exception on method #{__method__}: #{ee}" if @verbose
	return nil
end