Class: LinkChecker
- Inherits:
-
Object
show all
- Defined in:
- lib/link_checker.rb
Defined Under Namespace
Classes: Error, Good, Redirect, Result
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
Returns a new instance of LinkChecker.
11
12
13
14
15
|
# File 'lib/link_checker.rb', line 11
# Builds a link checker from a params hash.
#
# @param params [Hash] recognised keys:
#   :target  - what to check; defaults to './' when absent or nil.
#   :options - hash of behaviour flags; defaults to an empty hash.
def initialize(params)
  # Starts at 0 and is raised to 1 once a problem is reported.
  @return_code = 0
  @target = params[:target] || './'
  @options = params[:options] || {}
end
|
Class Method Details
.check_uri(uri, redirected = false) ⇒ Object
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
# File 'lib/link_checker.rb', line 30
# Checks a single URI with an HTTP GET request, following redirects.
#
# @param uri [URI] absolute http/https URI to request.
# @param redirected [Boolean] true when +uri+ was reached by following a
#   redirect; a successful response is then reported as a Redirect.
# @param redirects_left [Integer] number of further redirects that may be
#   followed; once exhausted the redirect response is reported as an Error,
#   which stops redirect loops from recursing forever.
# @return [Good] on a success response to a direct request.
# @return [Redirect] on a success response reached through a redirect.
# @return [Error] on any other response, a redirect without a Location
#   header, or too many redirects.
def self.check_uri(uri, redirected = false, redirects_left = 10)
  # Keep the query string: requesting the bare path asks for a different
  # resource than the one the link points at.
  request_path = uri.path.to_s.empty? ? '/' : uri.path
  request_path = "#{request_path}?#{uri.query}" if uri.query

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true if uri.scheme == 'https'
  http.start do
    http.request_get(request_path) do |response|
      case response
      when Net::HTTPSuccess
        return Redirect.new(:final_destination_uri_string => uri.to_s) if redirected
        return Good.new(:uri_string => uri.to_s)
      when Net::HTTPRedirection
        location = response['location']
        if location.nil? || redirects_left <= 0
          return Error.new(:uri_string => uri.to_s, :response => response)
        end
        # Location may be relative; resolve it against the URI just requested.
        return check_uri(uri.merge(location), true, redirects_left - 1)
      else
        return Error.new(:uri_string => uri.to_s, :response => response)
      end
    end
  end
end
|
.external_link_uri_strings(source) ⇒ Object
23
24
25
26
27
28
|
# File 'lib/link_checker.rb', line 23
# Extracts the external link targets from an HTML document.
#
# @param source the HTML to parse; handed to Nokogiri::HTML unchanged.
# @return [Array<String>] the href value of every <a> element whose href
#   matches http:// or https://, in document order.
def self.external_link_uri_strings(source)
  anchors = Nokogiri::HTML(source).css('a')
  # Anchors without an href yield nil here and are dropped by compact.
  href_values = anchors.map { |anchor| anchor.attribute('href') }.compact.map(&:value)
  href_values.grep(/^https?\:\/\//)
end
|
Instance Method Details
#check_uris ⇒ Object
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'lib/link_checker.rb', line 52
# Runs the link check for @target and returns the resulting exit status.
#
# A target that starts with http:// or https:// is crawled; any other target
# is treated as a path and checked via check_uris_in_files.
#
# Any StandardError raised while checking is printed in red and forces a
# return code of 1, so an aborted run is not reported as success.
#
# @return [Integer] @return_code after the run, or 1 when the run failed.
def check_uris
  if @target =~ /^https?\:\/\//
    check_uris_by_crawling
  else
    check_uris_in_files
  end
  @return_code
rescue => error
  puts "Error: #{error.to_s}".red
  @return_code = 1
end
|
#check_uris_by_crawling ⇒ Object
65
66
67
68
69
70
71
72
73
74
75
|
# File 'lib/link_checker.rb', line 65
# Crawls @target with Anemone and starts one link-check thread per crawled
# page, then waits for every thread to finish.
#
# Crawled pages are kept in the PStore file
# 'link-checker-crawled-pages.pstore'.
#
# @raise [StandardError] wrapping the page's error when a crawled page
#   reports one.
def check_uris_by_crawling
  page_checks = []
  Anemone.crawl(@target) do |crawler|
    crawler.storage = Anemone::Storage.PStore('link-checker-crawled-pages.pstore')
    crawler.on_every_page do |page|
      failure = page.error
      raise StandardError, failure if failure
      page_checks.push(start_link_check_thread(page.body, page.url.to_s))
    end
  end
  page_checks.each(&:join)
end
|
#check_uris_in_files ⇒ Object
77
78
79
80
81
82
83
|
# File 'lib/link_checker.rb', line 77
# Starts one link-check thread per HTML file found under @target and waits
# for all of them to finish.
#
# Each file's contents are read up front and passed on as a String, so no
# file handle is left open (the previous Kernel#open call never closed its
# handles and would also run a command for a path starting with "|").
def check_uris_in_files
  threads = html_file_paths.map do |file|
    # Binary read: no encoding is assumed here.
    # NOTE(review): presumably the HTML parser detects the encoding itself, as
    # it did for the IO passed before - confirm against Nokogiri's behaviour.
    start_link_check_thread(File.binread(file), file)
  end
  threads.each(&:join)
end
|
#html_file_paths ⇒ Object
17
18
19
20
21
|
# File 'lib/link_checker.rb', line 17
# Lists the HTML files found under @target.
#
# @return [Array<String>] every path yielded by Find.find(@target) that is a
#   regular file and whose name ends in .htm or .html.
def html_file_paths
  Find.find(@target).select do |candidate|
    FileTest.file?(candidate) && candidate =~ /\.html?$/
  end
end
|
#report_results(file, results) ⇒ Object
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# File 'lib/link_checker.rb', line 101
# Serialises console output so reports printed by concurrent link-check
# threads are not interleaved. Replaces Thread.exclusive, which no longer
# exists on Ruby 3.0 and later.
REPORT_MUTEX = Mutex.new

# Prints the outcome of checking one source and records failure in
# @return_code.
#
# Results that are exactly Error count as errors; results that are exactly
# Redirect count as warnings. With @options[:warnings_are_errors] set,
# warnings are reported as errors. With @options[:no_warnings] set, warnings
# are not printed and do not change the colour of the summary line.
#
# @param file [String] name of the checked source, used in the summary line.
# @param results [Array] the per-link results for that source.
def report_results(file, results)
  errors = results.select { |result| result.instance_of?(Error) }
  warnings = results.select { |result| result.instance_of?(Redirect) }
  if @options[:warnings_are_errors]
    # Promote redirects so they fail the run and are printed as problems.
    errors += warnings
    warnings = []
  end
  @return_code = 1 unless errors.empty?

  REPORT_MUTEX.synchronize do
    if errors.empty?
      message = "Checked: #{file}"
      if warnings.empty? || @options[:no_warnings]
        puts message.green
      else
        puts message.yellow
        warnings.each do |warning|
          puts " Warning: #{warning.uri_string}".yellow
          puts " Redirected to: #{warning.final_destination_uri_string}".yellow
        end
      end
    else
      puts "Problem: #{file}".red
      errors.each do |error|
        puts " Link: #{error.uri_string}".red
        case error
        when Redirect
          puts " Redirected to: #{error.final_destination_uri_string}".red
        when Error
          puts " Response: #{error.error.to_s}".red
        end
      end
    end
  end
end
|
#start_link_check_thread(source, source_name) ⇒ Object
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/link_checker.rb', line 85
# Starts a thread that checks every external link found in +source+ and then
# reports the results under +source_name+.
#
# A link whose check raises a StandardError is recorded as an Error carrying
# the exception message instead of aborting the remaining checks.
#
# @param source the HTML to scan, passed to external_link_uri_strings.
# @param source_name [String] label handed to report_results.
# @return [Thread] the started thread; callers join it to wait for the report.
def start_link_check_thread(source, source_name)
  Thread.new do
    outcomes = []
    self.class.external_link_uri_strings(source).each do |link|
      outcomes << begin
        # Record the link exactly as written in the page on the result.
        self.class.check_uri(URI(link)).tap { |outcome| outcome.uri_string = link }
      rescue => failure
        Error.new(:error => failure.to_s, :uri_string => link)
      end
    end
    report_results(source_name, outcomes)
  end
end
|