Class: LinkChecker

Inherits:
Object
  • Object
show all
Defined in:
lib/link_checker.rb

Defined Under Namespace

Classes: Error, Good, Redirect, Result

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params) ⇒ LinkChecker

Returns a new instance of LinkChecker.



11
12
13
14
15
# File 'lib/link_checker.rb', line 11

# Builds a checker from a parameter hash.
#
# @param params [Hash] recognised keys:
#   :options - option hash consulted by the reporting code (defaults to {})
#   :target  - what to check; a path or an http(s) URL (defaults to './')
def initialize(params)
  given_options = params[:options]
  given_target = params[:target]

  @options = given_options || {}
  @target = given_target || './'
  # Starts at 0; other methods set it to 1 once a problem is found.
  @return_code = 0
end

Class Method Details

.check_uri(uri, redirected = false) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/link_checker.rb', line 30

# Issues an HTTP GET for +uri+ and classifies the response.
#
# @param uri [URI] absolute http/https URI to request
# @param redirected [Boolean] true when this call follows a redirect from an
#   earlier request, so that a success is reported as a Redirect
# @return [Good] for a success response reached directly
# @return [Redirect] for a success response reached through a redirect
# @return [Error] for any other response, including a redirect that carries
#   no Location header
#
# NOTE(review): redirects are followed recursively with no depth limit, so a
# redirect loop on the remote side is not detected here.
def self.check_uri(uri, redirected=false)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true if uri.scheme == "https"
  http.start do
    # Request the path together with the query string; requesting the path
    # alone would check a different resource than the link points at.
    path = uri.path.empty? ? '/' : uri.path
    path = "#{path}?#{uri.query}" if uri.query
    http.request_get(path) do |response|
      case response
      when Net::HTTPSuccess
        return Redirect.new(:final_destination_uri_string => uri.to_s) if redirected
        return Good.new(:uri_string => uri.to_s)
      when Net::HTTPRedirection
        location = response['location']
        # A redirect without a Location header cannot be followed.
        return Error.new(:uri_string => uri.to_s, :response => response) if location.nil?
        # Resolve against the current URI so relative Location values work;
        # absolute values pass through merge unchanged.
        return check_uri(uri.merge(location), true)
      else
        return Error.new(:uri_string => uri.to_s, :response => response)
      end
    end
  end
end


23
24
25
26
27
28
# File 'lib/link_checker.rb', line 23

# Collects the href values of anchors that point at http/https URLs.
#
# @param source HTML input handed straight to Nokogiri::HTML
# @return [Array<String>] href values, in document order, of every <a>
#   element whose href matches the http(s) pattern below
def self.external_link_uri_strings(source)
  anchors = Nokogiri::HTML(source).css('a')
  external_anchors = anchors.select do |anchor|
    href = anchor.attribute('href')
    # Anchors without an href are skipped before the pattern is tried.
    !href.nil? && href.value =~ /^https?\:\/\//
  end
  external_anchors.map { |anchor| anchor.attributes['href'].value }
end

Instance Method Details

#check_uris ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/link_checker.rb', line 52

# Runs the link check against @target and returns the resulting status code.
#
# A target that starts with http:// or https:// is crawled; anything else is
# treated as a location on disk and scanned for HTML files.
#
# @return [Integer] @return_code; 0 when nothing set it, 1 once a problem was
#   recorded or the run itself was aborted by an exception
def check_uris
  begin
    if @target =~ /^https?\:\/\//
      check_uris_by_crawling
    else
      check_uris_in_files
    end
  rescue => error
    puts "Error: #{error}".red
    # The run did not complete, so it must not be reported as a success.
    @return_code = 1
  end
  @return_code
end

#check_uris_by_crawling ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/link_checker.rb', line 65

# Crawls @target with Anemone and starts one link-check thread per page.
#
# @return [Array<Thread>] the threads that were started, all joined
# @raise [StandardError] when a crawled page reports an error; threads
#   started before that point are still joined before the error propagates
def check_uris_by_crawling
  threads = []
  begin
    Anemone.crawl(@target) do |anemone|
      anemone.storage = Anemone::Storage.PStore('link-checker-crawled-pages.pstore')
      anemone.on_every_page do |crawled_page|
        raise StandardError, crawled_page.error if crawled_page.error
        threads << start_link_check_thread(crawled_page.body, crawled_page.url.to_s)
      end
    end
  ensure
    # Join even when the crawl aborts, so checks already in flight finish
    # reporting instead of being abandoned.
    threads.each(&:join)
  end
  threads
end

#check_uris_in_files ⇒ Object



77
78
79
80
81
82
83
# File 'lib/link_checker.rb', line 77

# Starts one link-check thread per HTML file found under @target and waits
# for all of them to finish.
#
# @return [Array<Thread>] the threads that were started, all joined
def check_uris_in_files
  threads = html_file_paths.map do |file|
    # Read the contents up front instead of handing over an open handle:
    # nothing ever closed the handle, and Kernel#open runs a path beginning
    # with "|" as a command. Binary mode leaves the bytes untouched.
    # NOTE(review): presumably the parser then detects the encoding itself,
    # as it would for an IO - confirm against the parser in use.
    start_link_check_thread(File.binread(file), file)
  end
  threads.each(&:join)
end

#html_file_paths ⇒ Object



17
18
19
20
21
# File 'lib/link_checker.rb', line 17

# Lists the HTML files found by walking @target with Find.find.
#
# @return [Array<String>] paths of regular files whose name ends in
#   ".htm" or ".html"; directories are never included
def html_file_paths
  Find.find(@target).select do |candidate|
    FileTest.file?(candidate) && candidate =~ /\.html?$/
  end
end

#report_results(file, results) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/link_checker.rb', line 101

# Shared lock so reports printed from different threads do not interleave.
# Thread.exclusive, used previously, no longer exists in Ruby 3.0 and later.
REPORT_MUTEX = Mutex.new

# Prints the outcome of checking one source and records failures in
# @return_code.
#
# Error results are problems; Redirect results are warnings unless
# @options[:warnings_are_errors] is set, in which case they are reported as
# problems too. @options[:no_warnings] suppresses the warning output.
#
# @param file [String] name of the checked source, used in the headline
# @param results [Array] result objects; only instances of exactly Error or
#   Redirect influence the report
def report_results(file, results)
  errors = results.select { |result| result.instance_of?(Error) }
  warnings = results.select { |result| result.instance_of?(Redirect) }
  @return_code = 1 unless errors.empty?
  if @options[:warnings_are_errors]
    @return_code = 1 unless warnings.empty?
    errors += warnings
    warnings = []
  end
  REPORT_MUTEX.synchronize do
    if errors.empty?
      message = "Checked: #{file}"
      if warnings.empty? || @options[:no_warnings]
        puts message.green
      else
        puts message.yellow
      end
      unless @options[:no_warnings]
        warnings.each do |warning|
          puts "   Warning: #{warning.uri_string}".yellow
          puts "     Redirected to: #{warning.final_destination_uri_string}".yellow
        end
      end
    else
      puts "Problem: #{file}".red
      errors.each do |error|
        puts "   Link: #{error.uri_string}".red
        # Redirects only appear here when warnings are treated as errors.
        case error
        when Redirect
          puts "     Redirected to: #{error.final_destination_uri_string}".red
        when Error
          puts "     Response: #{error.error.to_s}".red
        end
      end
    end
  end
end


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/link_checker.rb', line 85

# Starts a thread that checks every external link found in +source+ and then
# reports the outcome under +source_name+.
#
# @param source HTML input passed to the class-level link extractor
# @param source_name [String] label handed to report_results
# @return [Thread] the started thread; the caller is expected to join it
def start_link_check_thread(source, source_name)
  checker = self.class
  Thread.new do
    link_strings = checker.external_link_uri_strings(source)
    outcomes = link_strings.map do |link|
      begin
        # Report the link as written in the source rather than the parsed form.
        checker.check_uri(URI(link)).tap { |outcome| outcome.uri_string = link }
      rescue => failure
        # A link that cannot be parsed or fetched becomes an Error result
        # instead of ending the thread.
        Error.new(:error => failure.to_s, :uri_string => link)
      end
    end
    report_results(source_name, outcomes)
  end
end