Class: Utterson::HtmlCheck

Inherits:
Object
  • Object
show all
Defined in:
lib/utterson/html_check.rb

Overview

Collects URIs from an HTML document and checks each of them, remotely (over HTTP) or locally (on the file system).

Constant Summary collapse

@@semaphore =
Mutex.new
@@checked_urls =
{}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ HtmlCheck

Returns a new instance of HtmlCheck.



16
17
18
19
20
# File 'lib/utterson/html_check.rb', line 16

# Sets up a checker from an options hash.
#
# @param opts [Hash] options; only the :file and :root entries are read.
#   :file is the document handed to collect_uris_from by run, and :root is
#   the directory "/"-prefixed URLs are resolved against in check_local_uri.
def initialize(opts = {})
  @file, @root = opts.values_at(:file, :root)
  @errors = {} # populated by add_error
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



11
12
13
# File 'lib/utterson/html_check.rb', line 11

# Returns the value of @errors. initialize sets it to an empty Hash and
# add_error fills it in the shape file => { url => response }.
def errors
  @errors
end

Instance Method Details

#add_error(file, url, response) ⇒ Object



90
91
92
93
# File 'lib/utterson/html_check.rb', line 90

# Records a failed check in @errors, grouped first by file and then by URL.
#
# @param file [Object] key of the outer hash (the document being checked)
# @param url [Object] key of the per-file hash
# @param response [Object] value stored for that URL; a later call with the
#   same file and url overwrites it
# @return [Object] the response that was stored
def add_error(file, url, response)
  # Create the per-file bucket on first use, then file the response under it.
  (@errors[file] ||= {})[url] = response
end

#check_local_uri(url, file) ⇒ Object



80
81
82
83
84
85
86
87
88
# File 'lib/utterson/html_check.rb', line 80

# Checks that a local URL found in +file+ points at an existing path.
#
# The query string and the fragment are dropped before the lookup. A URL that
# starts with "/" is resolved against @root; any other URL is resolved against
# the directory containing +file+. When nothing exists at the resolved path,
# add_error is called with the stripped URL and the message "File not found".
#
# @param url [String] URL as collected from the document; it is not modified
# @param file [String] path of the HTML file the URL was collected from
def check_local_uri(url, file)
  # Keep only the path part. Slicing (instead of gsub!) leaves the caller's
  # string untouched and therefore also works on frozen strings.
  local = url[/\A[^?#]*/]

  # Nothing left (e.g. "#top" or "?page=2") means the link targets the
  # document itself, so there is no other file to look for.
  return if local.empty?

  path = if local.start_with?('/')
           File.expand_path(".#{local}", @root)
         else
           File.expand_path(local, File.dirname(file))
         end
  add_error(file, local, "File not found") unless File.exist?(path)
end

#check_remote_uri(url, file) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/utterson/html_check.rb', line 65

# Sends an HTTP HEAD request for +url+ and records a failure for +file+ when
# the status code does not start with 2 or 3.
#
# On a bad status the response object itself is stored through add_error; when
# anything raises a StandardError (unparsable URL, connection problem, ...)
# the exception message is stored instead.
#
# @param url [String] absolute http(s) URL or protocol-relative "//host/..." URL
# @param file [String] path of the HTML file the URL was collected from
def check_remote_uri(url, file)
  # A protocol-relative URL carries no scheme of its own; probe it over HTTP.
  uri = URI(url.sub(%r{\A//}, 'http://'))

  response = Net::HTTP.start(uri.host, uri.port,
                             use_ssl: uri.scheme == 'https') do |http|
    # request_uri is the path plus the query string ("/" when the path is
    # empty), so "?id=5"-style URLs are probed as written.
    http.head(uri.request_uri)
  end
  add_error(file, uri.to_s, response) if response.code.match?(/\A[^23]/)
rescue => e
  # uri is still nil when URI() itself raised; fall back to the URL as given
  # so the error is not filed under an empty key.
  add_error(file, (uri || url).to_s, e.message)
end

#check_uri(url, file) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/utterson/html_check.rb', line 49

# Checks one URL found in +file+, at most once per distinct URL string.
#
# @@checked_urls is read and updated while holding @@semaphore; a URL that is
# already recorded there is skipped, whichever file it was first seen in.
# URLs starting with "http://", "https://" or "//" go to check_remote_uri.
# URLs carrying any other scheme (mailto:, tel:, javascript:, data:, ...) do
# not name a file and are skipped instead of being reported as missing files.
# Everything else goes to check_local_uri.
#
# @param url [String] URL as collected from the document
# @param file [String] path of the HTML file the URL was collected from
def check_uri(url, file)
  @@semaphore.synchronize do
    return if @@checked_urls[url]

    @@checked_urls[url] = true
  end

  if url =~ %r{\A(https?:)?//}
    check_remote_uri url, file
  elsif url !~ /\A[a-z][a-z0-9+.-]*:/i
    # No "scheme:" prefix, so this is a path on the local site.
    check_local_uri url, file
  end
end

#collect_uris_from(f) ⇒ Object



39
40
41
42
43
44
45
46
47
# File 'lib/utterson/html_check.rb', line 39

# Reads the file at +f+, parses it with Nokogiri::HTML and gathers the value
# of every src and href attribute met while traversing the document.
#
# @param f [String] path of the HTML file to read
# @return [Array] attribute values in traversal order; for a node carrying
#   both attributes, src comes before href. Duplicates are kept.
def collect_uris_from(f)
  document = Nokogiri::HTML(File.read(f))
  uris = []
  document.traverse do |node|
    %w[src href].each do |attribute|
      value = node[attribute]
      uris << value unless value.nil?
    end
  end
  uris
end

#run ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/utterson/html_check.rb', line 26

# Starts a new thread that checks every URI collected from @file and then,
# if a handler was registered through when_done, calls it while holding
# @@semaphore with errors: @errors and urls: @@checked_urls.count.
#
# @return [Thread] the started thread; this method does not join it
def run
  Thread.new do
    collect_uris_from(@file).each { |uri| check_uri(uri, @file) }

    # No handler registered: finish without taking the lock.
    next if @result_handler.nil?

    @@semaphore.synchronize do
      @result_handler.call(errors: @errors, urls: @@checked_urls.count)
    end
  end
end

#when_done(&handler) ⇒ Object



22
23
24
# File 'lib/utterson/html_check.rb', line 22

# Stores the given block in @result_handler. run calls it, after every
# collected URI has been passed to check_uri, with a Hash holding
# errors: @errors and urls: @@checked_urls.count. Without a block,
# @result_handler becomes nil and run skips the callback.
def when_done(&handler)
  @result_handler = handler
end