Class: Utterson::HtmlCheck
- Inherits:
-
Object
- Object
- Utterson::HtmlCheck
- Defined in:
- lib/utterson/html_check.rb
Overview
Collects the URIs referenced by an HTML document and checks each of them, whether it points to a remote resource or to a local file.
Constant Summary collapse
- @@semaphore =
Mutex.new
- @@checked_urls =
{}
Instance Attribute Summary collapse
-
#errors ⇒ Object
readonly
Returns the value of attribute errors.
Instance Method Summary collapse
- #add_error(file, url, response) ⇒ Object
- #check_local_uri(url, file) ⇒ Object
- #check_remote_uri(url, file) ⇒ Object
- #check_uri(url, file) ⇒ Object
- #collect_uris_from(f) ⇒ Object
-
#initialize(opts = {}) ⇒ HtmlCheck
constructor
A new instance of HtmlCheck.
- #run ⇒ Object
- #when_done(&handler) ⇒ Object
Constructor Details
#initialize(opts = {}) ⇒ HtmlCheck
Returns a new instance of HtmlCheck.
16 17 18 19 20 |
# File 'lib/utterson/html_check.rb', line 16

# Sets up a checker with an empty error table.
#
# @param opts [Hash] options; +:file+ is stored as the file to scan and
#   +:root+ as the base directory for URLs that start with "/"
def initialize(opts = {})
  @root   = opts[:root]
  @file   = opts[:file]
  @errors = {}
end
Instance Attribute Details
#errors ⇒ Object (readonly)
Returns the value of attribute errors.
11 12 13 |
# File 'lib/utterson/html_check.rb', line 11

# Read-only access to the error table built up by #add_error.
#
# @return [Hash] the same Hash instance held in @errors (not a copy)
def errors
  @errors
end
Instance Method Details
#add_error(file, url, response) ⇒ Object
90 91 92 93 |
# File 'lib/utterson/html_check.rb', line 90

# Records a failed check in @errors, grouped first by file and then by URL.
# A later entry for the same file and URL overwrites the earlier one.
#
# @param file [Object] key of the outer table (the file the URL came from)
# @param url [Object] key of the inner table
# @param response [Object] value stored for that file/URL pair
# @return [Object] +response+
def add_error(file, url, response)
  per_file = @errors[file]
  # Create the per-file table lazily, the first time a file gets an error.
  per_file = @errors[file] = {} if per_file.nil?
  per_file[url] = response
end
#check_local_uri(url, file) ⇒ Object
80 81 82 83 84 85 86 87 88 |
# File 'lib/utterson/html_check.rb', line 80

# Checks that a local URL points at something that exists on disk and
# records a "File not found" error through #add_error when it does not.
#
# The query string and the fragment are dropped before the lookup, since
# neither is part of the file name ("page.html#top" refers to "page.html").
# URLs starting with "/" are resolved against @root; all others are resolved
# against the directory containing +file+.
#
# @param url [String] URL as found in the document; it is not modified
# @param file [String] path of the document the URL was found in
# @return [Object, nil] the result of #add_error when the target is missing,
#   otherwise nil
def check_local_uri(url, file)
  # Non-destructive: the caller's string may be frozen or reused elsewhere.
  local = url.sub(/[?#].*\z/m, '')

  path =
    if local.start_with?('/')
      # The leading "." turns "/x" into "./x" so it is resolved inside @root.
      File.expand_path(".#{local}", @root)
    else
      File.expand_path(local, File.dirname(file))
    end

  add_error(file, local, "File not found") unless File.exist?(path)
end
#check_remote_uri(url, file) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/utterson/html_check.rb', line 65

# Sends an HTTP HEAD request for a remote URL and records an error through
# #add_error when the status code does not start with 2 or 3, or when the
# request cannot be made at all.
#
# Protocol-relative URLs ("//host/path") are requested over plain http.
# The request includes the query string, so "http://host/p?id=3" is checked
# as "/p?id=3" rather than as "/p".
#
# @param url [String] absolute or protocol-relative http(s) URL
# @param file [String] path of the document the URL was found in
# @return [Object, nil] the result of #add_error when an error was recorded,
#   otherwise nil
def check_remote_uri(url, file)
  uri = URI(url.sub(%r{\A//}, 'http://'))
  response = Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    # request_uri is the path plus query, and is "/" when the path is empty.
    http.head(uri.request_uri)
  end
  add_error(file, uri.to_s, response) if response.code =~ /^[^23]/
rescue => e
  # uri is still nil when URI() itself raised; report the URL as given
  # instead of filing the error under an empty key.
  add_error(file, (uri || url).to_s, e.message)
end
#check_uri(url, file) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/utterson/html_check.rb', line 49

# Checks one URL found in +file+, skipping targets that any checker has
# already handled. The set of handled targets is shared through
# @@checked_urls and guarded by @@semaphore.
#
# Remote URLs and URLs starting with "/" are remembered by the URL string
# alone. Relative local URLs are remembered together with the directory of
# +file+, because the same relative URL names a different file in each
# directory; keyed by URL alone, a relative link would be silently skipped
# whenever a document elsewhere had already used the same string.
#
# Because each target is checked once, an error is recorded only for the
# first document that references it.
#
# NOTE(review): anything that is not http(s) or protocol-relative is treated
# as a local file, which includes URLs such as "mailto:..." - confirm whether
# those should be skipped instead.
#
# @param url [String] URL as found in the document
# @param file [String] path of the document the URL was found in
# @return [Object, nil] nil when the target was already handled, otherwise
#   the result of the remote or local check
def check_uri(url, file)
  remote = !(url =~ %r{\A(https?:)?//}).nil?
  # url is copied so the cache key is unaffected if the string is modified later.
  key = (remote || url.start_with?('/')) ? url : [File.dirname(file), url.dup]

  @@semaphore.synchronize do
    return if @@checked_urls[key]
    @@checked_urls[key] = true
  end

  if remote
    check_remote_uri url, file
  else
    check_local_uri url, file
  end
end
#collect_uris_from(f) ⇒ Object
39 40 41 42 43 44 45 46 47 |
# File 'lib/utterson/html_check.rb', line 39

# Reads the file at +f+, parses it with Nokogiri::HTML and gathers every
# non-nil "src" and "href" attribute value found while traversing the
# document. For a node carrying both attributes, "src" comes first.
#
# @param f [String] path of the file to read
# @return [Array] attribute values in traversal order; duplicates are kept
def collect_uris_from(f)
  uris = []
  Nokogiri::HTML(File.read(f)).traverse do |node|
    %w[src href].each do |attribute|
      value = node[attribute]
      uris << value unless value.nil?
    end
  end
  uris
end
#run ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/utterson/html_check.rb', line 26

# Starts a new thread that collects the URIs of @file and checks each one.
# When a handler has been registered through #when_done, it is then called
# with +errors:+ (this instance's error table) and +urls:+ (the number of
# entries in the shared @@checked_urls table). The call is made while
# holding @@semaphore.
#
# @return [Thread] the thread doing the work
def run
  Thread.new do
    collect_uris_from(@file).each { |uri| check_uri(uri, @file) }

    # Nothing to report to when no handler was registered.
    next if @result_handler.nil?

    @@semaphore.synchronize do
      @result_handler.call(errors: @errors, urls: @@checked_urls.count)
    end
  end
end
#when_done(&handler) ⇒ Object
22 23 24 |
# File 'lib/utterson/html_check.rb', line 22

# Registers the block that #run calls once all checks have finished.
# Registering again replaces the previous block; calling without a block
# clears it.
#
# @yield invoked by #run with the keyword-style arguments +errors:+ and +urls:+
# @return [Proc, nil] the stored handler
def when_done(&handler)
  @result_handler = handler
end