Class: Ficon
- Inherits:
-
Object
- Object
- Ficon
- Defined in:
- lib/ficon.rb,
lib/ficon/cache.rb,
lib/ficon/image.rb,
lib/ficon/version.rb
Defined Under Namespace
Constant Summary collapse
- ALIVE =
URL health status constants
'alive'
- DEAD =
'dead'
- SICK =
'sick'
- BLOCKED =
'blocked'
- VERSION =
"0.7"
Instance Attribute Summary collapse
-
#final_uri ⇒ Object
readonly
Returns the value of attribute final_uri.
-
#site ⇒ Object
readonly
Returns the value of attribute site.
-
#url_status ⇒ Object
readonly
Returns the value of attribute url_status.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Class Method Summary collapse
- .clear_cache ⇒ Object
- .normalise(base, candidate) ⇒ Object
- .page_images(uri, doc) ⇒ Object
- .site_images(uri, doc) ⇒ Object
Instance Method Summary collapse
- #classify_exception_status(exception) ⇒ Object
- #classify_response_status(response) ⇒ Object
- #description ⇒ Object
- #doc ⇒ Object
-
#initialize(uri, user_agent: nil) ⇒ Ficon
constructor
A new instance of Ficon.
- #other_page_data(document) ⇒ Object
- #page_images ⇒ Object
- #process ⇒ Object
- #report ⇒ Object
- #site_icons ⇒ Object
- #title ⇒ Object
Constructor Details
#initialize(uri, user_agent: nil) ⇒ Ficon
Returns a new instance of Ficon.
21 22 23 24 25 26 27 28 |
# File 'lib/ficon.rb', line 21

# Builds a Ficon for +uri+ and immediately runs #process.
#
# @param uri the address to inspect; parsed with Addressable::URI.heuristic_parse
# @param user_agent [String, nil] User-Agent string to store; a default that
#   embeds VERSION is used when nil
def initialize(uri, user_agent: nil)
  default_agent = "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"

  @site = {}
  @url_status = nil
  @user_agent = user_agent || default_agent
  # Until something else updates it, the final URI is the URI we were given.
  @final_uri = @uri = Addressable::URI.heuristic_parse(uri)

  process
end
Instance Attribute Details
#final_uri ⇒ Object (readonly)
Returns the value of attribute final_uri.
12 13 14 |
# File 'lib/ficon.rb', line 12

# Returns the value of attribute final_uri.
def final_uri = @final_uri
#site ⇒ Object (readonly)
Returns the value of attribute site.
12 13 14 |
# File 'lib/ficon.rb', line 12

# Returns the value of attribute site.
def site = @site
#url_status ⇒ Object (readonly)
Returns the value of attribute url_status.
12 13 14 |
# File 'lib/ficon.rb', line 12

# Returns the value of attribute url_status.
def url_status = @url_status
#user_agent ⇒ Object
Returns the value of attribute user_agent.
13 14 15 |
# File 'lib/ficon.rb', line 13

# Returns the value of attribute user_agent.
def user_agent = @user_agent
Class Method Details
.clear_cache ⇒ Object
98 99 100 |
# File 'lib/ficon.rb', line 98

# Delegates to Cache.clear_cache and returns whatever it returns.
def self.clear_cache = Cache.clear_cache
.normalise(base, candidate) ⇒ Object
141 142 143 144 145 146 147 148 149 |
# File 'lib/ficon.rb', line 141

# Turns +candidate+ into an absolute URL string, borrowing whatever it lacks
# from +base+.
#
# When the candidate has no host, the base's host is copied, together with the
# base's port if that port is not the default for the base's scheme (so
# "http://localhost:3000" + "/favicon.ico" keeps ":3000"). When the candidate
# has no scheme (including protocol-relative "//host/path" URLs), the base's
# scheme is copied.
#
# @param base [URI, String] URL supplying the missing parts
# @param candidate [String] absolute, protocol-relative or host-relative URL
# @return [String] the completed URL
# @raise [URI::InvalidURIError] if either argument cannot be parsed by URI()
def self.normalise(base, candidate)
  parsed_candidate = URI(candidate)
  base = URI(base) unless base.is_a? URI

  if parsed_candidate.host.nil?
    # Set relative URLs to absolute
    parsed_candidate.host = base.host
    # A default port is left implicit so the output stays free of ":80"/":443".
    parsed_candidate.port = base.port if base.port && base.port != base.default_port
  end
  # Set the scheme if missing
  parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil?

  parsed_candidate.to_s
end
.page_images(uri, doc) ⇒ Object
132 133 134 135 136 137 138 139 |
# File 'lib/ficon.rb', line 132

# Collects the page's Open Graph images.
#
# Only the +content+ attribute of each <meta property="og:image"> element is
# used. Reading every attribute value would also pick up the literal
# "og:image" from the +property+ attribute and turn it into a bogus image URL.
#
# @param uri base URL handed to normalise for completing relative paths
# @param doc parsed document responding to #xpath
# @return [Array<Image>] unique images, sorted by Image#area, largest first
def self.page_images(uri, doc)
  doc.xpath("//meta[@property='og:image']")
     .filter_map { |meta| meta['content'] }
     .reject(&:empty?)
     # Paths without a leading slash are treated as relative to the site root.
     .map { |value| value.start_with?('http', '/') ? value : "/#{value}" }
     .map { |path| normalise(uri, path) }
     .uniq
     .map { |url| Image.new(url) }
     .sort_by(&:area)
     .reverse
end
.site_images(uri, doc) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/ficon.rb', line 110

# Collects the site's icon images (favicons, touch icons, Windows tiles).
#
# Candidate elements are the msapplication-TileImage meta tag and <link>
# elements with an icon type or rel. From those elements, every attribute
# value that looks like an image file name is kept, completed to an absolute
# URL via normalise, and de-duplicated.
#
# Tile images are recognised by comparing normalised URLs in Ruby rather than
# by interpolating the URL into an XPath string: that avoids an and/or
# precedence mistake in the query and syntax errors for URLs containing an
# apostrophe.
#
# @param uri base URL handed to normalise for completing relative paths
# @param doc parsed document responding to #xpath and #at_xpath
# @return [Array<Image>] unique images, sorted by Image#area, largest first;
#   tile images are built with the msapplication-TileColor value
def self.site_images(uri, doc)
  icon_file = /\.png$|\.jpg$|\.gif$|\.ico$|\.svg$|\.ico\?\d*$/
  absolutise = lambda do |value|
    # Paths without a leading slash are treated as relative to the site root.
    rooted = value.start_with?('http', '/') ? value : "/#{value}"
    normalise(uri, rooted)
  end

  # Get tile color for Windows tiles
  tile_color = doc.at_xpath("//meta[@name='msapplication-TileColor']/@content")&.value
  tile_urls = doc.xpath("//meta[@name='msapplication-TileImage']/@content")
                 .map(&:value)
                 .select { |value| icon_file.match?(value) }
                 .map(&absolutise)

  paths = "//meta[@name='msapplication-TileImage']|//link[@type='image/ico' or @type='image/vnd.microsoft.icon']|//link[@rel='icon' or @rel='shortcut icon' or @rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon']"

  doc.xpath(paths)
     .flat_map { |element| element.values.select { |value| icon_file.match?(value) } }
     .map(&absolutise)
     .uniq
     .map { |url| Image.new(url, tile_urls.include?(url) ? tile_color : nil) }
     .sort_by(&:area)
     .reverse
end
Instance Method Details
#classify_exception_status(exception) ⇒ Object
166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/ficon.rb', line 166

# Maps an exception raised while fetching a URL to a health status constant.
#
# @param exception the exception to classify
# @return DEAD for DNS resolution failures, SICK for everything else
def classify_exception_status(exception)
  case exception
  when SocketError, Resolv::ResolvError
    # DNS resolution failures
    DEAD
  when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED, OpenSSL::SSL::SSLError
    # Network issues worth retrying, and SSL certificate errors
    SICK
  else
    # Default to retryable for unknown errors
    SICK
  end
end
#classify_response_status(response) ⇒ Object
151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/ficon.rb', line 151

# Maps an HTTP response to a health status constant based on its status code.
#
# @param response object whose #code converts to the numeric status via #to_i
# @return ALIVE for 2xx, DEAD for 404/410, BLOCKED for 401/403/429,
#   SICK for 5xx and for any other code
def classify_response_status(response)
  status_code = response.code.to_i

  case status_code
  when 200..299 then ALIVE
  when 404, 410 then DEAD
  when 401, 403, 429 then BLOCKED
  when 500..599 then SICK
  else SICK
  end
end
#description ⇒ Object
96 |
# File 'lib/ficon.rb', line 96

# Returns the description stored under :description in the site data.
def description
  @site[:description]
end
#doc ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/ficon.rb', line 30

# Returns the parsed HTML document for the target URL, fetching and caching
# the page body on first use.
#
# The fetch result is memoised in @data and the parsed document in @doc, so
# the URL is fetched only while @data is unset.
#
# @return the Nokogiri::HTML document, or nil when nothing could be fetched
#   or one of the rescued errors occurred (a message is printed in that case)
def doc
  # First try to fetch to determine final URL
  response = fetch_url(@uri) unless @data
  return nil if response.nil? && @data.nil?

  # Use final URL for caching
  cache = Cache.new(@final_uri)
  @data ||= cache.data

  if @data.nil? && response
    @data = response.body.force_encoding('UTF-8')
    cache.data = @data
    cache.etag = response['etag'] if response['etag']
    cache.not_before = response['last-modified'] if response['last-modified']
  end

  @doc ||= Nokogiri::HTML(@data)
rescue Net::HTTPError, SocketError => e
  puts "HTTP Error: #{e.inspect}"
  nil
rescue TypeError => e
  if /^http/.match?(@uri.to_s)
    puts e.inspect
    # Double quotes so the backtrace is joined with real newlines.
    puts e.backtrace.join("\n")
  else
    puts 'Please prepend http:// or https:// to the URL'
  end
  nil
rescue RuntimeError => e
  puts e.message
  nil
end
#other_page_data(document) ⇒ Object
102 103 104 105 106 107 108 |
# File 'lib/ficon.rb', line 102

# Extracts title, description and canonical URL from +document+ into @site.
#
# The title prefers the og:title meta tag and falls back to the stripped text
# of the <title> element. The canonical URL is stored only when it differs
# from the string form of @uri.
#
# @param document parsed document responding to #at_xpath
def other_page_data(document)
  og_title = document.at_xpath("//meta[@property='og:title']/@content")&.value
  @site[:title] = og_title || document.at_xpath('//title')&.text&.strip

  og_description = document.at_xpath("//meta[@property='og:description']/@content")
  @site[:description] = og_description&.value

  canonical = document.at_xpath("//link[@rel='canonical']/@href")&.value
  @site[:canonical] = canonical if canonical != @uri.to_s
end
#page_images ⇒ Object
92 |
# File 'lib/ficon.rb', line 92

# Returns the images stored under :page_images in the site data, or an empty
# array when none are stored.
def page_images
  stored = @site[:page_images]
  stored || []
end
#process ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/ficon.rb', line 65

# Loads the document and fills @site with icons, page images and page
# metadata. When no document is available both image lists are set to [].
#
# @return [nil]
def process
  document = doc

  unless document
    @site[:images] = []
    @site[:page_images] = []
    return nil
  end

  finder = self.class
  @site[:images] = finder.site_images(@uri, document) || []
  @site[:page_images] = finder.page_images(@uri, document) || []
  other_page_data(document)
  nil
end
#report ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/ficon.rb', line 78

# Builds a plain-text summary of what was found for the URL.
#
# The four icon/title/description lines are always present; the final URL,
# canonical URL and URL status lines appear only when they carry information.
#
# @return [String] newline-terminated report
def report
  lines = [
    "Site icon: #{@site[:images].first}",
    "Page icon: #{@site[:page_images].first}",
    "Page title: #{@site[:title]}",
    "Page description: #{@site[:description]}"
  ]

  # Only mention the final URL when it differs from the requested one.
  lines << "Final URL: #{@final_uri}" if @final_uri.to_s != @uri.to_s
  lines << "Canonical URL: #{@site[:canonical]}" if @site[:canonical]
  lines << "URL Status: #{@url_status}" if @url_status

  lines.map { |line| "#{line}\n" }.join
end
#site_icons ⇒ Object
90 |
# File 'lib/ficon.rb', line 90

# Returns the images stored under :images in the site data, or an empty array
# when none are stored.
def site_icons
  stored = @site[:images]
  stored || []
end
#title ⇒ Object
94 |
# File 'lib/ficon.rb', line 94

# Returns the title stored under :title in the site data.
def title
  @site[:title]
end