Class: Ficon

Inherits:
Object
  • Object
show all
Defined in:
lib/ficon.rb,
lib/ficon/cache.rb,
lib/ficon/image.rb,
lib/ficon/version.rb

Defined Under Namespace

Classes: Cache, Image

Constant Summary collapse

URL health status constants

ALIVE =
'alive'
DEAD =
'dead'
SICK =
'sick'
BLOCKED =
'blocked'
VERSION =
"0.7"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri, user_agent: nil) ⇒ Ficon

Returns a new instance of Ficon.



21
22
23
24
25
26
27
28
# File 'lib/ficon.rb', line 21

# Builds a finder for +uri+ and immediately runs #process on it.
#
# @param uri the address to inspect; handed to
#   Addressable::URI.heuristic_parse as-is
# @param user_agent [String, nil] User-Agent string to store; when nil
#   (or false) a default "FiconBot/<VERSION>" string is stored instead
def initialize(uri, user_agent: nil)
  # The final URI starts out identical to the requested one.
  @final_uri = @uri = Addressable::URI.heuristic_parse(uri)
  @site = {}
  @url_status = nil
  @user_agent = user_agent
  @user_agent ||= "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
  process
end

Instance Attribute Details

#final_uri ⇒ Object (readonly)

Returns the value of attribute final_uri.



12
13
14
# File 'lib/ficon.rb', line 12

# Reader for the +@final_uri+ instance variable.
#
# @return [Object] whatever is currently stored in +@final_uri+
def final_uri = @final_uri

#site ⇒ Object (readonly)

Returns the value of attribute site.



12
13
14
# File 'lib/ficon.rb', line 12

# Reader for the +@site+ instance variable.
#
# @return [Object] whatever is currently stored in +@site+
def site = @site

#url_status ⇒ Object (readonly)

Returns the value of attribute url_status.



12
13
14
# File 'lib/ficon.rb', line 12

# Reader for the +@url_status+ instance variable.
#
# @return [Object] whatever is currently stored in +@url_status+
def url_status = @url_status

#user_agent ⇒ Object

Returns the value of attribute user_agent.



13
14
15
# File 'lib/ficon.rb', line 13

# Reader for the +@user_agent+ instance variable.
#
# @return [Object] whatever is currently stored in +@user_agent+
def user_agent = @user_agent

Class Method Details

.clear_cache ⇒ Object



98
99
100
# File 'lib/ficon.rb', line 98

# Delegates to Cache.clear_cache and returns its result.
def self.clear_cache = Cache.clear_cache

.normalise(base, candidate) ⇒ Object



141
142
143
144
145
146
147
148
149
# File 'lib/ficon.rb', line 141

# Turns +candidate+ into an absolute URL string by borrowing the missing
# parts from +base+.
#
# * A candidate without a host inherits the base host and, when the base
#   uses a non-default port, that port as well (otherwise
#   "http://localhost:3000" + "/favicon.ico" would silently lose ":3000").
# * A candidate without a scheme (including protocol-relative "//host/x")
#   inherits the base scheme.
#
# The candidate's path is used verbatim; it is not resolved against the
# base path.
#
# @param base [URI, String, #to_str] URL the candidate was found on
# @param candidate [String] absolute, protocol-relative or root-relative URL
# @return [String] the completed URL
# @raise [URI::InvalidURIError] if either argument cannot be parsed
def self.normalise(base, candidate)
  parsed_candidate = URI(candidate)
  base = URI(base) unless base.is_a? URI

  if parsed_candidate.host.nil? # Set relative URLs to absolute
    parsed_candidate.host = base.host
    # Only copy an explicit, non-default port: the candidate was parsed
    # without a scheme, so it has no default port of its own and would
    # otherwise render a redundant ":80" / ":443".
    parsed_candidate.port = base.port unless base.port == base.default_port
  end
  parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil? # Set the schema if missing

  parsed_candidate.to_s
end

.page_images(uri, doc) ⇒ Object



132
133
134
135
136
137
138
139
# File 'lib/ficon.rb', line 132

# Collects the page's Open Graph images, largest first.
#
# Only the +content+ attribute of each <meta property="og:image"> is used.
# Reading every attribute value of the element would also pick up the
# literal "og:image" from +property+ and turn it into a bogus "/og:image"
# URL.
#
# @param uri base URL passed to normalise for completing relative values
# @param doc parsed document responding to +xpath+
# @return [Array<Image>] unique images ordered by descending +area+
def self.page_images(uri, doc)
  doc.xpath("//meta[@property='og:image']/@content")
     .map(&:value)
     .reject(&:empty?)
     .map { |v| v.start_with?('http', '/') ? v : "/#{v}" } # root bare relative paths
     .map { |v| normalise(uri, v) }
     .uniq
     .map { |url| Image.new(url) }
     .sort_by(&:area)
     .reverse
end

.site_images(uri, doc) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/ficon.rb', line 110

# Collects the site's icon images (favicons, touch icons, Windows tile
# image), largest first.
#
# An image counts as the Windows tile image when it was produced by the
# <meta name="msapplication-TileImage"> element itself; such images are
# given the msapplication-TileColor value. Deciding this from the source
# element avoids interpolating URLs into an XPath expression (which breaks
# on quotes) and avoids the and/or precedence trap that let any meta with
# a matching +content+ be treated as the tile.
#
# @param uri base URL passed to normalise for completing relative values
# @param doc parsed document responding to +xpath+ / +at_xpath+
# @return [Array<Image>] unique images ordered by descending +area+
def self.site_images(uri, doc)
  # Get tile color for Windows tiles
  tile_color = doc.at_xpath("//meta[@name='msapplication-TileColor']/@content")&.value

  paths = "//meta[@name='msapplication-TileImage']|//link[@type='image/ico' or @type='image/vnd.microsoft.icon']|//link[@rel='icon' or @rel='shortcut icon' or @rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon']"

  # url => whether any occurrence came from the tile-image meta.
  # Hash insertion order keeps first-seen order, like Array#uniq did.
  tile_by_url = {}
  doc.xpath(paths).each do |element|
    from_tile_meta = element.name == 'meta' && element['name'] == 'msapplication-TileImage'

    # Keep only attribute values that look like image files.
    element.values.grep(/\.png$|\.jpg$|\.gif$|\.ico$|\.svg$|\.ico\?\d*$/).each do |raw|
      rooted = raw.start_with?('http', '/') ? raw : "/#{raw}"
      url = normalise(uri, rooted)
      tile_by_url[url] ||= from_tile_meta
    end
  end

  tile_by_url
    .map { |url, tile| Image.new(url, tile ? tile_color : nil) }
    .sort_by(&:area)
    .reverse
end

Instance Method Details

#classify_exception_status(exception) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/ficon.rb', line 166

# Maps an exception raised while fetching a URL to a health status.
#
# @param exception [Exception] the error that was raised
# @return [String] DEAD for name-resolution failures, SICK for everything
#   else (treated as retryable)
def classify_exception_status(exception)
  case exception
  when SocketError, Resolv::ResolvError then DEAD # DNS resolution failures
  when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED then SICK # network issues worth retrying
  when OpenSSL::SSL::SSLError then SICK # SSL certificate errors
  else SICK # unknown errors default to retryable
  end
end

#classify_response_status(response) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/ficon.rb', line 151

# Maps an HTTP response to a URL health status based on its status code.
#
# @param response object whose +code+ converts to an integer via +to_i+
# @return [String] ALIVE for 2xx, DEAD for 404/410, BLOCKED for
#   401/403/429, SICK for every other code (5xx included)
def classify_response_status(response)
  status = response.code.to_i

  return ALIVE if status.between?(200, 299)
  return DEAD if [404, 410].include?(status)
  return BLOCKED if [401, 403, 429].include?(status)

  # Server errors and anything unrecognised are treated the same way.
  SICK
end

#description ⇒ Object



96
# File 'lib/ficon.rb', line 96

# Value stored under +:description+ in +@site+ (nil when absent).
def description
  @site[:description]
end

#doc ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/ficon.rb', line 30

# Returns the parsed HTML document for the page, fetching and caching the
# raw body on first use.
#
# Errors listed in the rescue clauses are reported on standard output and
# turned into a nil return instead of propagating.
#
# @return the result of Nokogiri::HTML on the page body (memoised in
#   +@doc+), or nil when nothing could be fetched or a handled error
#   occurred
def doc
  # First try to fetch to determine final URL
  response = fetch_url(@uri) unless @data
  return nil if response.nil? && @data.nil?

  # Use final URL for caching
  cache = Cache.new(@final_uri)

  @data ||= cache.data

  if @data.nil? && response
    @data = response.body.force_encoding('UTF-8')
    cache.data = @data
    cache.etag = response['etag'] if response['etag']
    cache.not_before = response['last-modified'] if response['last-modified']
  end

  @doc ||= Nokogiri::HTML(@data)
rescue Net::HTTPError, SocketError => e
  puts "HTTP Error: #{e.inspect}"
  nil
rescue TypeError => e
  if /^http/.match?(@uri.to_s)
    puts e.inspect
    # Double quotes are required here: '\n' in single quotes is a literal
    # backslash followed by "n", which glued all frames onto one line.
    puts Array(e.backtrace).join("\n")
  else
    puts 'Please prepend http:// or https:// to the URL'
  end
  nil
rescue RuntimeError => e
  puts e.message
  nil
end

#other_page_data(document) ⇒ Object



102
103
104
105
106
107
108
# File 'lib/ficon.rb', line 102

# Stores the page title, description and canonical URL in +@site+.
#
# The title prefers the og:title meta content and falls back to the
# stripped text of <title>. The canonical URL is stored only when it
# differs from the string form of +@uri+.
#
# @param document parsed document responding to +at_xpath+
def other_page_data(document)
  og_title = document.at_xpath("//meta[@property='og:title']/@content")&.value
  @site[:title] = og_title || document.at_xpath('//title')&.text&.strip

  og_description = document.at_xpath("//meta[@property='og:description']/@content")
  @site[:description] = og_description&.value

  canonical_href = document.at_xpath("//link[@rel='canonical']/@href")&.value
  @site[:canonical] = canonical_href unless canonical_href == @uri.to_s
end

#page_images ⇒ Object



92
# File 'lib/ficon.rb', line 92

# Value stored under +:page_images+ in +@site+, or an empty array when
# that entry is nil or missing.
def page_images
  @site[:page_images] || []
end

#process ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/ficon.rb', line 65

# Populates +@site+ from the page document.
#
# When #doc yields a document, the site icons, page images and the other
# page metadata are extracted from it; otherwise both image lists are set
# to empty arrays.
#
# @return [nil]
def process
  document = doc

  unless document
    @site[:images] = []
    @site[:page_images] = []
    return
  end

  finder = self.class
  @site[:images] = finder.site_images(@uri, document) || []
  @site[:page_images] = finder.page_images(@uri, document) || []
  other_page_data(document)
  nil
end

#report ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
# File 'lib/ficon.rb', line 78

# Builds a multi-line, human-readable summary of what was found.
#
# The first four lines are always present. The final URL, canonical URL
# and URL status lines are added only when, respectively, the final URL
# differs from the requested one, a canonical URL is stored, and a status
# is set.
#
# @return [String] newline-terminated report
def report
  summary = [
    "Site icon: #{@site[:images].first}",
    "Page icon: #{@site[:page_images].first}",
    "Page title: #{@site[:title]}",
    "Page description: #{@site[:description]}"
  ]
  summary << "Final URL: #{@final_uri}" if @final_uri.to_s != @uri.to_s
  summary << "Canonical URL: #{@site[:canonical]}" if @site[:canonical]
  summary << "URL Status: #{@url_status}" if @url_status

  "#{summary.join("\n")}\n"
end

#site_icons ⇒ Object



90
# File 'lib/ficon.rb', line 90

# Value stored under +:images+ in +@site+, or an empty array when that
# entry is nil or missing.
def site_icons
  @site[:images] || []
end

#title ⇒ Object



94
# File 'lib/ficon.rb', line 94

# Value stored under +:title+ in +@site+ (nil when absent).
def title
  @site[:title]
end