Module: Twimage

Defined in:
lib/twimage.rb,
lib/twimage/image.rb,
lib/twimage/version.rb

Defined Under Namespace

Classes: Image, ImageNotFound, ImageURLInvalid, ServiceURLInvalid

Constant Summary collapse

# Value sent as the 'User-Agent' header on every HTTP request this module makes
# (both the HTTParty and the open-uri calls); embeds the gem VERSION.
USER_AGENT =
"Twimage #{Twimage::VERSION} http://github.com/cannikin/twimage"
# Table of supported image services. find_service returns the first entry whose
# :service_match matches a URL. Keys used by the module methods:
#   :name              - symbol passed to Image.new as :service
#   :service_match     - a Regexp, or an Array of Regexps, tested against the service URL
#   :full_url_modifier - optional lambda that rewrites the service URL before the page is fetched
#   :image_css_match   - CSS selector; the 'src' of the first matching element is the image URL
#   :image_regex_match - Regexp run against the page body; capture group 1 is the image URL
# An entry provides either :image_css_match or :image_regex_match.
SERVICES =
[{ :name => :twitpic,
:service_match => /twitpic\.com/,
:full_url_modifier => lambda { |url| url + '/full' },
:image_css_match => '#media-full img' },
              { :name => :yfrog,
:service_match => /yfrog\.com/,
:full_url_modifier => lambda { |url| url.gsub(/\.com/, '.com/z') },
:image_css_match => '#the-image img' },
              # instagram is reachable under two hostnames, hence the array of patterns
              { :name => :instagram,
:service_match => [/instagr\.am/, /instagram\.com/],
:image_css_match => '.photo' },
              # twitter is the only entry that extracts the image URL with a regex
              # instead of a CSS selector; its URL modifier is currently disabled
              { :name => :twitter,
#:full_url_modifier => lambda { |url| url + '/large' },
:service_match => /twitter\.com/,
:image_regex_match => /"media_url_https":"(.*?)"/}]
# Gem version string; interpolated into USER_AGENT.
VERSION =
"0.1.2"

Class Method Summary collapse

Class Method Details

.find_service(url) ⇒ Object

Figure out which service a URL belongs to by matching it against each service's regex(es). Returns the matching SERVICES entry, or nil if none match.



45
46
47
48
49
50
51
52
53
# File 'lib/twimage.rb', line 45

# Figure out which service a URL belongs to by matching it against each
# service's :service_match pattern(s).
#
# @param url [String] the URL to identify
# @return [Hash, nil] the first entry of SERVICES with a matching pattern,
#   or nil when no service matches
def self.find_service(url)
  SERVICES.find do |service|
    # :service_match may be a single Regexp or an Array of them, so normalise
    # to a flat array before testing
    [service[:service_match]].flatten.any? { |regex| url.match(regex) }
  end
end

.get(url) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/twimage.rb', line 33

# Resolve a (possibly shortened) URL to its image service, locate the image on
# the service page and download it.
#
# @param url [String] URL pointing at a page on one of the SERVICES
# @return [Image] built from the service name, the resolved service URL, the
#   image URL and the downloaded image data
# @raise [ServiceURLInvalid] if the resolved URL matches none of the SERVICES
def self.get(url)
  headers = { 'User-Agent' => USER_AGENT }

  # first point HTTParty at this URL and follow any redirects to get to the final page
  service_url = HTTParty.get(url, :headers => headers).request.path.to_s

  # check the resulting service_url for which service we're hitting;
  # find_service returns nil for an unsupported host, so fail with a
  # descriptive error instead of a NoMethodError on nil below
  service = find_service(service_url)
  raise ServiceURLInvalid, "The URL #{service_url} does not belong to a supported service" unless service

  # get the full res version of the service_url (not every service defines a modifier)
  modifier = service[:full_url_modifier]
  full_res_service_url = modifier ? modifier.call(service_url) : service_url

  image_url = get_image_url(service, full_res_service_url) # the URL to the image
  image = get_image(image_url)                             # the image itself

  Image.new(:service => service[:name], :service_url => service_url, :image_url => image_url, :image => image)
end

.get_image(url) ⇒ Object

Download the actual image and return its raw body as a string. Raises ImageURLInvalid if the response status is not 200.



86
87
88
89
90
91
92
93
94
# File 'lib/twimage.rb', line 86

# Download the image itself.
#
# @param url [String] direct URL of the image file
# @return [String] the response body, tagged as UTF-8
# @raise [ImageURLInvalid] if the server answers with anything other than 200
def self.get_image(url)
  response = HTTParty.get(url, :headers => { 'User-Agent' => USER_AGENT })

  # Any non-200 status is treated as a failure, so report the status actually
  # received rather than always claiming a 404.
  unless response.code == 200
    raise ImageURLInvalid, "The image_url #{url} could not be fetched (returned a #{response.code})"
  end

  # NOTE(review): the image bytes are relabelled as UTF-8 here; presumably a
  # consumer elsewhere depends on this encoding - confirm before changing.
  response.body.force_encoding('utf-8')
end

.get_image_url(service, url) ⇒ Object

Fetch the service page and extract the source URL of the image from it, using either the service's CSS selector or its regex.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/twimage.rb', line 57

# Fetch the service page and find the source URL of the image on it.
#
# Services with :image_css_match are parsed as HTML and the 'src' of the first
# element matching the selector is used; services with :image_regex_match have
# the regex run against the raw body and capture group 1 is used.
#
# @param service [Hash] an entry of SERVICES
# @param url [String] URL of the service page to inspect
# @return [String] the URL of the image file
# @raise [ServiceURLInvalid] if the service page could not be fetched
# @raise [ImageNotFound] if the page was fetched but no image could be identified
def self.get_image_url(service, url)
  headers = { 'User-Agent' => USER_AGENT }
  image_url = nil

  if service[:image_css_match]
    begin
      # open-uri's Kernel#open override for URLs was removed in Ruby 3.0;
      # use URI.open where it exists and fall back to open on older Rubies.
      page = URI.respond_to?(:open) ? URI.open(url, headers) : open(url, headers)
      image_tag = Nokogiri::HTML(page).css(service[:image_css_match]).first
      # the selector may match nothing; leave image_url nil so ImageNotFound
      # is raised below instead of a NoMethodError on nil
      image_url = image_tag && image_tag['src']
    rescue OpenURI::HTTPError
      raise ServiceURLInvalid, "The service URL #{url} was not found (returned a 404)"
    end
  elsif service[:image_regex_match]
    response = HTTParty.get(url, :headers => headers)
    # HTTParty reports HTTP failures through the response status instead of
    # raising OpenURI::HTTPError, so check the status explicitly
    unless response.code == 200
      raise ServiceURLInvalid, "The service URL #{url} could not be fetched (returned a #{response.code})"
    end

    # the body may not contain the pattern at all; guard before reading the capture
    match = response.body.match(service[:image_regex_match])
    image_url = match && match[1]
  end

  raise ImageNotFound, "The service URL #{url} did not contain an identifiable image" unless image_url

  image_url
end