Module: Twimage

Defined in: lib/twimage.rb,
lib/twimage/image.rb,
lib/twimage/version.rb

Defined Under Namespace

Classes: Image, ImageNotFound, ImageURLInvalid, ServiceURLInvalid

Constant Summary collapse

# Value sent as the 'User-Agent' header on every HTTP request made by the
# class methods of this module; embeds the library version.
USER_AGENT =

"Twimage #{Twimage::VERSION} http://github.com/cannikin/twimage"

# Table of supported image services, one Hash per service. Keys as used by
# the class methods of this module:
#   :name              - Symbol identifying the service (passed to Image.new by .get)
#   :service_match     - a Regexp, or an Array of Regexps, tested against the
#                        service URL by .find_service
#   :full_url_modifier - optional lambda; .get calls it with the service URL to
#                        obtain the URL of the full-resolution page
#   :image_css_match   - CSS selector; .get_image_url reads the 'src' attribute
#                        of the first matching element
#   :image_regex_match - Regexp used by .get_image_url when no :image_css_match
#                        is present; capture group 1 is taken as the image URL
SERVICES =

[{ :name => :twitpic,
:service_match => /twitpic\.com/,
:full_url_modifier => lambda { |url| url + '/full' },
:image_css_match => '#media-full img' },
              { :name => :yfrog,
:service_match => /yfrog\.com/,
:full_url_modifier => lambda { |url| url.gsub(/\.com/, '.com/z') },
:image_css_match => '#the-image img' },
              # instagram is matched under two hostnames, hence the Array
              { :name => :instagram,
:service_match => [/instagr\.am/, /instagram\.com/],
:image_css_match => '.photo' },
              # twitter has no CSS selector; the image URL is pulled out of the
              # page body with a regex instead
              { :name => :twitter,
#:full_url_modifier => lambda { |url| url + '/large' },
:service_match => /twitter\.com/,
:image_regex_match => /"media_url_https":"(.*?)"/}]

# Library version string; interpolated into USER_AGENT.
VERSION =

"0.1.2"

Class Method Summary collapse

.find_service(url) ⇒ Object

Figures out which service a URL belongs to by matching it against each service's regexes.
.get(url) ⇒ Object
.get_image(url) ⇒ Object

Downloads the actual image and puts it into a tempfile.
.get_image_url(service, url) ⇒ Object

Tears apart the HTML of the returned service page and finds the source URL of the image.

Class Method Details

.find_service(url) ⇒ `Object`

figure out which service this is by matching against regexes

# File 'lib/twimage.rb', line 45

# Figures out which service a URL belongs to by testing it against the
# :service_match pattern(s) of every entry in SERVICES.
#
# @param url [String] the service URL to classify
# @return [Hash, nil] the first SERVICES entry with a matching pattern, or nil
#   when no entry matches
def self.find_service(url)
  SERVICES.find do |service|
    # :service_match may be a single Regexp or an Array of Regexps, so
    # normalise it to a flat list before testing
    [service[:service_match]].flatten.any? { |regex| url.match(regex) }
  end
end

.get(url) ⇒ `Object`

# File 'lib/twimage.rb', line 33

# Resolves +url+ to a supported image service, locates the image on that
# service's page and downloads it.
#
# @param url [String] URL pointing (possibly through redirects) at a service page
# @return [Image] built from the service name, the resolved service URL, the
#   image URL and the downloaded image data
# @raise [ServiceURLInvalid] when the resolved URL matches no entry in SERVICES,
#   or when get_image_url cannot fetch the service page
# @raise [ImageNotFound] when get_image_url finds no image on the page
# @raise [ImageURLInvalid] when get_image cannot download the image
def self.get(url)
  # first point HTTParty at this URL and follow any redirects to get to the final page
  service_url = HTTParty.get(url, :headers => { 'User-Agent' => USER_AGENT }).request.path.to_s

  # check the resulting service_url for which service we're hitting;
  # find_service returns nil for an unknown service, which would otherwise
  # surface below as a NoMethodError on nil
  service = find_service(service_url)
  unless service
    raise ServiceURLInvalid, "The service URL #{service_url} does not match any supported service"
  end

  # get the full res version of the service_url (not every service defines a modifier)
  modifier = service[:full_url_modifier]
  full_res_service_url = modifier ? modifier.call(service_url) : service_url

  # get the URL to the image, then the image itself
  image_url = get_image_url(service, full_res_service_url)
  image = get_image(image_url)

  Image.new(:service => service[:name], :service_url => service_url, :image_url => image_url, :image => image)
end

.get_image(url) ⇒ `Object`

download the actual image and put into a tempfile

# File 'lib/twimage.rb', line 86

# Downloads the image at +url+ and returns its raw data.
#
# @param url [String] direct URL of the image file
# @return [String] the response body, tagged as UTF-8 via force_encoding
#   (NOTE(review): image data is binary; the UTF-8 tag is kept unchanged for
#   compatibility with existing callers)
# @raise [ImageURLInvalid] when the response status is anything other than 200
def self.get_image(url)
  response = HTTParty.get(url, :headers => { 'User-Agent' => USER_AGENT })

  # report the status that was actually returned rather than assuming a 404
  unless response.code == 200
    raise ImageURLInvalid, "The image_url #{url} was not found (returned a #{response.code})"
  end

  response.body.force_encoding('utf-8')
end

.get_image_url(service, url) ⇒ `Object`

tear apart the HTML on the returned service page and find the source of the image

# File 'lib/twimage.rb', line 57

# Tears apart the page at +url+ and finds the source URL of the image, using
# whichever matcher the service entry defines.
#
# @param service [Hash] an entry of SERVICES; :image_css_match is tried first,
#   then :image_regex_match
# @param url [String] URL of the service page to inspect
# @return [String] URL of the image file
# @raise [ServiceURLInvalid] when the service page cannot be fetched
# @raise [ImageNotFound] when the page contains no identifiable image, or the
#   service defines neither matcher
def self.get_image_url(service, url)
  image_url = nil

  if service[:image_css_match]
    begin
      # URI#open (open-uri) rather than Kernel#open: Kernel#open would treat a
      # URL starting with "|" as a command and, on Ruby 3+, no longer opens
      # URLs at all. The block form also closes the downloaded IO after parsing.
      document = URI.parse(url).open('User-Agent' => USER_AGENT) { |page| Nokogiri::HTML(page) }
    rescue OpenURI::HTTPError
      raise ServiceURLInvalid, "The service URL #{url} was not found (returned a 404)"
    end

    # a selector that matches nothing must end in ImageNotFound below, not in
    # a NoMethodError on nil
    image_tag = document.css(service[:image_css_match]).first
    image_url = image_tag['src'] if image_tag
  elsif service[:image_regex_match]
    response = HTTParty.get(url, :headers => { 'User-Agent' => USER_AGENT })

    # HTTParty reports HTTP failures through the status code instead of
    # raising OpenURI::HTTPError, so the status has to be checked explicitly
    unless response.code == 200
      raise ServiceURLInvalid, "The service URL #{url} was not found (returned a #{response.code})"
    end

    # capture group 1 holds the URL to the actual image file
    match = response.body.match(service[:image_regex_match])
    image_url = match[1] if match
  end

  raise ImageNotFound, "The service URL #{url} did not contain an identifiable image" unless image_url

  image_url
end

Module: Twimage

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.find_service(url) ⇒ Object

.get(url) ⇒ Object

.get_image(url) ⇒ Object

.get_image_url(service, url) ⇒ Object

.find_service(url) ⇒ `Object`

.get(url) ⇒ `Object`

.get_image(url) ⇒ `Object`

.get_image_url(service, url) ⇒ `Object`