Class: ImageDownloader::Images

Inherits:

Object

Object
ImageDownloader::Images

show all

Defined in: lib/image_downloader/images.rb

Constant Summary collapse

MAX_FILE_NAME_LENGTH_ALLOWED =

IMAGE_EXTENSIONS =

["jpg","jpeg","png","gif","ico","svg","bmp"]

EMPTY_FILE_NAME =

'EMPTY_'

Instance Attribute Summary collapse

#absolute_src ⇒ Object

Returns the value of attribute absolute_src.
#file_name ⇒ Object

Returns the value of attribute file_name.
#file_path_name ⇒ Object

Returns the value of attribute file_path_name.
#page_host ⇒ Object

Returns the value of attribute page_host.
#src ⇒ Object

Returns the value of attribute src.

Instance Method Summary collapse

#download(user_agent) ⇒ Object
#download_by_segment(http, request, user_agent) ⇒ Object
#download_simple(http, request, user_agent) ⇒ Object
#initialize(page_host, src, h = {}) ⇒ Images constructor

A new instance of Images.

Constructor Details

#initialize(page_host, src, h = {}) ⇒ `Images`

Returns a new instance of Images.

# File 'lib/image_downloader/images.rb', line 9

# Builds the local file name, local file path and absolute URL for one image.
#
# @param page_host [String] host of the page the image was found on; used to
#   build an absolute URL when +src+ is relative
# @param src [String] image source: an absolute http(s) URL, a
#   protocol-relative URL ("//host/path") or a path relative to +page_host+
# @param h [Hash] options
# @option h [String] :file_name_prefix text prepended to the local file name
#   (treated as empty when missing)
# @option h [String] :catalog_path directory the file is stored in
#   (defaults to '.' when missing)
def initialize(page_host, src, h = {})
  @page_host = page_host
  @src = src

  # Guard against Errno::ENAMETOOLONG and empty file names: use the text after
  # the last '/', substitute a random placeholder when nothing is left, and
  # keep only the tail (which carries the extension) when it is too long.
  file_name_suffix = @src.sub(%r{.*/}, '')
  file_name_suffix = EMPTY_FILE_NAME + rand(100000).to_s if file_name_suffix.empty?
  if file_name_suffix.size > MAX_FILE_NAME_LENGTH_ALLOWED
    file_name_suffix = file_name_suffix[-MAX_FILE_NAME_LENGTH_ALLOWED, MAX_FILE_NAME_LENGTH_ALLOWED]
  end

  # The options hash defaults to {}, so tolerate missing keys instead of
  # failing on nil + String.
  @file_name = h[:file_name_prefix].to_s + file_name_suffix
  @file_path_name = (h[:catalog_path] || '.').to_s + '/' + @file_name

  @absolute_src =
    if @src =~ %r{\Ahttps?://}i
      # Only a leading scheme makes the src absolute; "http" elsewhere in the
      # path (e.g. "/img/http_logo.png") must not.
      @src
    elsif @src.start_with?('//')
      # Protocol-relative URL: it already names its own host.
      'http:' + @src
    else
      'http://' + page_host + '/' + @src.sub(%r{\A/+}, '')
    end
end

Instance Attribute Details

#absolute_src ⇒ `Object`

Returns the value of attribute absolute_src.



3
4
5

# File 'lib/image_downloader/images.rb', line 3

# Reader for @absolute_src, which the constructor assigns; #download parses
# this value as the URL to fetch.
#
# @return [Object] the current value of @absolute_src
def absolute_src
  @absolute_src
end

#file_name ⇒ `Object`

Returns the value of attribute file_name.



3
4
5

# File 'lib/image_downloader/images.rb', line 3

# Reader for @file_name, which the constructor builds from the
# :file_name_prefix option followed by a name derived from the src.
#
# @return [Object] the current value of @file_name
def file_name
  @file_name
end

#file_path_name ⇒ `Object`

Returns the value of attribute file_path_name.



3
4
5

# File 'lib/image_downloader/images.rb', line 3

# Reader for @file_path_name, which the constructor builds from the
# :catalog_path option, a '/' and the file name; the download methods open
# this path for binary writing.
#
# @return [Object] the current value of @file_path_name
def file_path_name
  @file_path_name
end

#page_host ⇒ `Object`

Returns the value of attribute page_host.



3
4
5

# File 'lib/image_downloader/images.rb', line 3

# Reader for @page_host, the first constructor argument.
#
# @return [Object] the current value of @page_host
def page_host
  @page_host
end

#src ⇒ `Object`

Returns the value of attribute src.



3
4
5

# File 'lib/image_downloader/images.rb', line 3

# Reader for @src, the second constructor argument (the image source as given).
#
# @return [Object] the current value of @src
def src
  @src
end

Instance Method Details

#download(user_agent) ⇒ `Object`

# File 'lib/image_downloader/images.rb', line 25

# Fetches the image at #absolute_src and stores it via #download_by_segment.
#
# @param user_agent [String] value sent as the User-Agent request header
# @return [Object, nil] whatever #download_by_segment returns, or nil when
#   #absolute_src is not a valid URI
def download(user_agent)
  url = URI.parse(absolute_src)
  # request_uri keeps the query string and yields "/" for a bare host, where
  # url.path would drop the query and may be empty (rejected by Net::HTTP::Get).
  request_path = url.respond_to?(:request_uri) ? url.request_uri : url.path
  request = Net::HTTP::Get.new(request_path)
  # Honour an explicit port and switch TLS on for https URLs.
  Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
    # To avoid 403 and 404 responses from web servers (e.g. ones that detect
    # the current client as a script) you can use:
    # - watir (with js support and more ...), but very, very slow
    # - mechanize (main web client), slow
    # - wget, quick, but lacks some abilities (403, 404 responses)
    # - sockets, independent request, quick, but low-level (many lines of code)
    download_by_segment(http, request, user_agent)
    # download_simple(http, request, user_agent)
  end
rescue URI::InvalidURIError
  p "Error: bad URI: #{absolute_src}"  if $debug_option
end

#download_by_segment(http, request, user_agent) ⇒ `Object`

# File 'lib/image_downloader/images.rb', line 42

# Streams the response body to #file_path_name one segment at a time, so the
# whole image is never held in memory.
#
# @param http [#request_get] connection to issue the GET on (an open
#   Net::HTTP session when called from #download)
# @param request [#path] object whose +path+ is the request target
# @param user_agent [String] value sent as the User-Agent request header
# @return [Object] the value returned by +http.request_get+
def download_by_segment(http, request, user_agent)
  # File.open rather than Kernel#open: Kernel#open runs a command when the
  # path starts with "|". The block form closes the file even if the
  # request raises.
  File.open(file_path_name, 'wb') do |file|
    http.request_get(request.path, 'User-Agent' => user_agent) do |response|
      response.read_body { |segment| file.write(segment) }
    end
  end
end

#download_simple(http, request, user_agent) ⇒ `Object`

# File 'lib/image_downloader/images.rb', line 55

# Fetches the whole response body in one call and writes it to
# #file_path_name.
#
# @param http [#get] connection to issue the GET on
# @param request [#path] object whose +path+ is the request target
# @param user_agent [String] value sent as the User-Agent request header
# @return [Integer] number of bytes written to the file
def download_simple(http, request, user_agent)
  response = http.get(request.path, 'User-Agent' => user_agent)
  # File.open rather than Kernel#open: Kernel#open runs a command when the
  # path starts with "|".
  File.open(file_path_name, 'wb') { |file| file.write(response.body) }
end

Class: ImageDownloader::Images

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(page_host, src, h = {}) ⇒ Images

Instance Attribute Details

#absolute_src ⇒ Object

#file_name ⇒ Object

#file_path_name ⇒ Object

#page_host ⇒ Object

#src ⇒ Object