Class: ImageDownloader::Images

Inherits:
Object
  • Object
show all
Defined in:
lib/image_downloader/images.rb

Constant Summary collapse

# Maximum number of characters kept from the URL-derived part of a file name;
# longer names are shortened in #initialize to avoid Errno::ENAMETOOLONG.
MAX_FILE_NAME_LENGTH_ALLOWED = 200
# Presumably the file extensions recognised as images; not referenced by the
# methods shown here, so confirm against the callers.
IMAGE_EXTENSIONS = %w[jpg jpeg png gif ico svg bmp].freeze
# Prefix of the generated file name used when the URL yields an empty name.
EMPTY_FILE_NAME = 'EMPTY_'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(page_host, src, h = {}) ⇒ Images

Returns a new instance of Images.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/image_downloader/images.rb', line 9

# Derives the local file name, the local file path and the absolute URL for
# one image referenced by a page.
#
# @param page_host [String] host of the page referencing the image; used to
#   resolve a relative +src+
# @param src [String] image reference: an absolute http(s) URL, a
#   protocol-relative URL ("//host/path") or a path relative to +page_host+
# @param h [Hash] options
# @option h [String] :file_name_prefix text put in front of the file name
#   (defaults to an empty string)
# @option h [String] :catalog_path directory the file is stored in
#   (defaults to the current directory, ".")
def initialize(page_host, src, h = {})
  @page_host = page_host
  @src = src

  # Everything after the last "/" is the candidate file name.
  file_name_suffix = @src.sub(%r{.*/}, '')
  # A src ending in "/" leaves nothing; fall back to a generated name.
  file_name_suffix = EMPTY_FILE_NAME + rand(100000).to_s if file_name_suffix.empty?
  # Keep only the tail (where the extension lives) to avoid Errno::ENAMETOOLONG.
  if file_name_suffix.size > MAX_FILE_NAME_LENGTH_ALLOWED
    file_name_suffix = file_name_suffix[-MAX_FILE_NAME_LENGTH_ALLOWED, MAX_FILE_NAME_LENGTH_ALLOWED]
  end

  # Missing options fall back to defaults instead of raising on nil + String.
  @file_name = h[:file_name_prefix].to_s + file_name_suffix
  @file_path_name = "#{h[:catalog_path] || '.'}/#{@file_name}"

  @absolute_src =
    if @src =~ %r{\Ahttps?://}i
      # Only a leading scheme makes the src absolute; "http" appearing later
      # in a path or query string must not.
      @src
    elsif @src.start_with?('//')
      # Protocol-relative URL: it already names its own host.
      "http:#{@src}"
    else
      "http://#{page_host}/#{@src.sub(%r{\A/+}, '')}"
    end
end

Instance Attribute Details

#absolute_src ⇒ Object

Returns the value of attribute absolute_src.



3
4
5
# File 'lib/image_downloader/images.rb', line 3

# Absolute URL of the image, computed once in #initialize from +page_host+
# and +src+. This is the URL that #download parses and requests.
#
# @return [Object] the value stored in @absolute_src (a String when +src+
#   is a String)
def absolute_src
  @absolute_src
end

#file_name ⇒ Object

Returns the value of attribute file_name.



3
4
5
# File 'lib/image_downloader/images.rb', line 3

# Local file name built in #initialize: the :file_name_prefix option followed
# by the part of +src+ after its last "/" (shortened when too long, or
# replaced by a generated name when empty).
#
# @return [Object] the value stored in @file_name
def file_name
  @file_name
end

#file_path_name ⇒ Object

Returns the value of attribute file_path_name.



3
4
5
# File 'lib/image_downloader/images.rb', line 3

# Destination path of the downloaded file, built in #initialize from the
# :catalog_path option, a "/" separator and #file_name. Both
# #download_by_segment and #download_simple open this path for writing.
#
# @return [Object] the value stored in @file_path_name
def file_path_name
  @file_path_name
end

#page_host ⇒ Object

Returns the value of attribute page_host.



3
4
5
# File 'lib/image_downloader/images.rb', line 3

# Host of the page that references the image, stored unchanged from the
# +page_host+ constructor argument.
#
# @return [Object] the value stored in @page_host
def page_host
  @page_host
end

#src ⇒ Object

Returns the value of attribute src.



3
4
5
# File 'lib/image_downloader/images.rb', line 3

# Original image reference, stored unchanged from the +src+ constructor
# argument (the resolved form is available as #absolute_src).
#
# @return [Object] the value stored in @src
def src
  @src
end

Instance Method Details

#download(user_agent) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/image_downloader/images.rb', line 25

# Downloads the image at #absolute_src and stores it through
# #download_by_segment.
#
# The connection honours the URL's port and uses TLS for https URLs, and the
# request keeps the URL's query string.
#
# @param user_agent [String] value sent in the User-Agent request header
# @return [Object, nil] the result of #download_by_segment; when the URL
#   cannot be parsed, the result of the debug print, or nil if $debug_option
#   is not set
def download(user_agent)
  url = URI.parse(absolute_src)
  # request_uri includes the query string and is "/" for a bare host, whereas
  # path alone drops the query and may be empty. Generic (scheme-less) URIs do
  # not provide request_uri, so fall back to path for them.
  request_path = url.respond_to?(:request_uri) ? url.request_uri : url.path
  request = Net::HTTP::Get.new(request_path)
  Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    # To avoid 403 and 404 responses from web servers (e.g. ones that detect
    # the current client as a script) you can use:
    # - watir (with JS support and more), but very, very slow
    # - mechanize (main web client), slow
    # - wget, quick, but lacks some abilities (403, 404 responses)
    # - sockets, independent request, quick, but low-level (many lines of code)
    download_by_segment(http, request, user_agent)
    # download_simple(http, request, user_agent)
  end
rescue URI::InvalidURIError
  p "Error: bad URI: #{absolute_src}" if $debug_option
end

#download_by_segment(http, request, user_agent) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/image_downloader/images.rb', line 42

# Requests +request.path+ over +http+ and writes the response body to
# #file_path_name segment by segment, as the segments are read.
#
# @param http [Net::HTTP] started connection (anything responding to
#   +request_get+ with a block)
# @param request [#path] request whose path is fetched
# @param user_agent [String] value sent in the User-Agent request header
# @return [Object] whatever +http.request_get+ returns
def download_by_segment(http, request, user_agent)
  # File.open rather than Kernel#open, so a path starting with "|" is always
  # a file and never a command to run. The block form closes the file even
  # when the request raises.
  File.open(file_path_name, 'wb') do |file|
    http.request_get(request.path, 'User-Agent' => user_agent) do |response|
      response.read_body { |segment| file.write(segment) }
    end
  end
end

#download_simple(http, request, user_agent) ⇒ Object



55
56
57
58
59
60
# File 'lib/image_downloader/images.rb', line 55

# Fetches +request.path+ over +http+ in one call and writes the whole
# response body to #file_path_name.
#
# @param http [Net::HTTP] started connection (anything responding to +get+)
# @param request [#path] request whose path is fetched
# @param user_agent [String] value sent in the User-Agent request header
# @return [Integer] number of bytes written
def download_simple(http, request, user_agent)
  response = http.get(request.path, 'User-Agent' => user_agent)
  # File.open rather than Kernel#open, so a path starting with "|" is always
  # a file and never a command to run.
  File.open(file_path_name, 'wb') { |file| file.write(response.body) }
end