Class: IDl
- Inherits: Object
- Inheritance hierarchy: Object > IDl
- Defined in:
- lib/idl.rb
Instance Method Summary collapse
- #fetch(urls, target_dir = './') ⇒ Object
- #harvest(url, linked = false, target_dir = './') ⇒ Object
- #initialize ⇒ IDl (constructor): Returns a new instance of IDl.
- #unique_filepath(url, target_dir) ⇒ Object
Constructor Details
#initialize ⇒ IDl
Returns a new instance of IDl.
13 14 15 |
# File 'lib/idl.rb', line 13
# Sets up the list of file extensions (each with a leading dot) that
# #harvest uses to recognise links pointing at image files.
def initialize
  @image_extensions = %w[.jpg .jpeg .png .gif .tif .tiff]
end
Instance Method Details
#fetch(urls, target_dir = './') ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/idl.rb', line 38
# Downloads every distinct URL in +urls+ concurrently inside an
# EventMachine reactor, streaming each response body to a file whose
# name is chosen by #unique_filepath.
#
# @param urls [Array<String>, String, nil] URLs to download; duplicates
#   are requested only once, and nil / an empty list is a no-op
# @param target_dir [String] directory the files are written to; it is
#   created when the first chunk of a download arrives
# @return [nil] when there is nothing to download; otherwise whatever
#   EM.run returns once the reactor has been stopped
def fetch(urls, target_dir='./')
  pending = Array(urls).uniq
  # An EM::MultiRequest with no requests never fires its callback, so
  # EM.stop would never be reached and EM.run would block forever.
  return if pending.empty?

  EM.run do
    request_pool = EM::MultiRequest.new
    request_pool.callback do
      puts 'All requests finished.'
      EM.stop
    end

    pending.each do |url|
      puts "Enqueuing [#{url}]"
      request = EM::HttpRequest.new(url).get

      # NOTE(review): relies on a `file` accessor on the request object,
      # presumably added elsewhere in lib/idl.rb -- confirm.
      request.stream do |chunk|
        unless request.file
          # Create the directory and open the output lazily, once per
          # request, instead of re-checking on every chunk.
          FileUtils.mkdir_p target_dir
          request.file = File.open unique_filepath(url, target_dir), 'wb'
        end
        request.file.write chunk
      end

      request.callback do
        # Close the handle so buffered data is flushed and the
        # descriptor is released.
        request.file.close if request.file && !request.file.closed?
        puts "Image [#{url}] was downloaded successfully."
      end

      request.errback do
        request.file.close if request.file && !request.file.closed?
        warn "Image [#{url}] could not be downloaded."
      end

      request_pool.add request.object_id, request
    end
  end
end
#harvest(url, linked = false, target_dir = './') ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/idl.rb', line 17
# Loads the page at +url+, collects image URLs from it and hands them
# to #fetch.
#
# @param url [String] address of the page to scan; also the base that
#   relative references are resolved against
# @param linked [Boolean] when true, collect <a href> targets whose path
#   ends in one of @image_extensions; when false, collect every <img src>
# @param target_dir [String] directory passed through to #fetch
# @return [Object, nil] the result of #fetch, or nil when no image URL
#   was found
def harvest(url, linked=false, target_dir='./')
  # Kernel#open stopped opening URLs in Ruby 3.0; URI.open (open-uri,
  # Ruby 2.5+) is the supported entry point. The block form closes the
  # handle once the document is parsed.
  doc = URI.open(url) { |io| Nokogiri.parse io }
  base = URI(url)

  selector, attribute = linked ? ['a[href]', 'href'] : ['img[src]', 'src']

  urls = doc.css(selector).map do |node|
    reference = node[attribute]
    begin
      if linked
        # Opaque URIs such as mailto: have a nil path and are skipped.
        path = URI(reference).path
        next unless path && path.downcase.end_with?(*@image_extensions)
      end
      base.merge(reference).to_s
    rescue URI::InvalidURIError
      # A single malformed reference should not abort the whole harvest.
      nil
    end
  end.compact

  # An Array is always truthy, so test for emptiness explicitly.
  fetch(urls, target_dir) unless urls.empty?
end
#unique_filepath(url, target_dir) ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/idl.rb', line 66
# Picks a path inside +target_dir+ for the file named by the last path
# segment of +url+, avoiding files that already exist.
#
# If the plain name is taken, "-2", "-3", ... is inserted before the
# extension until an unused path is found.
#
# @param url [String] URL whose path supplies the file name
# @param target_dir [String] destination directory, with or without a
#   trailing separator
# @return [String] a path that did not exist at the time of the check
def unique_filepath(url, target_dir)
  filename = File.basename URI(url).path
  ext = File.extname filename
  stem = File.basename filename, ext

  # File.join supplies the separator when target_dir lacks a trailing one.
  filepath = File.join target_dir, filename
  suffix = 1
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  while File.exist? filepath
    suffix += 1
    filepath = File.join target_dir, "#{stem}-#{suffix}#{ext}"
  end
  filepath
end