Class: Retriever::FetchFiles

Inherits:
Fetch
  • Object
show all
Defined in:
lib/retriever/fetchfiles.rb

Overview

Receives a target URL and RR options; returns an array of all unique files (based on the given filetype) found on the target site.

Constant Summary

Constants inherited from Fetch

Retriever::Fetch::HR

Instance Attribute Summary

Attributes inherited from Fetch

#max_pages, #result, #t

Instance Method Summary collapse

Methods inherited from Fetch

#dump, #errlog, #filter_out_querystrings, #good_response?, #lg, #start, #write

Constructor Details

#initialize(url, options) ⇒ FetchFiles

Returns a new instance of FetchFiles.



7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/retriever/fetchfiles.rb', line 7

# Builds the collection of file URLs for the target site.
#
# Delegates setup to the parent constructor, calls +start+, seeds +@result+
# with the files parsed from the internal links of +@page_one+, runs the
# asynchronous crawl, and finally orders +@result+ by string length
# (shortest first).
#
# @param url forwarded unchanged to the parent constructor
# @param options forwarded unchanged to the parent constructor
def initialize(url, options)
  super
  start
  first_page_files = @page_one.parse_files(@page_one.parse_internal)
  @result.concat(first_page_files) unless first_page_files.size.zero?
  lg("#{@result.size} new files found")

  async_crawl_and_collect
  # crawl is over: push the progress bar (when one is in use) to completion
  @progressbar.finish if @progress
  @result.sort_by!(&:length)
end

Instance Method Details

#autodownload ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/retriever/fetchfiles.rb', line 34

# Walks the fetched file URL collection and downloads each entry.
#
# Prints a banner reporting how many files were located, calls
# +move_to_download_dir+ (presumably this enters the download directory --
# confirm against its definition), runs the downloads, and then steps the
# working directory back up one level.
#
# The step back up is performed even when a download raises, so a failure
# does not leave the process sitting inside the download directory. The
# original exception still propagates to the caller.
def autodownload
  puts HR
  puts '### Initiating Autodownload...'
  puts HR
  puts "#{@result.count} - #{@file_ext}'s Located"
  puts HR
  # Kept outside the begin/ensure: if entering the download directory fails
  # there is nothing to undo.
  move_to_download_dir
  begin
    iterate_thru_collection_and_download
  ensure
    Dir.chdir('..')
  end
end

#download_file(path) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/retriever/fetchfiles.rb', line 20

# Downloads the resource at +path+ into the current working directory.
#
# The query string is removed via +filter_out_querystrings+, and the last
# '/'-separated segment of what remains becomes the local file name.
#
# @param path [String] location of the file to download
# @return [nil] the result of the final +puts+
# @raise whatever the underlying open call raises when the fetch fails; in
#   that case no local file is created
def download_file(path)
  path = filter_out_querystrings(path)
  shortname = path.split('/').last
  puts "Initiating Download of: #{shortname}"
  # Since Ruby 3.0 a bare `open` no longer handles URLs, so use URI.open when
  # open-uri provides it; otherwise fall back to the original bare call.
  # NOTE(review): both forms still hand non-URL strings to Kernel#open.
  opener = (defined?(URI) && URI.respond_to?(:open)) ? URI.method(:open) : method(:open)
  # Fetch first, create the local file second: a failed request then leaves
  # no empty file behind.
  opener.call(path) do |read_file|
    File.open(shortname, 'wb') do |saved_file|
      # Stream instead of loading the whole payload into one String.
      IO.copy_stream(read_file, saved_file)
    end
  end
  puts '  SUCCESS: Download Complete'
end