Class: RWGet::Controller
- Inherits:
-
Object
- Object
- RWGet::Controller
- Defined in:
- lib/rwget/controller.rb
Instance Attribute Summary collapse
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Class Method Summary collapse
- .resolve_class(string) ⇒ Object
Instance Method Summary collapse
- #close ⇒ Object
-
#initialize(options) ⇒ Controller
constructor
A new instance of Controller.
- #key_for(uri) ⇒ Object
- #legal?(link) ⇒ Boolean
- #start ⇒ Object
Constructor Details
#initialize(options) ⇒ Controller
Returns a new instance of Controller.
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/rwget/controller.rb', line 11
#
# Builds a controller from an options hash and instantiates its collaborators.
#
# The hash is stored as-is (not copied), so the defaults and integer
# coercions below are applied to the caller's hash in place.
#
# NOTE(review): the `options` identifier was missing from the extracted
# listing and has been restored here; each collaborator is assumed to be
# constructed with the options hash — confirm against the gem source.
#
# @param options [Hash] crawl settings. Keys read here: :user_agent,
#   :accept_patterns, :reject_patterns, :quota, :depth, :wait, :limit_rate,
#   :time_limit, and the optional class-name overrides :queue_class,
#   :fetch_class, :store_class, :links_class, :dupes_class.
def initialize(options)
  @options = options
  @options[:user_agent] ||= "Ruby/Wget"

  @options[:accept_patterns] ||= []
  @options[:reject_patterns] ||= []

  # Numeric limits are coerced with to_i, so a missing (nil) value becomes 0.
  %w[quota depth wait limit_rate time_limit].each do |key|
    key = key.to_sym
    @options[key] = @options[key].to_i
  end

  # Each collaborator class may be overridden by name; the default constant is
  # only referenced when no override is given.
  @queue = (options[:queue_class] ? self.class.resolve_class(options[:queue_class]) : RWGet::Queue).new(options)
  @fetch = (options[:fetch_class] ? self.class.resolve_class(options[:fetch_class]) : RWGet::Fetch).new(options)
  @store = (options[:store_class] ? self.class.resolve_class(options[:store_class]) : RWGet::Store).new(options)
  @links = (options[:links_class] ? self.class.resolve_class(options[:links_class]) : RWGet::Links).new(options)
  @dupes = (options[:dupes_class] ? self.class.resolve_class(options[:dupes_class]) : RWGet::Dupes).new(options)
end
Instance Attribute Details
#options ⇒ Object (readonly)
Returns the value of attribute options.
3 4 5 |
# File 'lib/rwget/controller.rb', line 3
#
# Returns the value of attribute options.
#
# @return [Object] the options object stored in @options
def options
  @options
end
Class Method Details
.resolve_class(string) ⇒ Object
5 6 7 8 9 |
# File 'lib/rwget/controller.rb', line 5
#
# Resolves a "::"-separated constant path (e.g. "RWGet::Queue") to the
# constant it names, walking one segment at a time starting from Kernel.
#
# Empty segments are skipped, so a leading "::" ("::Foo::Bar") is accepted.
#
# @param string [#to_s] constant path to resolve
# @return [Object] the resolved constant (Kernel itself for an empty path)
# @raise [NameError] if any segment does not name a constant
def self.resolve_class(string)
  string.to_s.split("::").reject(&:empty?).inject(Kernel) do |scope, name|
    scope.const_get(name)
  end
end
Instance Method Details
#close ⇒ Object
116 117 118 119 120 |
# File 'lib/rwget/controller.rb', line 116
#
# Closes every collaborator (@queue, @fetch, @store, @links, @dupes) that
# responds to #close.
#
# All collaborators are attempted even if one of them raises, so a failure in
# one does not leave the others open; the first error is re-raised afterwards.
#
# @return [Array] the list of collaborators that were visited
# @raise [StandardError] the first error raised by a collaborator's #close
def close
  components = [@queue, @fetch, @store, @links, @dupes]
  first_error = nil

  components.each do |component|
    begin
      component.close if component.respond_to?(:close)
    rescue StandardError => e
      first_error ||= e
    end
  end

  raise first_error if first_error

  components
end
#key_for(uri) ⇒ Object
104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/rwget/controller.rb', line 104
#
# Builds the storage key (a "/"-joined relative path) for a fetched URI.
#
# Leading components, in order and each optional: the :prefix option, the
# crawl start timestamp (:timestampize), the URI scheme
# (:protocol_directories) and the host (omitted with :no_host_directories).
# The URI path segments follow; a query string is appended to the last
# segment as "segment?query".
#
# NOTE(review): the `options` lookups were missing from the extracted listing
# and have been restored via @options.
#
# @param uri [URI::Generic] the URI that was downloaded
# @return [String] the key, as produced by File.join
def key_for(uri)
  arr = []
  arr << @options[:prefix] if @options[:prefix]
  arr << @start_time if @options[:timestampize]
  arr << uri.scheme if @options[:protocol_directories]
  arr << uri.host unless @options[:no_host_directories]

  paths = uri.path.split("/")
  # Interpolation (rather than `paths.pop + ...`) keeps a root or empty path
  # with a query from failing: "/".split("/") is [], so pop returns nil.
  paths << "#{paths.pop}?#{uri.query}" if uri.query
  # An absolute path splits into a leading empty segment; drop it.
  paths.shift if paths.first.to_s.empty?

  File.join(arr + paths)
end
#legal?(link) ⇒ Boolean
89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/rwget/controller.rb', line 89
#
# Decides whether a discovered link may be queued, printing the reason for
# each decision.
#
# A link is refused when its host is not one of the seed hosts (unless
# :span_hosts is set). Otherwise it must match at least one accept pattern
# (an empty accept list accepts everything) and match no reject pattern.
#
# NOTE(review): the `options` lookups were missing from the extracted listing
# and have been restored via @options.
#
# @param link [#host, #to_s] the candidate link (a URI-like object)
# @return [Boolean] true when the link may be crawled
def legal?(link)
  unless @options[:span_hosts] || @original_hosts.include?(link.host)
    puts "can't span hosts: #{link}"
    return false
  end

  url = link.to_s
  accept_patterns = @options[:accept_patterns]

  legal = accept_patterns.empty?
  puts "accepted by default: #{url}" if legal
  legal ||= accept_patterns.any? { |pattern| url =~ pattern }
  puts "not in accept patterns: #{url}" unless legal

  rejected = @options[:reject_patterns].any? { |pattern| url =~ pattern }
  puts "in reject patterns: #{url}" if rejected

  legal && !rejected
end
#start ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/rwget/controller.rb', line 30
#
# Runs the crawl: seeds the queue, then repeatedly takes a URL, downloads it,
# queues the legal, non-duplicate links found in it and stores the download
# under its key. Progress is printed with puts.
#
# The loop ends when the queue is empty, or when the :quota (total downloaded
# bytes) or :time_limit (seconds since start) is reached; a value of 0
# disables the respective limit. :depth (0 = unlimited) skips URLs queued
# deeper than that, :limit_rate (0 = unlimited) pauses while the average
# bytes-per-second is above it, and :wait is slept after every stored file.
#
# NOTE(review): the `options` lookups were missing from the extracted listing
# and have been restored via @options.
#
# @return [nil]
def start
  @start_time = Time.now.to_i.to_s
  @start = Time.now
  @original_hosts = Set.new
  @options[:seeds].each do |seed|
    @queue.put(seed, 0)
    @original_hosts << URI.parse(seed).host
  end

  downloaded = 0
  while (@options[:quota] == 0 || downloaded < @options[:quota]) &&
        (@options[:time_limit] == 0 || Time.now - @start < @options[:time_limit])

    url, depth = @queue.get

    unless url
      puts "no more urls"
      return
    end

    next if @options[:depth] > 0 && depth > @options[:depth]

    uri = URI.parse(url)

    while @options[:limit_rate] > 0 && downloaded / (Time.now - @start) > @options[:limit_rate]
      puts "sleeping until under rate limit"
      sleep 1
    end
    puts "download rate: #{downloaded / (Time.now - @start)}bps"

    puts "downloading #{uri}"
    effective_url, tmpfile = @fetch.fetch(uri, @options[:user_agent])

    if tmpfile
      begin
        downloaded += File.size(tmpfile.path)
        puts "parsing links"
        @links.urls(effective_url, tmpfile).each do |link|
          legal = legal?(link)
          # The duplicate check runs for every link, legal or not.
          dupe = @dupes.dupe?(link)
          puts "dupe: #{link}" if dupe
          if legal && !dupe
            puts "adding link: #{link}"
            @queue.put(link, depth + 1)
          end
        end
        key = key_for(uri)
        puts "storing at #{key}"
        @store.put(key, tmpfile)
        sleep @options[:wait]
      ensure
        # Release the temp file even when parsing or storing raised; closing
        # is best-effort, so a failure here is deliberately ignored.
        begin
          tmpfile.close
        rescue StandardError
          nil
        end
      end
    else
      puts "unable to download"
    end
  end
  puts "hit time/quota"
end