Class: ArxivSync::Downloader
- Inherits:
-
Object
- Object
- ArxivSync::Downloader
- Defined in:
- lib/arxivsync/downloader.rb
Instance Method Summary collapse
-
#initialize(initial_params = {}) ⇒ Downloader
constructor
A new instance of Downloader.
- #make_request(params) ⇒ Object
- #retry_request ⇒ Object
- #start(&b) ⇒ Object
Constructor Details
#initialize(initial_params = {}) ⇒ Downloader
Returns a new instance of Downloader.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/arxivsync/downloader.rb', line 3
#
# Stores the initial request parameters and, unless the harvest is already
# up to date for today, prepares the OAI client used by the other methods.
#
# @param initial_params [Hash] parameters for the first request. The keys
#   read here are :from, :resumptionToken and :metadataPrefix. The hash is
#   kept by reference and may have :metadataPrefix added to it.
def initialize(initial_params={})
  @initial_params = initial_params

  already_harvested_today = (@initial_params[:from] == Date.today)
  if already_harvested_today
    puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest.".light_yellow
    # NOTE: the value returned from initialize is discarded by .new, so the
    # caller still receives an instance; @last_params and @oai are simply
    # left unset on this path.
    return false
  end

  # Only default the metadata format when no resumption token was supplied.
  @initial_params[:metadataPrefix] ||= 'arXivRaw' unless @initial_params[:resumptionToken]

  @last_params = nil
  @oai = OAI::Client.new('http://export.arxiv.org/oai2')
end
Instance Method Details
#make_request(params) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/arxivsync/downloader.rb', line 49
#
# Logs the request, remembers a copy of its parameters in @last_params and
# calls list_records on the OAI client. If the request times out, waits
# 20 seconds and tries again through retry_request.
#
# @param params [Hash] parameters passed to @oai.list_records
# @return [Object] whatever @oai.list_records returns (possibly from a retry)
def make_request(params)
  puts "Making OAI request with params: #{params.inspect}".light_magenta

  # list_records will nuke our params, so keep a copy for retry_request.
  @last_params = params.clone

  begin
    @oai.list_records(params)
  rescue Faraday::Error::TimeoutError
    puts "Request timed out; retrying in 20 seconds".light_yellow
    sleep 20
    retry_request
  end
end
#retry_request ⇒ Object
45 46 47 |
# File 'lib/arxivsync/downloader.rb', line 45
#
# Repeats the most recent request by handing the parameters saved in
# @last_params back to make_request.
#
# @return [Object] the result of make_request
def retry_request
  previous_params = @last_params
  make_request(previous_params)
end
#start(&b) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/arxivsync/downloader.rb', line 19
#
# Runs the harvest: makes the initial request, then keeps requesting with
# the returned resumption token until the server stops sending one. Every
# response that is not a rate-limit notice is passed to the block.
#
# @yield [resp] each response returned by make_request / retry_request
# @return [Downloader] self
# @raise [ArgumentError] if no block is given
def start(&b)
  # Fail before touching the network rather than after the first response.
  raise ArgumentError, 'start requires a block to receive each response' unless b

  # initialize returns early, leaving @oai unset, when the :from date is
  # today. There is nothing to harvest then, and continuing would fail
  # with a NoMethodError on nil inside make_request.
  return self unless @oai

  # Make the initial request
  resp = make_request(@initial_params)

  # `while true` rather than `loop`, so a StopIteration raised by the block
  # propagates to the caller instead of silently ending the harvest.
  while true
    token = resp.resumption_token

    if token && !token.empty?
      # We have a resumption token, keep going!
      b.call(resp)
      resp = make_request(resumptionToken: resp.resumption_token)
    elsif resp.doc.to_s.include?("Retry after 20 seconds")
      # Rate limitation: wait, then repeat the last request
      puts "Honoring 503 and sleeping for 20 seconds...".light_yellow
      sleep 20
      resp = retry_request
    else
      # No resumption token and no retry notice should mean we're finished
      b.call(resp)
      puts "Finished archiving~!".bold.light_green
      break
    end
  end

  self
end