Class: ArxivSync::Downloader

Inherits:
Object
  • Object
show all
Defined in:
lib/arxivsync/downloader.rb

Instance Method Summary collapse

Constructor Details

#initialize(initial_params = {}) ⇒ Downloader

Returns a new instance of Downloader.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/arxivsync/downloader.rb', line 3

# Sets up a downloader for the arXiv OAI endpoint.
#
# When +initial_params[:from]+ equals today's date, a warning is printed
# and setup stops early: no metadata prefix is defaulted and no OAI client
# is created. The +false+ returned on that path is only visible to a direct
# caller of +initialize+; +new+ discards it.
#
# @param initial_params [Hash] parameters for the first OAI request; it is
#   stored as-is and may get a default :metadataPrefix added to it
def initialize(initial_params={})
  @initial_params = initial_params

  harvested_today = (@initial_params[:from] == Date.today)
  if harvested_today
    puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest.".light_yellow
    return false
  end

  # A metadata prefix is only defaulted when we are not resuming from a token.
  @initial_params[:metadataPrefix] ||= 'arXivRaw' unless @initial_params[:resumptionToken]

  @last_params = nil
  @oai = OAI::Client.new('http://export.arxiv.org/oai2')
end

Instance Method Details

#make_request(params) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/arxivsync/downloader.rb', line 49

# Sends a ListRecords request through the OAI client.
#
# A copy of +params+ is remembered in @last_params before the call so the
# same request can be repeated by retry_request; per the original note,
# list_records clobbers the hash it is handed. On a Faraday timeout the
# method waits 20 seconds and hands over to retry_request, whose result is
# returned instead.
#
# @param params [Hash] OAI request parameters
# @return [Object] whatever the client's list_records returns
def make_request(params)
  puts "Making OAI request with params: #{params.inspect}".light_magenta

  # Snapshot first: list_records will nuke the hash we pass in.
  @last_params = params.clone

  @oai.list_records(params)
rescue Faraday::Error::TimeoutError
  puts "Request timed out; retrying in 20 seconds".light_yellow
  sleep 20
  retry_request
end

#retry_request ⇒ Object



45
46
47
# File 'lib/arxivsync/downloader.rb', line 45

# Repeats the most recent request by passing the parameters saved in
# @last_params back to make_request.
#
# @return [Object] the result of make_request
def retry_request
  previous = @last_params
  make_request(previous)
end

#start(&b) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/arxivsync/downloader.rb', line 19

# Harvests every page of records, handing each response to the block.
#
# Makes the initial request with @initial_params, then keeps requesting
# with the returned resumption token until a response arrives without one.
# A token-less response whose document contains "Retry after 20 seconds"
# is treated as rate limiting: it is not yielded, and the last request is
# repeated after a 20 second pause.
#
# The block is optional; without one the pages are still fetched.
# If the constructor stopped early and never created the OAI client,
# nothing is requested.
#
# @yield [resp] each response obtained from make_request
# @return [Downloader] self
def start(&b)
  # initialize returns before building @oai when :from is today; there is
  # nothing to harvest in that case, so avoid calling into a nil client.
  return self unless @oai

  # Make the initial request
  resp = make_request(@initial_params)

  # Continue to make requests until the server stops sending
  # resumption tokens
  loop do
    token = resp.resumption_token

    if token && !token.empty?
      # We have a resumption token, keep going!
      b.call(resp) if b
      resp = make_request(resumptionToken: token)
    elsif resp.doc.to_s.include?("Retry after 20 seconds") # Rate limitation
      puts "Honoring 503 and sleeping for 20 seconds...".light_yellow
      sleep 20
      resp = retry_request
    else
      # No resumption token and no retry should mean we're finished
      b.call(resp) if b
      puts "Finished archiving~!".bold.light_green
      break
    end
  end

  self
end