Class: OAI::Harvester::Harvest
- Inherits:
-
Object
- Object
- OAI::Harvester::Harvest
- Defined in:
- lib/oai/harvester/harvest.rb,
lib/oai/harvester/logging.rb
Constant Summary collapse
- DIRECTORY_LAYOUT =
"%Y/%m".freeze
Instance Method Summary collapse
-
#initialize(*args) ⇒ Harvest
constructor
A new instance of Harvest.
- #orig_call ⇒ Object
- #orig_harvest ⇒ Object
-
#orig_init ⇒ Harvest
A new instance of Harvest.
- #orig_start ⇒ Object
- #start(sites = nil, interactive = false) ⇒ Object
Constructor Details
#initialize(*args) ⇒ Harvest
Returns a new instance of Harvest.
9 10 11 12 13 14 15 16 17 |
# File 'lib/oai/harvester/harvest.rb', line 9

# Sets up a harvester, falling back to the loaded configuration for anything
# the caller leaves out.
#
# @param config    configuration object; Config.load is used when omitted
# @param directory storage directory; @config.storage is used when omitted
# @param date      value kept (frozen) in @from; may be nil
# @param to        value kept (frozen) in @until; may be nil
def initialize(config = nil, directory = nil, date = nil, to = nil)
  @config    = config || Config.load
  @directory = directory || @config.storage

  # freeze returns its receiver, so the stored objects are the caller's own
  # (now frozen) objects.
  @from  = date.freeze
  @until = to.freeze

  # Pick the parser name based on whether XML::Document is defined.
  @parser =
    if defined?(XML::Document)
      'libxml'
    else
      'rexml'
    end
end
Instance Method Details
#orig_call ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/oai/harvester/logging.rb', line 8

# Downloads every record from an OAI provider into a gzipped temp file.
#
# The records are wrapped in a single <records> element. Follow-up requests
# are made for as long as the response carries a non-empty resumption token.
#
# @param url  [String] base URL handed to OAI::Client.new
# @param opts [Hash] list_records options (e.g. :from, :until, :set); the
#   hash itself is not modified
# @return [Array] two elements: the (closed) Tempfile and the record count
def call(url, opts)
  # Preserve the caller's hash; the copy is what goes to the client.
  options = opts.dup

  records = 0
  client = OAI::Client.new(url, :parser => @parser)
  # The result is unused here; the call is kept because the original made it
  # (presumably as an early check that the provider answers -- TODO confirm).
  client.identify

  file = Tempfile.new('oai_data')
  gz = Zlib::GzipWriter.new(file)
  gz << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
  gz << "<records>"
  begin
    # Forward the harvest options. The original called list_records() with
    # no arguments, so the copied options were never used.
    response = client.list_records(options)

    loop do
      response.each do |rec|
        gz << rec._source
        records += 1
      end
      puts "#{records} records retrieved" if @interactive

      # Keep following resumption tokens until none (or an empty one) is
      # returned.
      token = response.resumption_token
      break unless token && !token.empty?

      puts "\nresumption token recieved, continuing" if @interactive
      response = client.list_records(:resumption_token => token)
    end

    gz << "</records>"
  ensure
    gz.close
    file.close
  end

  [file, records]
end
#orig_harvest ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/oai/harvester/logging.rb', line 7

# Harvests a single configured site and records when it was harvested.
#
# The request window ends at @until (or the current time) and starts at @from,
# at a :from already present in the site options, or at the value returned by
# earliest. When a storage directory is configured the downloaded file is
# moved into a date-based sub-directory.
#
# @param site key into @config.sites
# @raise [OAI::Exception] re-raised when not running interactively
def harvest(site)
  # Work on a copy. The per-run :from/:until values and the removal of
  # :url/:set below must not end up in the configuration object, which the
  # caller saves after harvesting.
  opts = @config.sites[site].dup

  harvest_time = @until ? @until.to_time.utc : Time.now.utc

  # Format the window to match the granularity reported for the provider.
  if OAI::Const::Granularity::LOW == granularity(opts[:url])
    opts[:until] = harvest_time.strftime("%Y-%m-%d")
    opts[:from] = @from.strftime("%Y-%m-%d") if @from
  else
    opts[:until] = harvest_time.xmlschema
    opts[:from] = @from.xmlschema if @from
  end

  # Allow a from date to be passed in
  opts[:from] = earliest(opts[:url]) unless opts[:from]
  opts.delete(:set) if 'all' == opts[:set]

  begin
    # Connect, and download
    file, _records = call(opts.delete(:url), opts)

    # Move document to storage directory if configured
    if @directory
      directory_layout = @config.layouts[site] if @config.layouts
      dir = File.join(@directory, date_based_directory(harvest_time, directory_layout))
      FileUtils.mkdir_p dir
      FileUtils.mv(file.path,
                   File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]), harvest_time)}.xml.gz"))
    else
      puts "no configured destination for temp file" if @interactive
    end

    # Deliberately written to the live configuration, not the working copy.
    @config.sites[site]['last'] = harvest_time
  rescue OAI::NoMatchException
    puts "No new records available" if @interactive
  rescue OAI::Exception => ex
    raise ex unless @interactive
    # Interactive runs report the problem and carry on.
    puts ex.message
  end
end
#orig_init ⇒ Harvest
Returns a new instance of Harvest.
9 10 11 12 13 14 15 16 17 |
# File 'lib/oai/harvester/logging.rb', line 9

# Creates the harvester, using the loaded configuration for any argument the
# caller does not supply.
#
# @param config    configuration object; Config.load is used when omitted
# @param directory storage directory; @config.storage is used when omitted
# @param date      value kept (frozen) in @from; may be nil
# @param to        value kept (frozen) in @until; may be nil
def initialize(config = nil, directory = nil, date = nil, to = nil)
  @config    = config || Config.load
  @directory = directory || @config.storage

  # freeze hands back its receiver, so these hold the caller's objects,
  # frozen in place.
  @from  = date.freeze
  @until = to.freeze

  # Parser name depends on whether XML::Document is defined.
  @parser =
    if defined?(XML::Document)
      'libxml'
    else
      'rexml'
    end
end
#orig_start ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/oai/harvester/logging.rb', line 6

# Harvests each requested site in turn and then saves the configuration.
#
# @param sites       site keys to harvest; when omitted, the keys of
#   @config.sites are used (an empty collection if they cannot be read)
# @param interactive stored in @interactive
# @return whatever sites.each returns
def start(sites = nil, interactive = false)
  @interactive = interactive

  # Default to every configured site; a failure to read them means there is
  # nothing to harvest.
  sites ||= begin
    @config.sites.keys
  rescue StandardError
    {}
  end

  begin
    sites.each { |site_key| harvest(site_key) }
  ensure
    # The configuration is saved even when a harvest raised.
    @config.save
  end
end
#start(sites = nil, interactive = false) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/oai/harvester/harvest.rb', line 19

# Runs a harvest for each requested site, saving the configuration afterwards.
#
# @param sites       site keys to harvest; when omitted, the keys of
#   @config.sites are used (an empty collection if they cannot be read)
# @param interactive stored in @interactive
# @return whatever sites.each returns
def start(sites = nil, interactive = false)
  @interactive = interactive

  # No explicit list: fall back to every configured site, or to nothing if
  # the configuration cannot be read.
  sites ||= begin
    @config.sites.keys
  rescue StandardError
    {}
  end

  begin
    sites.each { |site_key| harvest(site_key) }
  ensure
    # Save the configuration whether or not a harvest raised.
    @config.save
  end
end