Module: Cul::Hydra::Indexer
- Defined in:
- lib/cul_hydra/indexer.rb
Constant Summary collapse
- NUM_FEDORA_RETRY_ATTEMPTS =
3
- DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS =
5.seconds
Class Method Summary collapse
- .descend_from(pid, pids_to_omit = nil, verbose_output = false) ⇒ Object
- .index_pid(pid, skip_generic_resources = false, verbose_output = false) ⇒ Object
- .recursively_index_fedora_objects(top_pid, pids_to_omit = nil, skip_generic_resources = false, verbose_output = false) ⇒ Object
Class Method Details
.descend_from(pid, pids_to_omit = nil, verbose_output = false) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/cul_hydra/indexer.rb', line 6
#
# Yields the given pid and then each pid returned by
# Cul::Hydra::RisearchMembers.get_recursive_member_pids for it, skipping any
# pid listed in +pids_to_omit+.
#
# @param pid [String] pid of the topmost Fedora object (e.g. "ldpd:123")
# @param pids_to_omit [Array<String>, nil] pids that must not be yielded
# @param verbose_output [Boolean] when true, prints progress for each step
# @yield [pid] called once for the top object (unless omitted) and once per member
# @raise [RuntimeError] if +pid+ is blank, or if ActiveFedora::Base.exists?
#   reports that no object has that pid
# @return [nil]
def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
  if pid.blank?
    raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
  end

  begin
    unless ActiveFedora::Base.exists?(pid)
      raise "Could not find Fedora object with pid: #{pid}"
    end

    if pids_to_omit.present? && pids_to_omit.include?(pid)
      puts "Skipping topmost object in this set (#{pid}) because it has been intentionally omitted..." if verbose_output
    else
      puts "Indexing topmost object in this set (#{pid})..." if verbose_output
      puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
      yield pid
    end

    # This line is printed regardless of verbose_output.
    puts "Recursively retreieving and indexing all members of #{pid}..."

    unique_pids = Cul::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
    total_number_of_members = unique_pids.length
    puts "Recursive search found #{total_number_of_members} members." if verbose_output

    if pids_to_omit.present?
      unique_pids = unique_pids - pids_to_omit
      total_number_of_members = unique_pids.length
      puts "After checking against the list of omitted pids, the total number of objects to index will be: #{total_number_of_members}" if verbose_output
    end

    # The block parameter has its own name so it does not shadow the
    # top-level pid, which the rescue clause below reports on.
    unique_pids.each_with_index do |member_pid, index|
      puts "Recursing on #{index + 1} of #{total_number_of_members} members (#{member_pid})..." if verbose_output
      yield member_pid
    end
  rescue RestClient::Unauthorized => e
    # An Unauthorized response stops the walk for this top-level pid; it is
    # reported rather than re-raised.
    error_message = "Skipping #{pid} due to error: #{e.message}. Problem with Fedora object?"
    puts error_message
    logger.error error_message if defined?(logger)
  end

  puts 'Recursion complete!'
end
.index_pid(pid, skip_generic_resources = false, verbose_output = false) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/cul_hydra/indexer.rb', line 69
#
# Loads the Fedora object with the given pid and calls update_index on it.
#
# The lookup is attempted up to NUM_FEDORA_RETRY_ATTEMPTS times, sleeping
# DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS between attempts, when it fails with
# RestClient::RequestTimeout or Errno::EHOSTUNREACH. Any StandardError that
# is still raised is printed and swallowed, so one bad record does not abort
# the caller.
#
# @param pid [String] pid of the Fedora object to index
# @param skip_generic_resources [Boolean] when true, objects that are a
#   GenericResource are reported and not indexed
# @param verbose_output [Boolean] when true, prints 'done.' after indexing
# @return [nil]
def self.index_pid(pid, skip_generic_resources=false, verbose_output=false)
  active_fedora_object = nil

  NUM_FEDORA_RETRY_ATTEMPTS.times do |attempt|
    begin
      active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
      break
    rescue RestClient::RequestTimeout, Errno::EHOSTUNREACH => e
      remaining_attempts = (NUM_FEDORA_RETRY_ATTEMPTS - 1) - attempt
      # Out of retries: hand the error to the method-level rescue below.
      raise e if remaining_attempts == 0

      Rails.logger.error "Error: Could not connect to fedora. (#{e.class}: #{e.message}). Will retry #{remaining_attempts} more #{remaining_attempts == 1 ? 'time' : 'times'} (after a #{DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS} second delay)."
      sleep DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS
    end
  end

  if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
    puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
  else
    active_fedora_object.update_index
    puts 'done.' if verbose_output
  end
rescue StandardError => e
  # Only StandardError is rescued, so SystemExit, Interrupt (ctrl+c) and
  # other signal or fatal exceptions still propagate and stop the run.
  puts "Encountered problem with #{pid}. Skipping record. Exception: #{e.message}"
end
.recursively_index_fedora_objects(top_pid, pids_to_omit = nil, skip_generic_resources = false, verbose_output = false) ⇒ Object
61 62 63 64 65 66 67 |
# File 'lib/cul_hydra/indexer.rb', line 61
#
# Passes every pid yielded by descend_from for +top_pid+ to index_pid.
#
# @param top_pid [String] pid handed to descend_from as the starting object
# @param pids_to_omit [Array<String>, nil] forwarded to descend_from
# @param skip_generic_resources [Boolean] forwarded to index_pid
# @param verbose_output [Boolean] forwarded to both descend_from and index_pid
def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
  descend_from(top_pid, pids_to_omit, verbose_output) { |current_pid|
    index_pid(current_pid, skip_generic_resources, verbose_output)
  }
end