Class: Assembly::Utils

Inherits:
Object
  • Object
show all
Defined in:
lib/assembly-utils/utils.rb,
lib/assembly-utils/version.rb

Overview

Main Utils class

Constant Summary collapse

WFS =
Dor::WorkflowService
REPO =
'dor'
VERSION =

Project version number

'1.4.6'

Class Method Summary collapse

Class Method Details

.apo_workflow_defined?(druid, workflow) ⇒ boolean

Determines if the specified APO object has the specified workflow defined in it. DEPRECATED NOW THAT REIFIED WORKFLOWS ARE USED. Example:

Assembly::Utils.apo_workflow_defined?('druid:oo000oo0001','assembly')

> true



361
362
363
364
365
366
367
# File 'lib/assembly-utils/utils.rb', line 361

# Reports whether the APO identified by +druid+ has +workflow+ defined in its
# administrativeMetadata datastream. Prints a deprecation warning on every call.
#
# The datastream accessors (+identityMetadata+, +administrativeMetadata+) had been
# dropped from the listing, which left range expressions over undefined names.
#
# @param druid [String] identifier handed to Dor::Item.find
# @param workflow [String] workflow name, matched as an element name or as an id attribute
# @return [Boolean] true when exactly one element named +workflow+, or exactly one
#   element whose id attribute equals +workflow+, is present
# @raise [RuntimeError] 'object not an APO' when the first objectType is not 'adminPolicy'
def self.apo_workflow_defined?(druid, workflow)
  puts '************WARNING - THIS METHOD MAY NOT BE USEFUL ANYMORE SINCE WORKFLOWS ARE NO LONGER DEFINED IN THE APO**************'
  obj = Dor::Item.find(druid)
  raise 'object not an APO' if obj.identityMetadata.objectType.first != 'adminPolicy'
  xml_doc = Nokogiri::XML(obj.administrativeMetadata.content)
  xml_doc.xpath("//#{workflow}").size == 1 || xml_doc.xpath("//*[@id='#{workflow}']").size == 1
end

.claim_druid(pid) ⇒ boolean

Claim a specific druid as already used to be sure it won’t get used again. Not needed for normal purposes, only if you manually register something in Fedora Admin outside of DOR services gem.

Example:

puts Assembly::Utils.claim_druid('aa000aa0001')
> true


59
60
61
62
63
64
65
# File 'lib/assembly-utils/utils.rb', line 59

# Marks +pid+ as used in the SURI identifier service by PUTting an empty body to
# <suri url>/suri2/namespaces/<namespace>/identifiers/<pid>.
#
# @param pid [String] identifier to claim
# @return [Boolean] true when the service answers with status 204
def self.claim_druid(pid)
  sc   = Dor::Config.suri
  url  = "#{sc.url}/suri2/namespaces/#{sc.id_namespace}"
  rcr  = RestClient::Resource.new(url, :user => sc.user, :password => sc.pass)
  resp = rcr["identifiers/#{pid}"].put('')
  # The status code is numeric; comparing it to the string '204' never matched.
  # to_i keeps the check working should a client hand back the code as a string.
  resp.code.to_i == 204
end

.cleanup(params = {}) ⇒ Object

Cleanup a list of objects and associated files given a list of druids. WARNING: VERY DESTRUCTIVE. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

Assembly::Utils.cleanup(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:steps=>[:stacks,:dor,:stage,:symlinks,:workflows])


184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/assembly-utils/utils.rb', line 184

# Interactive front end for cleanup_object: asks for confirmation of the
# environment and of every recognised step, then cleans up each druid in turn.
#
# @param params [Hash] :druids (Array of pids), :steps (Array of step symbols),
#   :dry_run (truthy to only report what would be done)
# @return [Array] the druids that were handed to cleanup_object
# @raise [RuntimeError] when no recognised step was requested or no druids were given
#   (Assembly::Utils.confirm raises as well when the user declines)
def self.cleanup(params = {})
  druids  = params[:druids]  || []
  steps   = params[:steps]   || []
  dry_run = params[:dry_run] || false

  # Every step this method knows about, with the explanation shown in its prompt.
  step_descriptions = {
    :stacks    => 'This will remove all files from the stacks that were shelved for the objects',
    :dor       => 'This will delete objects from Fedora',
    :stage     => "This will delete the staged content in #{Assembly::ASSEMBLY_WORKSPACE}",
    :symlinks  => "This will remove the symlink from #{Assembly::DOR_WORKSPACE}",
    :workflows => 'This will remove the accessionWF and assemblyWF workflows'
  }

  puts 'THIS IS A DRY RUN' if dry_run

  Assembly::Utils.confirm "Run on '#{ENV['ROBOT_ENVIRONMENT']}'? Any response other than 'y' or 'yes' will stop the cleanup now."
  if ENV['ROBOT_ENVIRONMENT'] == 'production'
    Assembly::Utils.confirm 'Are you really sure you want to run on production?  CLEANUP IS NOT REVERSIBLE'
  end

  # Unrecognised steps are skipped here without a prompt.
  confirmed_steps = 0
  steps.each do |step|
    next unless step_descriptions.key?(step)
    Assembly::Utils.confirm "Run step '#{step}'?  #{step_descriptions[step]}.  Any response other than 'y' or 'yes' will stop the cleanup now."
    confirmed_steps += 1
  end

  raise 'no valid steps specified for cleanup' if confirmed_steps.zero?
  raise 'no druids provided' if druids.size.zero?

  druids.each do |pid|
    Assembly::Utils.cleanup_object(pid, steps, dry_run)
  end
end

.cleanup_object(pid, steps, dry_run = false) ⇒ Object

Cleanup a single object and its associated files given a druid. WARNING: VERY DESTRUCTIVE. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

Assembly::Utils.cleanup_object('druid:aa000aa0001',[:stacks,:dor,:stage,:symlinks,:workflows])


228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/assembly-utils/utils.rb', line 228

# Runs the requested cleanup steps for one object. Any StandardError raised by a
# step is reported with puts and ends the cleanup of this object.
#
# Previously the SSH session was guarded by defined?(stacks_server), a name that was
# never assigned, so the session was never opened and the :stacks step always failed
# with a NameError. The host is now read from Dor::Config.stacks.host.
#
# @param pid [String] druid of the object to clean up
# @param steps [Array<Symbol>] any of :dor, :symlinks, :stage, :stacks, :workflows
# @param dry_run [Boolean] when true only print what would be done
def self.cleanup_object(pid, steps, dry_run = false)
  ssh_session   = nil
  stacks_server = nil
  if steps.include?(:stacks)
    stacks_server = Dor::Config.stacks.host
    # Connect before anything is deleted so a connection failure aborts the whole
    # cleanup of this object; a dry run never executes remote commands, so it skips this.
    unless dry_run
      ssh_session = Net::SSH.start(stacks_server, Dor::Config.stacks.user, :auth_methods => %w(gssapi-with-mic publickey hostbased password keyboard-interactive))
    end
  end

  druid_tree = DruidTools::Druid.new(pid).tree
  puts "Cleaning up #{pid}"
  if steps.include?(:dor)
    puts "-- deleting #{pid} from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
    Assembly::Utils.unregister(pid) unless dry_run
  end
  if steps.include?(:symlinks)
    path_to_symlinks = []
    path_to_symlinks << File.join(Assembly::DOR_WORKSPACE, druid_tree)
    path_to_symlinks << Assembly::Utils.get_staging_path(pid, Assembly::DOR_WORKSPACE)
    path_to_symlinks.each do |path|
      if File.directory?(path)
        puts "-- deleting folder #{path} (WARNING: should have been a symlink)"
        FileUtils.rm_rf path unless dry_run
      elsif File.symlink?(path)
        puts "-- deleting symlink #{path}"
        File.delete(path) unless dry_run
      else
        puts "-- Skipping #{path}: not a folder or symlink"
      end
    end
  end
  if steps.include?(:stage)
    path_to_content = Assembly::Utils.get_staging_path(pid, Assembly::ASSEMBLY_WORKSPACE)
    puts "-- deleting folder #{path_to_content}"
    FileUtils.rm_rf path_to_content if !dry_run && File.exist?(path_to_content)
  end
  if steps.include?(:stacks)
    path_to_content = Dor::DigitalStacksService.stacks_storage_dir(pid)
    puts "-- removing files from the stacks on #{stacks_server} at #{path_to_content}"
    ssh_session.exec!("rm -fr #{path_to_content}") unless dry_run
  end
  if steps.include?(:workflows)
    puts "-- deleting #{pid} accessionWF and assemblyWF workflows from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
    unless dry_run
      Dor::WorkflowService.delete_workflow('dor', pid, 'accessionWF')
      Dor::WorkflowService.delete_workflow('dor', pid, 'assemblyWF')
    end
  end
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and exit requests are not swallowed.
  puts "** cleaning up failed for #{pid} with #{e.message}"
ensure
  ssh_session.close if ssh_session
end

.clear_stray_workflows ⇒ Object

Clear stray workflows - remove any workflow steps for orphaned objects. This method only works when this gem is used in a project that is configured to connect to DOR



459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'lib/assembly-utils/utils.rb', line 459

# For every assemblyWF step, asks the workflow service for the objects that have the
# first step completed and that step waiting, and sets that step to an error state
# with the message 'Integration testing'. Prints one line per updated object.
#
# @return [Array] the step names taken from Assembly::ASSEMBLY_WF_STEPS
def self.clear_stray_workflows
  repository       = 'dor'
  workflow_name    = 'assemblyWF'
  error_message    = 'Integration testing'
  workflow_service = Dor::WorkflowService
  # Each entry of ASSEMBLY_WF_STEPS carries the step name in its first slot.
  step_names       = Assembly::ASSEMBLY_WF_STEPS.map { |entry| entry[0] }
  first_step       = step_names[0]

  step_names.each do |waiting_step|
    stranded = workflow_service.get_objects_for_workstep(first_step, waiting_step, repository, workflow_name)
    stranded.each do |druid|
      args     = [repository, druid, workflow_name, waiting_step, error_message]
      response = workflow_service.update_workflow_error_status(*args)
      puts "updated: resp=#{response} params=#{args.inspect}"
    end
  end
end

.confirm(message) ⇒ Object

Used by the cleanup to ask user for confirmation of each step. Any response other than ‘yes’ raises an error



785
786
787
788
789
# File 'lib/assembly-utils/utils.rb', line 785

# Prints +message+ and reads one line of input; anything other than 'y' or 'yes'
# (case-insensitive) aborts.
#
# @param message [String] the question shown to the user
# @return [nil] when the user confirmed
# @raise [RuntimeError] 'Exiting' when the answer is not a confirmation
def self.confirm(message)
  puts message
  # gets returns nil at end of input (closed or redirected stdin); to_s turns that
  # into a refusal instead of a NoMethodError on nil.
  response = gets.to_s.chomp.downcase
  raise 'Exiting' if response != 'y' && response != 'yes'
end

.delete_all_workflows(pid, repo = 'dor') ⇒ Object

Delete all workflows for the given PID. Destructive and should only be used when deleting an object from DOR. This method only works when this gem is used in a project that is configured to connect to DOR

e.g. Assembly::Utils.delete_all_workflows(‘druid:oo000oo0001’)



442
443
444
# File 'lib/assembly-utils/utils.rb', line 442

# Deletes every workflow the workflow service lists for +pid+.
#
# @param pid [String] druid whose workflows are removed
# @param repo [String] repository name passed to delete_workflow
# @return [Object] whatever Dor::WorkflowService.get_workflows returned
def self.delete_all_workflows(pid, repo = 'dor')
  workflow_names = Dor::WorkflowService.get_workflows(pid)
  workflow_names.each do |workflow_name|
    Dor::WorkflowService.delete_workflow(repo, pid, workflow_name)
  end
end

.delete_from_dor(pid) ⇒ Object

Delete an object from DOR. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

Assembly::Utils.delete_from_dor('druid:aa000aa0001')


284
285
286
287
288
# File 'lib/assembly-utils/utils.rb', line 284

# Deletes the object from Fedora, then removes it from the solr index and commits.
#
# @param pid [String] druid of the object to delete
# @return [Object] the result of the solr commit
def self.delete_from_dor(pid)
  fedora_resource = Dor::Config.fedora.client["objects/#{pid}"]
  fedora_resource.delete
  Dor::SearchService.solr.delete_by_id(pid)
  Dor::SearchService.solr.commit
end

.export_objects(pids, output_dir) ⇒ Object

Export one or more objects given a single or array of pids, with output to the specified directory as FOXML files

Example:

Assembly::Utils.export_objects(['druid:aa000aa0001','druid:bb000bb0001'],'/tmp')


74
75
76
77
# File 'lib/assembly-utils/utils.rb', line 74

# Exports each pid to +output_dir+ through ActiveFedora::FixtureExporter.
#
# @param pids [String, Array<String>] one pid or a list of pids
# @param output_dir [String] directory handed to the exporter
# @return [Array] the list of pids that was iterated
def self.export_objects(pids, output_dir)
  # A bare String is treated as a one-element list.
  pid_list = pids.class == String ? [pids] : pids
  pid_list.each do |pid|
    ActiveFedora::FixtureExporter.export_to_path(pid, output_dir)
  end
end

.get_druids_by_sourceid(source_ids) ⇒ array

Get a list of druids that match the given array of source IDs. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

puts Assembly::Utils.get_druids_by_sourceid(['revs-01','revs-02'])
> ['druid:aa000aa0001','druid:aa000aa0002']


102
103
104
105
106
# File 'lib/assembly-utils/utils.rb', line 102

# Looks up every source id with Dor::SearchService.query_by_id and returns all
# results as one flat list.
#
# @param source_ids [Array<String>] source ids to look up
# @return [Array] the flattened query results, in lookup order
def self.get_druids_by_sourceid(source_ids)
  matches_per_id = source_ids.map { |source_id| Dor::SearchService.query_by_id(source_id) }
  matches_per_id.flatten
end

.get_druids_from_log(progress_log_file, completed = true) ⇒ array

Read in a list of druids from a pre-assembly progress load file and load into an array.

Example:

druids = Assembly::Utils.get_druids_from_log '/dor/preassembly/sohp_accession_log.yaml'
puts druids
> ['aa000aa0001', 'aa000aa0002']


653
654
655
656
657
658
659
# File 'lib/assembly-utils/utils.rb', line 653

# Reads a multi-document YAML progress log and returns the :pid of every entry
# whose :pre_assem_finished value equals +completed+.
#
# @param progress_log_file [String] path passed to Assembly::Utils.read_file
# @param completed [Boolean] the :pre_assem_finished value to select
# @return [Array] pids of the matching entries, in file order
def self.get_druids_from_log(progress_log_file, completed = true)
  entries = YAML.load_stream(Assembly::Utils.read_file(progress_log_file))
  # Some YAML engines wrap the parsed documents in an object exposing #documents.
  entries = entries.documents if entries.respond_to?(:documents)
  matching = entries.select { |entry| entry[:pre_assem_finished] == completed }
  matching.map { |entry| entry[:pid] }
end

.get_errored_objects_for_workstep(workflow, step, tag = '') ⇒ hash

Get a list of druids that have errored out in a particular workflow and step

e.g. result=Assembly::Utils.get_errored_objects_for_workstep(‘accessionWF’,‘content-metadata’,‘Project : Revs’)

> - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>”



610
611
612
613
614
615
616
617
618
619
620
621
622
# File 'lib/assembly-utils/utils.rb', line 610

# Asks the workflow service for the objects in error at +step+ of +workflow+ and,
# when +tag+ is given, keeps only those whose item carries that tag.
#
# Objects that cannot be loaded or inspected are still left out of the filtered
# result, but each one is now reported on stderr instead of vanishing silently.
#
# @param workflow [String] workflow name
# @param step [String] step name
# @param tag [String] tag to filter on; '' returns the unfiltered service result
# @return [Hash] druid => error message
def self.get_errored_objects_for_workstep(workflow, step, tag = '')
  result = Dor::WorkflowService.get_errored_objects_for_workstep workflow, step, 'dor'
  return result if tag == ''
  filtered_result = {}
  result.each do |druid, error|
    begin
      item = Dor::Item.find(druid)
      filtered_result[druid] = error if item.tags.include? tag
    rescue StandardError => e
      $stderr.puts "skipping #{druid} while filtering by tag '#{tag}': #{e.message}"
    end
  end
  filtered_result
end

.get_staging_path(pid, base_path = nil) ⇒ string

Get the staging directory tree given a druid, and optionally prepend a basepath. Deprecated and should not be needed anymore.

Example:

puts Assembly::Utils.get_staging_path('aa000aa0001','tmp')
> "tmp/aa/000/aa/0001"


28
29
30
31
# File 'lib/assembly-utils/utils.rb', line 28

# Returns the parent directory of the path DruidTools builds for +pid+.
#
# @param pid [String] druid
# @param base_path [String, nil] optional base handed to DruidTools::Druid
# @return [String] directory portion of the druid path
def self.get_staging_path(pid, base_path = nil)
  druid_path = DruidTools::Druid.new(pid, base_path).path
  File.dirname(druid_path)
end

.get_workflow_status(druid, workflow, step) ⇒ string

Show the workflow status of a specific step in a specific workflow for the provided druid. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

puts Assembly::Utils.get_workflow_status('druid:aa000aa0001','assemblyWF','jp2-create')
> "completed"


165
166
167
# File 'lib/assembly-utils/utils.rb', line 165

# Fetches the status of one workflow step from the workflow service, always using
# the 'dor' repository.
#
# @param druid [String] object identifier
# @param workflow [String] workflow name
# @param step [String] step name
# @return [Object] whatever Dor::WorkflowService.get_workflow_status returns
def self.get_workflow_status(druid, workflow, step)
  repository = 'dor'
  Dor::WorkflowService.get_workflow_status(repository, druid, workflow, step)
end

.import_objects(source_dir) ⇒ Object

Import all of the FOXML files in the specified directory into Fedora

Example:

Assembly::Utils.import_objects('/tmp')


85
86
87
88
89
90
91
92
# File 'lib/assembly-utils/utils.rb', line 85

# Imports every '*.foxml.xml' file found directly in +source_dir+ into Fedora and
# indexes the resulting pid.
#
# The directory change is now scoped to the file listing. Before, the process stayed
# in +source_dir+ after the call, and a relative +source_dir+ produced import paths
# that were resolved against the already-changed directory.
#
# @param source_dir [String] directory containing the FOXML files
# @return [Array<String>] the file names that were imported
def self.import_objects(source_dir)
  files = Dir.chdir(source_dir) { Dir.glob('*.foxml.xml') }
  files.each do |file|
    pid = ActiveFedora::FixtureLoader.import_to_fedora(File.join(source_dir, file))
    ActiveFedora::FixtureLoader.index(pid)
  end
end

.in_accessioning?(pid) ⇒ boolean

Check if the object is currently in accessioning This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.in_accessioning?('druid:oo000oo0001')
> false


497
498
499
# File 'lib/assembly-utils/utils.rb', line 497

# True when the workflow service reports an active 'submitted' lifecycle for +pid+.
#
# @param pid [String] druid
# @return [Boolean]
def self.in_accessioning?(pid)
  # Collapse whatever the service returns into a strict true/false.
  !!WFS.get_active_lifecycle(REPO, pid, 'submitted')
end

.ingest_hold?(pid) ⇒ boolean

Check if the object is on ingest hold This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.ingest_hold?('druid:oo000oo0001')
> false


509
510
511
# File 'lib/assembly-utils/utils.rb', line 509

# True when the accessionWF 'sdr-ingest-transfer' step of +pid+ has status 'hold'.
#
# @param pid [String] druid
# @return [Boolean]
def self.ingest_hold?(pid)
  transfer_status = WFS.get_workflow_status(REPO, pid, 'accessionWF', 'sdr-ingest-transfer')
  transfer_status == 'hold'
end

.insert_workflow(pid, workflow, repo = 'dor') ⇒ boolean

Insert the specified workflow into the specified object.

Example:

puts Assembly::Utils.insert_workflow('druid:aa000aa0001','accessionWF')
> true


44
45
46
47
48
# File 'lib/assembly-utils/utils.rb', line 44

# POSTs to the DOR service to add +workflow+ to the object +pid+.
#
# @param pid [String] druid of the object
# @param workflow [String] workflow name appended to the apo_workflows URL
# @param repo [String] accepted for compatibility; not used by this method
# @return [Object, false] the response when its code is 200, 201, 202 or 204,
#   otherwise false
def self.insert_workflow(pid, workflow, repo = 'dor')
  endpoint = "#{Dor::Config.dor.service_root}/objects/#{pid}/apo_workflows/#{workflow}"
  response = RestClient.post(endpoint, {})
  return false unless [200, 201, 202, 204].include?(response.code)
  response
end

.is_apo?(druid) ⇒ boolean

Determines if the specified object is an APO. Example:

Assembly::Utils.is_apo?('druid:oo000oo0001')

> true



377
378
379
380
381
382
# File 'lib/assembly-utils/utils.rb', line 377

# Reports whether the object's first identityMetadata objectType is 'adminPolicy'.
#
# The +identityMetadata+ accessor had been dropped from the listing; the resulting
# range expression always raised, so the rescue made every call return false.
#
# @param druid [String] identifier handed to Dor::Item.find
# @return [Boolean] false as well when the object cannot be loaded or inspected
def self.is_apo?(druid)
  obj = Dor::Item.find(druid)
  obj.identityMetadata.objectType.first == 'adminPolicy'
rescue
  return false
end

.is_ingested?(pid) ⇒ boolean

Check if the object is fully accessioned and ingested. This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.is_ingested?('druid:oo000oo0001')
> false


485
486
487
# File 'lib/assembly-utils/utils.rb', line 485

# True when the workflow service reports an 'accessioned' lifecycle for +pid+.
#
# @param pid [String] druid
# @return [Boolean]
def self.is_ingested?(pid)
  # Collapse whatever the service returns into a strict true/false.
  !!WFS.get_lifecycle(REPO, pid, 'accessioned')
end

.is_submitted?(pid) ⇒ boolean

Check if the object is submitted This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.is_submitted?('druid:oo000oo0001')
> false


521
522
523
# File 'lib/assembly-utils/utils.rb', line 521

# True when the workflow service reports a 'submitted' lifecycle for +pid+.
#
# Two repairs: the method name had been dropped from the listing (the heading for
# this entry gives it as is_submitted?), and the result was computed with +.nil?+,
# which answered true exactly when NO 'submitted' lifecycle existed -- the opposite
# of what the name and description promise. It now mirrors is_ingested?.
#
# @param pid [String] druid
# @return [Boolean]
def self.is_submitted?(pid)
  WFS.get_lifecycle(REPO, pid, 'submitted') ? true : false
end

.load_config(filename) ⇒ hash

Read in a YAML configuration file from disk and return a hash

Example:

config_filename='/thumpers/dpgthumper2-smpl/SC1017_SOHP/sohp_prod_accession.yaml'
config=Assembly::Utils.load_config(config_filename)
puts config['progress_log_file']
> "/dor/preassembly/sohp_accession_log.yaml"


671
672
673
# File 'lib/assembly-utils/utils.rb', line 671

# Parses the YAML text that Assembly::Utils.read_file returns for +filename+.
#
# @param filename [String] path of the configuration file
# @return [Object] the parsed YAML document
def self.load_config(filename)
  yaml_text = Assembly::Utils.read_file(filename)
  YAML.load(yaml_text)
end

.read_druids_from_file(csv_filename) ⇒ array

Get a list of druids from a CSV file which has a heading of “druid” and put them into a Ruby array. Useful if you want to import a report from argo

Example:

Assembly::Utils.read_druids_from_file('download.csv') # ['druid:xxxxx', 'druid:yyyyy']


589
590
591
592
593
594
595
596
597
598
# File 'lib/assembly-utils/utils.rb', line 589

# Imports +csv_filename+ with CsvMapper and returns the 'druid' value of every row,
# prefixed with 'druid:' when the value does not already contain it.
#
# @param csv_filename [String] path of the CSV file
# @return [Array<String>] one druid per row
def self.read_druids_from_file(csv_filename)
  records = CsvMapper.import(csv_filename) { read_attributes_from_file }
  records.map do |record|
    value = record.druid
    value.include?('druid:') ? value : "druid:#{value}"
  end
end

.read_file(filename) ⇒ string

Read in a file from disk



679
680
681
# File 'lib/assembly-utils/utils.rb', line 679

# Returns the content of +filename+, or an empty string when it is not readable.
#
# @param filename [String] path of the file
# @return [String]
def self.read_file(filename)
  return '' unless File.readable?(filename)
  IO.read(filename)
end

.reindex(pid) ⇒ Object

Reindex the supplied PID in solr.

e.g. Assembly::Utils.reindex(‘druid:oo000oo0001’)



451
452
453
454
455
# File 'lib/assembly-utils/utils.rb', line 451

# Loads the object and adds its solr document to the index with commitWithin 1000.
#
# The nil check used to run after obj.to_solr had already been called, so a missing
# object raised NoMethodError; it now comes first.
#
# @param pid [String] druid of the object to reindex
# @return [Object, nil] the result of the solr add, or nil when no object was loaded
def self.reindex(pid)
  obj = Dor.load_instance pid
  return if obj.nil?
  solr_doc = obj.to_solr
  Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000})
end

.remove_duplicate_tags(druids) ⇒ Object

Removes any duplicate tags within each druid



768
769
770
771
772
773
774
775
776
777
778
779
780
# File 'lib/assembly-utils/utils.rb', line 768

# For each druid, loads the item and, for every tag that occurs more than once in
# its tag list, removes the tag, adds it back and saves the item.
#
# druids - list of identifiers, each passed to Dor::Item.find
#
# Returns the result of druids.each.
def self.remove_duplicate_tags(druids)
  druids.each do |druid|
    i = Dor::Item.find(druid)
    # nothing to de-duplicate when the item is missing or has at most one tag
    next unless i && i.tags.size > 1 # multiple tags
    i.tags.each do |tag|
      # the tag list is re-read on every pass, so earlier removals are taken into account
      next unless (i.tags.select {|t| t == tag}).size > 1 # tag is duplicate
      # NOTE(review): presumably remove_tag drops every copy of the tag and add_tag
      # restores a single one -- confirm against the Dor tagging implementation.
      i.remove_tag(tag)
      i.add_tag(tag)
      puts "Saving #{druid} to remove duplicate tag='#{tag}'"
      # saved once per duplicated tag, not once per item
      i.save
    end
  end
end

.replace_datastreams(druids, datastream_name, new_content, publish = false) ⇒ Object

Replace a specific datastream for a series of objects in DOR with new content

Example:

druids=%w{druid:aa111aa1111 druid:bb222bb2222}
new_content='<xml><more nodes>this should be the whole datastream</more nodes></xml>'
datastream='rightsMetadata'
Assembly::Utils.replace_datastreams(druids,datastream,new_content)


318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/assembly-utils/utils.rb', line 318

# Overwrites the content of one datastream on every listed object and optionally
# republishes the object afterwards. Objects without that datastream are reported
# and left untouched.
#
# The publish call had lost its method name in the listing, which turned it into a
# call to obj.puts; it is restored as obj.publish_metadata.
#
# @param druids [Array<String>] objects to update
# @param datastream_name [String] key into obj.datastreams
# @param new_content [String] complete replacement content
# @param publish [Boolean] republish each object after its datastream is saved
# @return [Array<String>] the druids that were iterated
def self.replace_datastreams(druids, datastream_name, new_content, publish = false)
  druids.each do |druid|
    obj = Dor::Item.find(druid)
    ds = obj.datastreams[datastream_name]
    if ds
      ds.content = new_content
      ds.save
      puts "replaced #{datastream_name} for #{druid}"
      if publish
        obj.publish_metadata
        puts '--object re-published'
      end
    else
      puts "#{datastream_name} does not exist for #{druid}"
    end
  end
end

.republish(druids) ⇒ Object

Republish a list of druids. Only works when run from a server with access rights to the stacks (e.g. lyberservices-prod)

Example:

druids=%w{druid:aa111aa1111 druid:bb222bb2222}
Assembly::Utils.republish(druids)


343
344
345
346
347
348
349
# File 'lib/assembly-utils/utils.rb', line 343

# Republishes each listed object and prints a confirmation line per object.
#
# The publish call had lost its method name in the listing, which turned it into a
# call to obj.puts; it is restored as obj.publish_metadata.
#
# @param druids [Array<String>] objects to republish
# @return [Array<String>] the druids that were iterated
def self.republish(druids)
  druids.each do |druid|
    obj = Dor::Item.find(druid)
    obj.publish_metadata
    puts "republished #{druid}"
  end
end

.reset_errored_objects_for_workstep(workflow, step, tag = '') ⇒ hash

Reset any objects in a specific workflow step and state that have errored out back to waiting

e.g. result = Assembly::Utils.reset_errored_objects_for_workstep(‘accessionWF’, ‘content-metadata’)

> - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>”



634
635
636
637
638
639
640
# File 'lib/assembly-utils/utils.rb', line 634

# Finds the objects in error at +step+ of +workflow+ (optionally filtered by +tag+)
# and resets that step for all of them through reset_workflow_states.
#
# @param workflow [String] workflow name
# @param step [String] step name
# @param tag [String] optional tag filter passed to get_errored_objects_for_workstep
# @return [Hash] the errored objects that were found (druid => error)
def self.reset_errored_objects_for_workstep(workflow, step, tag = '')
  errored = get_errored_objects_for_workstep(workflow, step, tag)
  druids = errored.map { |druid, _error| druid }
  unless druids.empty?
    reset_workflow_states(:druids => druids, :steps => {workflow => [step]})
  end
  errored
end

.reset_workflow_states(params = {}) ⇒ Object

Reset the workflow states for a list of druids given a list of workflow names and steps. Provide a list of druids in an array, and a hash containing workflow names (e.g. ‘assemblyWF’ or ‘accessionWF’) as the keys, and arrays of steps as the corresponding values (e.g. [‘checksum-compute’,‘jp2-create’]) and they will all be reset to “waiting”. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

druids = ['druid:aa111aa1111', 'druid:bb222bb2222']
steps = {'assemblyWF' => ['checksum-compute'], 'accessionWF' => ['content-metadata', 'descriptive-metadata']}
Assembly::Utils.reset_workflow_states(:druids => druids, :steps => steps)


563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
# File 'lib/assembly-utils/utils.rb', line 563

# Sets the given workflow steps to +state+ for every druid. A failure while
# updating one druid is reported and the remaining druids are still processed.
#
# @param params [Hash] :druids (Array of pids), :steps (Hash of workflow name =>
#   Array of step names), :state (String, defaults to 'waiting')
# @return [Array] the druids that were iterated
def self.reset_workflow_states(params = {})
  druids    = params[:druids] || []
  workflows = params[:steps]  || {}
  state     = params[:state]  || 'waiting'
  druids.each do |druid|
    puts "** #{druid}"
    begin
      workflows.each do |workflow, steps|
        steps.each do |step|
          puts "Updating #{workflow}:#{step} to #{state}"
          Dor::WorkflowService.update_workflow_status 'dor', druid, workflow, step, state
        end
      end
    rescue StandardError => e
      # StandardError rather than Exception: Ctrl-C and exit requests must still be
      # able to stop a long run.
      puts "an error occurred trying to update workflows for #{druid} with message #{e.message}"
    end
  end
end

.set_workflow_step_to_error(pid, step) ⇒ Object

Set the workflow step for the given PID to an error state



427
428
429
430
431
432
433
# File 'lib/assembly-utils/utils.rb', line 427

# Marks +step+ of the assembly workflow as errored for +pid+, using the message
# 'Integration testing'.
#
# @param pid [String] druid
# @param step [String] step name
# @return [nil]
# @raise [RuntimeError] when the workflow service does not return true
def self.set_workflow_step_to_error(pid, step)
  workflow_name = Assembly::ASSEMBLY_WF
  args          = ['dor', pid, workflow_name, step, 'Integration testing']
  outcome       = Dor::WorkflowService.update_workflow_error_status(*args)
  return if outcome == true
  raise 'update_workflow_error_status() returned false.'
end

.solr_doc_parser(doc, check_status_in_dor = false) ⇒ string

Used by the completion_report and project_tag_report in the pre-assembly project



689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
# File 'lib/assembly-utils/utils.rb', line 689

# Flattens one solr document into a report row.
#
# Field names depend on the Solrizer version: below 3.0 the fixed :*_t / :*_facet
# keys are read, otherwise the names come from Solrizer.solr_name.
#
# Fixes: the version check compared strings lexically (so '10.0' counted as older
# than '3.0') and now compares Gem::Version objects; and the file type summary was
# nil for an empty file list but '' for a missing one -- it is now '' in both cases.
#
# @param doc [Hash] solr document; the druid is read from doc[:id]
# @param check_status_in_dor [Boolean] when true, ask the workflow service for the
#   publish/shelve status instead of reading it from the document
# @return [Array] druid, label, title, source_id, accessioned, shelved, purl_link,
#   num_files, file_type_list
def self.solr_doc_parser(doc, check_status_in_dor = false)
  druid = doc[:id]

  if Gem::Version.new(Solrizer::VERSION) < Gem::Version.new('3.0')
    label      = doc[:objectLabel_t]
    title      = doc[:public_dc_title_t].nil? ? '' : doc[:public_dc_title_t].first
    wf_field   = :wf_wps_facet
    source_id  = doc[:source_id_t]
    files      = doc[:content_file_t]
  else
    label      = doc[Solrizer.solr_name('objectLabel', :displayable)]
    title      = doc.fetch(Solrizer.solr_name('public_dc_title', :displayable), []).first || ''
    wf_field   = Solrizer.solr_name('wf_wps', :symbol)
    source_id  = doc[Solrizer.solr_name('source_id', :symbol)]
    files      = doc[Solrizer.solr_name('content_file', :symbol)]
  end

  if check_status_in_dor
    accessioned = get_workflow_status(druid, 'accessionWF', 'publish') == 'completed'
    shelved     = get_workflow_status(druid, 'accessionWF', 'shelve')  == 'completed'
  else
    # a document without workflow facets counts as neither published nor shelved
    wf_states   = doc[wf_field] || []
    accessioned = wf_states.include?('accessionWF:publish:completed')
    shelved     = wf_states.include?('accessionWF:shelve:completed')
  end

  # count the amount of each file type, keyed by file extension
  files ||= []
  num_files = files.size
  file_types = Hash.new(0)
  files.each { |file| file_types[File.extname(file)] += 1 }
  file_type_list = file_types.map { |extension, count| "#{extension}=#{count}" }.join(' | ')

  val = druid.split(/:/).last
  purl_link = File.join(Assembly::PURL_BASE_URL, val)
  [druid, label, title, source_id, accessioned, shelved, purl_link, num_files, file_type_list]
end

.symbolize_keys(h) ⇒ hash

Takes a hash data structure and recursively converts all hash keys from strings to symbols.

Example:

Assembly::Utils.symbolize_keys({'dude' => 'is cool', 'i' => 'am too'})
> {:dude => "is cool", :i => "am too"}


746
747
748
749
750
751
752
753
754
# File 'lib/assembly-utils/utils.rb', line 746

# Returns a copy of +h+ in which every Hash key has been converted with to_sym,
# descending into nested Hashes and Arrays. Anything else is returned unchanged.
# Only exact Hash and Array instances are converted, not instances of subclasses.
#
# @param h [Object] the structure to convert
# @return [Object] the converted structure
def self.symbolize_keys(h)
  return h.map { |element| symbolize_keys(element) } if h.instance_of?(Array)
  return h unless h.instance_of?(Hash)

  h.each_with_object({}) do |(key, value), converted|
    converted[key.to_sym] = symbolize_keys(value)
  end
end

.unregister(pid) ⇒ boolean

Unregister a DOR object, which includes deleting it and deleting all its workflows



415
416
417
418
419
420
421
# File 'lib/assembly-utils/utils.rb', line 415

# Deletes all workflows of +pid+ and then the object itself.
#
# @param pid [String] druid of the object to unregister
# @return [Boolean] true on success, false when either deletion raised a StandardError
def self.unregister(pid)
  begin
    Assembly::Utils.delete_all_workflows(pid)
    Assembly::Utils.delete_from_dor(pid)
  rescue
    return false
  end
  true
end

.update_datastreams(druids, datastream_name, find_content, replace_content) ⇒ Object

Update a specific datastream for a series of objects in DOR by searching and replacing content

Example:

druids = %w{druid:aa111aa1111 druid:bb222bb2222}
find_content = 'FooBarBaz'
replace_content = 'Stanford Rules'
datastream = 'rightsMetadata'
Assembly::Utils.update_datastreams(druids, datastream, find_content, replace_content)


397
398
399
400
401
402
403
404
405
406
407
408
409
410
# File 'lib/assembly-utils/utils.rb', line 397

# Runs a search-and-replace (String#gsub) over one datastream of every listed
# object and saves it. Objects without that datastream are reported and skipped.
#
# @param druids [Array<String>] objects to update
# @param datastream_name [String] key into the object's datastreams
# @param find_content [String, Regexp] pattern handed to gsub
# @param replace_content [String] replacement handed to gsub
# @return [Array<String>] the druids that were iterated
def self.update_datastreams(druids, datastream_name, find_content, replace_content)
  druids.each do |druid|
    item = Dor::Item.find(druid)
    datastream = item.datastreams[datastream_name]
    unless datastream
      puts "#{datastream_name} does not exist for #{druid}"
      next
    end
    datastream.content = datastream.content.gsub(find_content, replace_content)
    datastream.save
    puts "updated #{datastream_name} for #{druid}"
  end
end

.update_rights_metadata(druids, apo_druid, publish = false) ⇒ Object

Quickly update rights metadata for any existing list of objects using default rights metadata pulled from the supplied APO

Example:

druids=%w{druid:aa111aa1111 druid:bb222bb2222}
apo_druid='druid:cc222cc2222'
Assembly::Utils.update_rights_metadata(druids,apo_druid)


300
301
302
303
304
# File 'lib/assembly-utils/utils.rb', line 300

# Copies the content of the APO's 'defaultObjectRights' datastream into the
# 'rightsMetadata' datastream of every listed object via replace_datastreams.
#
# The method name had been dropped from the listing (leaving a syntax error); it is
# restored from the heading of this entry.
#
# @param druids [Array<String>] objects to update
# @param apo_druid [String] APO that supplies the default rights
# @param publish [Boolean] forwarded to replace_datastreams
# @return [Object] whatever replace_datastreams returns
def self.update_rights_metadata(druids, apo_druid, publish = false)
  apo = Dor::Item.find(apo_druid)
  rights_md = apo.datastreams['defaultObjectRights']
  replace_datastreams(druids, 'rightsMetadata', rights_md.content, publish)
end

.updates_allowed?(pid) ⇒ boolean

Check if the updates are allowed on the object This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.updates_allowed?('druid:oo000oo0001')
> false


533
534
535
# File 'lib/assembly-utils/utils.rb', line 533

# Updates are allowed when the object is not in accessioning and has been ingested.
#
# @param pid [String] druid
# @return [Boolean] false while in accessioning, otherwise the result of is_ingested?
def self.updates_allowed?(pid)
  return false if in_accessioning?(pid)
  is_ingested?(pid)
end

.values_to_symbols!(h) ⇒ hash

Takes a hash and converts its string values to symbols – not recursively. Example:

Assembly::Utils.values_to_symbols!({'dude' => 'iscool', 'i' => 'amtoo'})
> {'i' => :amtoo, 'dude' => :iscool}


762
763
764
# File 'lib/assembly-utils/utils.rb', line 762

# Replaces, in place, every value of +h+ that is exactly a String with its symbol.
# Nested structures and other value types are left alone.
#
# @param h [Hash] the hash to modify
# @return [Hash] the same hash object
def self.values_to_symbols!(h)
  h.each do |key, value|
    next unless value.class == String
    h[key] = value.to_sym
  end
end

.versioning_required?(pid) ⇒ boolean

Check if versioning is required for the object This method only works when this gem is used in a project that is configured to connect to the workflow service.

Example:

Assembly::Utils.versioning_required?('druid:oo000oo0001')
> false


545
546
547
# File 'lib/assembly-utils/utils.rb', line 545

# Decides whether a new version must be opened before the object can be changed.
#
# Versioning is NOT required when the object is not ingested and is either on
# ingest hold or fails is_submitted?; in every other case it is required.
#
# The call to is_submitted? had lost its method name in the listing (leaving
# self.(pid), i.e. self.call). The condition is the same boolean as before, reduced
# so that each check runs at most once.
#
# @param pid [String] druid
# @return [Boolean]
def self.versioning_required?(pid)
  return true if is_ingested?(pid)
  !ingest_hold?(pid) && is_submitted?(pid)
end

.workflow_status(params = {}) ⇒ string

Show the workflow status of specific steps in assembly and/or accession workflows for the provided druids. This method only works when this gem is used in a project that is configured to connect to DOR

Example:

Assembly::Utils.workflow_status(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:workflows=>[:assembly,:accession],:filename=>'output.csv')


120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/assembly-utils/utils.rb', line 120

# Prints a comma-separated status report (one line per druid) for the assembly
# and/or accession workflow steps, and also writes it to a CSV file when a
# filename is given.
#
# The CSV file is now closed in an ensure block, so it is released (and whatever
# was written so far is flushed) even when a status lookup raises.
#
# @param params [Hash] :druids (Array of pids), :workflows (Array containing
#   :assembly and/or :accession, default [:assembly]), :filename (String, default ''
#   meaning no file is written)
# @return [nil]
def self.workflow_status(params = {})
  druids    = params[:druids] || []
  workflows = params[:workflows] || [:assembly]
  filename  = params[:filename] || ''
  accession_steps = %w(content-metadata descriptive-metadata rights-metadata remediate-object shelve publish)
  assembly_steps  = %w(jp2-create checksum-compute exif-collect accessioning-initiate)

  puts 'Generating report'

  csv = CSV.open(filename, 'w') if filename != ''

  begin
    header = ['druid']
    header.concat(assembly_steps)  if workflows.include?(:assembly)
    header.concat(accession_steps) if workflows.include?(:accession)
    csv << header if csv
    puts header.join(',')

    druids.each do |druid|
      output = [druid]
      assembly_steps.each  { |step| output << get_workflow_status(druid, 'assemblyWF', step) }  if workflows.include?(:assembly)
      accession_steps.each { |step| output << get_workflow_status(druid, 'accessionWF', step) } if workflows.include?(:accession)
      csv << output if csv
      puts output.join(',')
    end
  ensure
    csv.close if csv
  end

  puts "Report generated in #{filename}" if csv
  nil
end