Class: Origen::Application::LSFManager

Inherits:
Object
  • Object
show all
Includes:
Callbacks
Defined in:
lib/origen/application/lsf_manager.rb

Overview

This class is responsible for co-ordinating and monitoring all submissions to the LSF. This is in contrast to Origen::Application::LSF which is an API for talking to the LSF.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Callbacks

#register_callback_listener

Constructor Details

#initializeLSFManager

Returns a new instance of LSFManager.



14
15
16
17
18
# File 'lib/origen/application/lsf_manager.rb', line 14

# Creates the manager, making sure the directory that holds the remote job
# logs exists.
def initialize
  FileUtils.mkdir_p(log_file_directory) unless File.exist?(log_file_directory)
end

Instance Attribute Details

#current_commandObject

This will be set by the command dispatcher to reflect the currently executing command. If LSF jobs are spawned with the same command then any options passed to the parent command will automatically be forwarded to the children.



12
13
14
# File 'lib/origen/application/lsf_manager.rb', line 12

# Returns the value stored in @current_command (nil when it has not been set).
# Per the class documentation this is set by the command dispatcher to reflect
# the currently executing command.
def current_command
  @current_command
end

Instance Method Details

#add_command_option(*opts) ⇒ Object



500
501
502
503
# File 'lib/origen/application/lsf_manager.rb', line 500

# Appends the given option(s) to the recorded command options and returns the
# resulting list. A new array is built on every call.
def add_command_option(*opts)
  @command_options = (@command_options || []) + opts
end

#build_log(options = {}) ⇒ Object

Build the log file from the completed jobs



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/origen/application/lsf_manager.rb', line 255

# Build the log file from the completed jobs.
#
# Replays the log of every completed job through Origen.log, extracts the
# per-job summary counts and adds them to the application stats, then prints
# the stats summary.
#
# options[:log_file] - name of the Origen.log method used for the separator
#                      and blank lines, defaults to :info
# The options hash is also handed to Origen.log's relog for each replayed line.
def build_log(options = {})
  log_method = options[:log_file] || :info
  Origen.log.send(log_method, '*' * 70)
  completed_jobs.each do |job|
    # The counters must be zeroed for every job. If they were only reset once,
    # a job whose log lacks one of the summary lines (e.g. it crashed early)
    # would add the previous job's value to the totals a second time.
    @completed_patterns = 0
    @total_vectors = 0
    @total_duration = 0
    @completed_files = 0
    @changed_patterns = 0
    @changed_files = 0
    @new_patterns = 0
    @new_files = 0
    @failed_patterns = 0
    @failed_files = 0
    File.open(log_file(job[:id])) do |f|
      last_line_blank = false
      f.readlines.each do |line|
        # Capture and combine the per job stats that look like this:
        #   Total patterns:   1              1347      0.003674
        #   New patterns:     0
        #   Changed patterns: 1
        #   FAILED patterns:  1
        #   Total files:      1
        #   New files:        0
        #   Changed files:    0
        #   FAILED files:     1

        line.gsub!(/\e\[\d+m/, '') # Remove any coloring
        if line =~ /Total patterns:\s+(\d+)/
          @completed_patterns = Regexp.last_match[1].to_i
        elsif line =~ /Total vectors:\s+(\d+)/
          @total_vectors = Regexp.last_match[1].to_i
        elsif line =~ /Total duration:\s+(\d+\.\d+)/
          @total_duration = Regexp.last_match[1].to_f
        elsif line =~ /Total files:\s+(\d+)/
          @completed_files = Regexp.last_match[1].to_i
        elsif line =~ /Changed patterns:\s+(\d+)/
          @changed_patterns = Regexp.last_match[1].to_i
        elsif line =~ /Changed files:\s+(\d+)/
          @changed_files = Regexp.last_match[1].to_i
        elsif line =~ /New patterns:\s+(\d+)/
          @new_patterns = Regexp.last_match[1].to_i
        elsif line =~ /New files:\s+(\d+)/
          @new_files = Regexp.last_match[1].to_i
        elsif line =~ /FAILED patterns:\s+(\d+)/
          @failed_patterns = Regexp.last_match[1].to_i
        elsif line =~ /FAILED files:\s+(\d+)/
          @failed_files = Regexp.last_match[1].to_i
        elsif line =~ /ERROR!/
          stats.errors += 1
          Origen.log.send :relog, line, options
        elsif line =~ /^\s*$/ || line =~ /.*\|\|\s*$/
          # Compress multiple blank lines
          unless last_line_blank
            Origen.log.send(log_method, nil)
            last_line_blank = true
          end
        else
          # Screen std origen output
          unless line =~ /  origen save/ ||
                 line =~ /Insecure world writable dir/ ||
                 line =~ /To save all of/
            line.strip!
            Origen.log.send :relog, line, options
            last_line_blank = false
          end
        end
      rescue
        # Sometimes illegal UTF-8 characters can get into crash dumps, if this
        # happens just print the line out rather than die
        Origen.log.error line
      end
    end
    stats.completed_patterns += @completed_patterns
    stats.total_vectors += @total_vectors
    stats.total_duration += @total_duration
    stats.completed_files += @completed_files
    stats.changed_patterns += @changed_patterns
    stats.changed_files += @changed_files
    stats.new_patterns += @new_patterns
    stats.new_files += @new_files
    stats.failed_patterns += @failed_patterns
    stats.failed_files += @failed_files
  end
  Origen.log.send(log_method, '*' * 70)
  stats.print_summary
end

#classify_jobsObject



509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/origen/application/lsf_manager.rb', line 509

# Sorts every known remote job into the queuing/running/passed/failed/lost
# buckets, after first emptying those buckets.
#
# Jobs that already carry a :status go straight to the matching bucket. For the
# others the status is worked out from the result files and from the job ids
# the LSF currently reports as queuing or running.
def classify_jobs
  clear_caches
  queuing_job_ids = lsf.queuing_job_ids
  running_job_ids = lsf.running_job_ids
  remote_jobs.each do |_id, job|
    if job[:status]
      # If the status has already been determined send it straight to the bucket
      send("#{job[:status]}_jobs") << job
    elsif job[:lsf_id] == :error
      job[:status] = :lost
      lost_jobs << job
    elsif job_completed?(job[:id])
      if job_passed?(job[:id])
        job[:status] = :passed
        passed_jobs << job
      elsif job_failed?(job[:id])
        job[:status] = :failed
        failed_jobs << job
      end
    elsif running_job_ids.include?(job[:lsf_id])
      running_jobs << job
      # Once we have assigned a job as running make sure the job is marked as started
      # It can flicker back to queued if the started file takes a long time to arrive
      # from the remote host.
      # The started file is keyed by the job's own id (the same id job_started?
      # is queried with below), not by the LSF id.
      job_started(job[:id])
    elsif queuing_job_ids.include?(job[:lsf_id])
      queuing_jobs << job
    elsif job_started?(job[:id])
      # There can be considerable latency between the job writing the passed/failed
      # file remotely and it showing up on the local machine.
      # Give some buffer to that before declaring the file lost.
      if job[:completed_at]
        if (Time.now - job[:completed_at]) < 60
          running_jobs << job
        else
          lost_jobs << job
        end
      else
        job[:completed_at] = Time.now
        running_jobs << job
      end
    # Give jobs submitted less than a minute ago the benefit of the
    # doubt, they may not have shown up in bjobs yet
    elsif (Time.now - job[:submitted_at]) < 60
      queuing_jobs << job
    else
      lost_jobs << job
    end
  end
end

#clear(options) ⇒ Object

Clear jobs from memory



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/origen/application/lsf_manager.rb', line 207

# Clear jobs from memory.
#
# options[:type] - :all removes the saved jobs file (if present) and forgets
#                  every job; any other value removes the jobs currently held
#                  in the "<type>_jobs" bucket
# options[:id]   - used when no :type is given, removes that single job
def clear(options)
  type = options[:type]
  return remote_jobs.delete(options[:id]) unless type

  if type == :all
    File.delete(remote_jobs_file) if File.exist?(remote_jobs_file)
    @remote_jobs = {}
    nil
  else
    send("#{type}_jobs").each { |job| remote_jobs.delete(job[:id]) }
  end
end

#clear_allObject



223
224
225
226
227
228
229
230
231
# File 'lib/origen/application/lsf_manager.rb', line 223

# Removes the saved jobs file and every remote log, recreates an empty log
# directory, then forgets all jobs and empties the status buckets.
def clear_all
  File.delete(remote_jobs_file) if File.exist?(remote_jobs_file)
  FileUtils.rm_rf(log_file_directory) if File.exist?(log_file_directory)
  FileUtils.mkdir_p(log_file_directory)
  @remote_jobs = {}
  clear_caches
end

#clear_cachesObject



564
565
566
567
568
569
570
# File 'lib/origen/application/lsf_manager.rb', line 564

# Drops all five status buckets so that the next access starts from an empty
# list.
def clear_caches
  @running_jobs = @queuing_jobs = @passed_jobs = @failed_jobs = @lost_jobs = nil
end

#command_options(command_str) ⇒ Object



455
456
457
458
459
460
461
462
463
464
# File 'lib/origen/application/lsf_manager.rb', line 455

# Returns the recorded command options when the command named in command_str
# (after alias expansion) is the currently executing command, otherwise an
# empty string.
#
# command_str - a command line such as "origen g my_pattern"; the first
#               "origen" is stripped before the command word is read
#
# An empty string is also returned when command_str contains no command word
# at all, rather than raising.
def command_options(command_str)
  command = command_str.sub(/origen\s*/, '')[/\w+/]
  return '' unless command

  command = ORIGEN_COMMAND_ALIASES[command] || command
  command == current_command ? @command_options : ''
end

#command_options=(opts) ⇒ Object

This will be called by the command dispatcher to record any options that were passed in when launching the current command. These will be automatically appended if the current command spawns any LSF jobs that will invoke the same command.



470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/origen/application/lsf_manager.rb', line 470

# Records the options that were passed to the current command so that they can
# be appended to LSF jobs that invoke the same command.
#
# Options that are incompatible with the LSF, or that are added elsewhere, are
# stripped first. Note that they are removed from the given array itself.
# Only the first occurrence of each switch is removed.
def command_options=(opts)
  # Maps switch name(s) to what follows them:
  #   false - the switch takes no argument, only the switch is removed
  #   '*'   - the switch and whatever follows it are removed
  #   list  - the following argument is also removed if it is in the list
  removals = {
    ['-h', '--help']        => false,
    ['-w', '--wait']        => false,
    ['-d', '--debug']       => false,
    ['-c', '--continue']    => false,
    '--exec_remote'         => false,
    ['-t', '--target']      => '*',
    ['-e', '--environment'] => '*',
    '--id'                  => '*',
    ['-l', '--lsf']         => %w(add clear)
  }
  removals.each do |names, values|
    [names].flatten.each do |name|
      position = opts.index(name)
      next unless position

      opts.delete_at(position)
      [values].flatten.each do |value|
        next unless value

        opts.delete_at(position) if value == '*' || opts[position] == value
      end
    end
  end
  @command_options = (@command_options || []) + opts
end

#command_prefix(id, dependents) ⇒ Object



441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/origen/application/lsf_manager.rb', line 441

# Builds the text that is placed in front of a command submitted to the LSF:
# the optional site-configured prefix, a cd to the application root and the
# "origen l --execute" invocation carrying the job id and any dependent ids.
def command_prefix(id, dependents)
  # Fall back to a blank string if Origen.site_config.lsf_command_prefix is not defined
  site_prefix = Origen.site_config.lsf_command_prefix || ''
  invocation = site_prefix + "cd #{Origen.root}; origen l --execute --id #{id} "
  return invocation if dependents.empty?

  invocation + "--dependents #{dependents.join(',')} "
end

#completed_jobsObject



580
581
582
# File 'lib/origen/application/lsf_manager.rb', line 580

# Returns a new array holding the passed jobs followed by the failed jobs.
def completed_jobs
  passed_jobs.dup.concat(failed_jobs)
end

#execute_remotely(options = {}) ⇒ Object



647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
# File 'lib/origen/application/lsf_manager.rb', line 647

# Runs a job's command, writes its combined stdout/stderr to the job's log
# file and records the outcome through the started/passed/failed markers.
#
# options[:id]         - id of the job being executed
# options[:cmd]        - the command, either a String or an Array of words
# options[:dependents] - optional ids of jobs that must have passed first
#
# Any error raised while running is swallowed and the job is marked failed.
def execute_remotely(options = {})
  job_started(options[:id])
  begin
    # Normalise the command up front so that both the String and the Array
    # form can be reported if the dependents turn out to have failed
    cmd = options[:cmd].is_a?(Array) ? options[:cmd].join(' ') : options[:cmd]
    if options[:dependents]
      wait_for_completion(ids:                      options[:dependents],
                          poll_duration_in_seconds: 1,
                          # Don't wait long by the time this runs the LSF
                          # should have guaranteed the job has run
                          timeout_in_seconds:       120)
      unless options[:dependents].all? { |id| job_passed?(id) }
        File.open(log_file(options[:id]), 'w') do |f|
          f.puts "*** ERROR! *** #{cmd} ***"
          f.puts 'Dependents failed!'
        end
        fail 'Dependents failed!'
      end
    end
    output = `#{cmd} 2>&1`
    File.open(log_file(options[:id]), 'w') do |f|
      f.write output
    end
    if $CHILD_STATUS.success?
      job_passed(options[:id])
    else
      job_failed(options[:id])
    end
  rescue
    job_failed(options[:id])
  end
end

#extract_ids(jobs_or_ids) ⇒ Object



419
420
421
# File 'lib/origen/application/lsf_manager.rb', line 419

# Converts a mixed list of job hashes and plain ids into a list of ids: hashes
# are replaced by their :id entry, anything else is passed through unchanged.
def extract_ids(jobs_or_ids)
  jobs_or_ids.map do |entry|
    next entry unless entry.is_a?(Hash)

    entry[:id]
  end
end

#failed_file(id) ⇒ Object



359
360
361
# File 'lib/origen/application/lsf_manager.rb', line 359

# Path of the marker file that records that the given job failed: the job's
# log file path with ".failed" appended.
def failed_file(id)
  [log_file_directory, "#{log_file_name(id)}.failed"].join('/')
end

#failed_jobsObject

Failed jobs are those that started to produce a log file but did not complete



589
590
591
# File 'lib/origen/application/lsf_manager.rb', line 589

# Bucket of failed jobs, created empty on first access. Failed jobs are those
# that started to produce a log file but did not complete.
def failed_jobs
  @failed_jobs || (@failed_jobs = [])
end

#generate_job_idObject



623
624
625
# File 'lib/origen/application/lsf_manager.rb', line 623

# Generates a job id from the current time: the floating point timestamp with
# its decimal point removed.
def generate_job_id
  Time.now.to_f.to_s.delete('.')
end

#job_completed?(id) ⇒ Boolean

Returns true if the given job ID generated a complete file when run on the LSF. The complete file is created at the end of a job run and its presence indicates that the job ran and got past the generation/compile stage without crashing.

Returns:

  • (Boolean)


602
603
604
605
# File 'lib/origen/application/lsf_manager.rb', line 602

# Returns true if the given job has a started marker and also either a passed
# or a failed marker.
def job_completed?(id)
  return false unless job_started?(id)

  job_passed?(id) || job_failed?(id)
end

#job_failed(id) ⇒ Object

Register that the given job ID has failed on the LSF



378
379
380
# File 'lib/origen/application/lsf_manager.rb', line 378

# Register that the given job ID has failed on the LSF by creating (or
# updating the timestamp of) its failed marker file.
#
# FileUtils is used rather than shelling out to `touch` so that paths
# containing spaces or shell metacharacters are handled correctly.
def job_failed(id)
  FileUtils.touch(failed_file(id))
end

#job_failed?(id) ⇒ Boolean

Returns:

  • (Boolean)


619
620
621
# File 'lib/origen/application/lsf_manager.rb', line 619

# Returns true if the failed marker file for the given job exists.
def job_failed?(id)
  marker = failed_file(id)
  File.exist?(marker)
end

#job_passed(id) ⇒ Object

Register that the given job ID has completed successfully on the LSF



373
374
375
# File 'lib/origen/application/lsf_manager.rb', line 373

# Register that the given job ID has completed successfully on the LSF by
# creating (or updating the timestamp of) its passed marker file.
#
# FileUtils is used rather than shelling out to `touch` so that paths
# containing spaces or shell metacharacters are handled correctly.
def job_passed(id)
  FileUtils.touch(passed_file(id))
end

#job_passed?(id) ⇒ Boolean

Returns:

  • (Boolean)


615
616
617
# File 'lib/origen/application/lsf_manager.rb', line 615

# Returns true if the passed marker file for the given job exists.
def job_passed?(id)
  marker = passed_file(id)
  File.exist?(marker)
end

#job_running?(id) ⇒ Boolean

Returns:

  • (Boolean)


607
608
609
# File 'lib/origen/application/lsf_manager.rb', line 607

# Returns true for any job that is not completed. This includes jobs that have
# not started yet.
def job_running?(id)
  job_completed?(id) ? false : true
end

#job_started(id) ⇒ Object



382
383
384
# File 'lib/origen/application/lsf_manager.rb', line 382

# Register that the given job ID has started on the LSF by creating (or
# updating the timestamp of) its started marker file.
#
# FileUtils is used rather than shelling out to `touch` so that paths
# containing spaces or shell metacharacters are handled correctly.
def job_started(id)
  FileUtils.touch(started_file(id))
end

#job_started?(id) ⇒ Boolean

Returns:

  • (Boolean)


611
612
613
# File 'lib/origen/application/lsf_manager.rb', line 611

# Returns true if the started marker file for the given job exists.
def job_started?(id)
  marker = started_file(id)
  File.exist?(marker)
end

#log_file(id) ⇒ Object

Returns the log file that should be used by a given process on the LSF; this should be guaranteed to be unique.



347
348
349
# File 'lib/origen/application/lsf_manager.rb', line 347

# Returns the path of the log file used by the given job: the job's log file
# name inside the remote log directory.
def log_file(id)
  [log_file_directory, log_file_name(id)].join('/')
end

#log_file_directoryObject



368
369
370
# File 'lib/origen/application/lsf_manager.rb', line 368

# Directory, below the application root, in which the remote job logs and
# their marker files are kept.
def log_file_directory
  [Origen.root, '.lsf', 'remote_logs'].join('/')
end

#log_file_name(id) ⇒ Object



363
364
365
366
# File 'lib/origen/application/lsf_manager.rb', line 363

# Name (without directory) of the log file for the given job id.
def log_file_name(id)
  id.to_s + '.txt'
end

#lost_jobsObject

Lost jobs are ones that for whatever reason did not start, or at least get far enough to log that they started



595
596
597
# File 'lib/origen/application/lsf_manager.rb', line 595

# Bucket of lost jobs, created empty on first access. Lost jobs are ones that
# for whatever reason did not start, or at least did not get far enough to log
# that they started.
def lost_jobs
  @lost_jobs || (@lost_jobs = [])
end

#lsfObject

Picks and returns either the application’s LSF instance or the global LSF instance



21
22
23
24
25
26
27
# File 'lib/origen/application/lsf_manager.rb', line 21

# Picks and returns either the global LSF instance (when Origen is running
# globally) or the application's LSF instance.
def lsf
  Origen.running_globally? ? Origen.lsf! : Origen.app.lsf
end

#on_origen_shutdown(_options = {}) ⇒ Object



637
638
639
# File 'lib/origen/application/lsf_manager.rb', line 637

# Shutdown callback: saves the remote jobs, but only if they were loaded or
# modified during this run (i.e. @remote_jobs has been set).
def on_origen_shutdown(_options = {})
  return unless @remote_jobs

  save_remote_jobs
end

#outstanding_jobs?Boolean

Returns:

  • (Boolean)


202
203
204
# File 'lib/origen/application/lsf_manager.rb', line 202

# Returns true while at least one job is in the running or queuing bucket.
def outstanding_jobs?
  running_jobs.size + queuing_jobs.size > 0
end

#passed_file(id) ⇒ Object



351
352
353
# File 'lib/origen/application/lsf_manager.rb', line 351

# Path of the marker file that records that the given job passed: the job's
# log file path with ".passed" appended.
def passed_file(id)
  [log_file_directory, "#{log_file_name(id)}.passed"].join('/')
end

#passed_jobsObject



584
585
586
# File 'lib/origen/application/lsf_manager.rb', line 584

# Bucket of passed jobs, created empty on first access.
def passed_jobs
  @passed_jobs || (@passed_jobs = [])
end


137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/origen/application/lsf_manager.rb', line 137

# Logs the details of a single job, or of every job in the selected buckets.
#
# options[:id]   - when given, only the job with this id is printed
# options[:type] - :queuing, :running, :lost, :passed, :failed or :all; it is
#                  set to :all on the given hash when missing
def print_details(options = {})
  if options[:id]
    Origen.log.info "Job: #{options[:id]}"
    Origen.log.info '-' * (4 + options[:id].length)
    print_details_of(remote_jobs[options[:id]])
  else
    options[:type] ||= :all
    # Bucket type, heading, underline and the reader that supplies the jobs,
    # in the order the sections are printed
    [
      [:queuing, 'Queuing', '-------', :queuing_jobs],
      [:running, 'Running', '-------', :running_jobs],
      [:lost,    'Lost',    '----',    :lost_jobs],
      [:passed,  'Passed',  '------',  :passed_jobs],
      [:failed,  'Failed',  '------',  :failed_jobs]
    ].each do |type, heading, underline, reader|
      next unless options[:type] == :all || options[:type] == type

      Origen.log.info ''
      Origen.log.info heading
      Origen.log.info underline
      send(reader).each { |j| print_details_of(j) }
    end
  end
end


177
178
179
180
181
182
# File 'lib/origen/application/lsf_manager.rb', line 177

# Logs the command line (with the --exec_remote switch hidden), the id and the
# age of a single job, followed by a blank line.
def print_details_of(job)
  logger = Origen.log
  invocation = [job[:command], job[:switches]].join(' ')
  logger.info invocation.gsub(' --exec_remote', '')
  logger.info "ID: #{job[:id]}"
  logger.info "Submitted: #{time_ago(job[:submitted_at])}"
  logger.info ''
end


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/origen/application/lsf_manager.rb', line 87

# Logs a summary of how many jobs are in each bucket, optionally preceded by
# the job details and followed by hints on the common follow-up commands.
#
# options[:verbose]           - when truthy the job details are printed first
# options[:print_insructions] - (sic) set to false to suppress the hints,
#                               defaults to true
def print_status(options = {})
  options = { print_insructions: true }.merge(options)
  print_details(options) if options[:verbose]

  logger = Origen.log
  [
    '',
    'LSF Status',
    '----------',
    "Queuing:    #{queuing_jobs.size}",
    "Running:    #{running_jobs.size}",
    "Lost:       #{lost_jobs.size}",
    '',
    "Passed:     #{passed_jobs.size}",
    "Failed:     #{failed_jobs.size}",
    ''
  ].each { |text| logger.info text }
  return unless options[:print_insructions]

  logger.info 'Common tasks'
  logger.info '------------'
  # Heading, bucket and hint lines for each section; a section is only shown
  # when its bucket is not empty
  [
    ['Queuing', queuing_jobs, [' Show details: origen l -v -t queuing',
                               ' Re-submit:    origen l -r -t queuing']],
    ['Running', running_jobs, [' Show details: origen l -v -t running',
                               ' Re-submit:    origen l -r -t running']],
    ['Lost',    lost_jobs,    [' Show details: origen l -v -t lost',
                               ' Re-submit:    origen l -r -t lost']],
    ['Passed',  passed_jobs,  [' Build log:    origen l -l']],
    ['Failed',  failed_jobs,  [' Show details: origen l -v -t failed',
                               ' Re-submit:    origen l -r -t failed']]
  ].each do |heading, bucket, hints|
    next unless bucket.size > 0

    logger.info heading
    hints.each { |hint| logger.info hint }
  end
  logger.info ''
  logger.info 'Reset the LSF manager (clear all jobs): origen lsf -c -t all'
  logger.info ''
end

#queuing_jobsObject



576
577
578
# File 'lib/origen/application/lsf_manager.rb', line 576

# Bucket of queuing jobs, created empty on first access.
def queuing_jobs
  @queuing_jobs || (@queuing_jobs = [])
end

#remote_jobsObject



505
506
507
# File 'lib/origen/application/lsf_manager.rb', line 505

# All known jobs keyed by job id. On first access they are restored from the
# saved jobs file, falling back to an empty hash when nothing can be restored.
def remote_jobs
  return @remote_jobs if @remote_jobs

  @remote_jobs = restore_remote_jobs || {}
end

#remote_jobs_fileObject



29
30
31
# File 'lib/origen/application/lsf_manager.rb', line 29

# Path, below the application root, of the file in which the remote jobs are
# saved between runs.
def remote_jobs_file
  [Origen.root, '.lsf', 'remote_jobs'].join('/')
end

#restore_remote_jobsObject



627
628
629
630
631
632
633
634
635
# File 'lib/origen/application/lsf_manager.rb', line 627

# Loads the jobs that were saved to the remote jobs file.
#
# Returns the restored object, or nil when the file does not exist or its
# content cannot be unmarshalled.
#
# NOTE(review): Marshal.load must only ever be given trusted data. The file
# read here is presumably only written by save_remote_jobs - confirm that
# nothing else can place content there.
def restore_remote_jobs
  return unless File.exist?(remote_jobs_file)

  # Marshal data is binary, so read it in binary mode to avoid any newline or
  # encoding conversion
  File.open(remote_jobs_file, 'rb') do |f|
    begin
      Marshal.load(f)
    rescue StandardError
      nil
    end
  end
end

#resubmit(options) ⇒ Object

Resubmit jobs



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/origen/application/lsf_manager.rb', line 234

# Resubmit jobs.
#
# options[:type] - :all resubmits every known job; any other value resubmits
#                  the jobs currently held in the "<type>_jobs" bucket
# options[:id]   - used when no :type is given, resubmits that single job
def resubmit(options)
  type = options[:type]
  return resubmit_job(remote_jobs[options[:id]]) unless type

  if type == :all
    remote_jobs.each_value { |job| resubmit_job(job) }
  else
    send("#{type}_jobs").each { |job| resubmit_job(job) }
  end
end

#resubmit_job(job) ⇒ Object



386
387
388
389
390
391
392
393
394
395
# File 'lib/origen/application/lsf_manager.rb', line 386

# Submits the given job to the LSF again: removes its log and marker files,
# submits the original command and resets the job's bookkeeping (new LSF id,
# no status, fresh submission time, incremented submission count).
def resubmit_job(job)
  id = job[:id]
  stale_files = [log_file(id), passed_file(id), failed_file(id), started_file(id)]
  stale_files.each do |path|
    FileUtils.rm_f(path) if File.exist?(path)
  end
  full_command = command_prefix(id, job[:dependents_ids]) + job[:command] + job[:switches]
  job[:lsf_id] = lsf.submit(full_command, dependents: job[:dependents_lsf_ids])
  job[:status] = nil
  job[:completed_at] = nil
  job[:submitted_at] = Time.now
  job[:submissions] += 1
end

#running_jobsObject



572
573
574
# File 'lib/origen/application/lsf_manager.rb', line 572

# Bucket of running jobs, created empty on first access.
def running_jobs
  @running_jobs || (@running_jobs = [])
end

#save_remote_jobsObject



641
642
643
644
645
# File 'lib/origen/application/lsf_manager.rb', line 641

# Writes the current jobs (@remote_jobs) to the remote jobs file using
# Marshal, replacing any previous content.
def save_remote_jobs
  # Marshal data is binary, so write it in binary mode to avoid any newline or
  # encoding conversion
  File.open(remote_jobs_file, 'wb') do |f|
    Marshal.dump(@remote_jobs, f)
  end
end

#started_file(id) ⇒ Object



355
356
357
# File 'lib/origen/application/lsf_manager.rb', line 355

# Path of the marker file that records that the given job started: the job's
# log file path with ".started" appended.
def started_file(id)
  [log_file_directory, "#{log_file_name(id)}.started"].join('/')
end

#statsObject



250
251
252
# File 'lib/origen/application/lsf_manager.rb', line 250

# Shortcut to the stats object of the current application (Origen.app.stats).
def stats
  Origen.app.stats
end

#submit_job(command, options = {}) ⇒ Object



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/origen/application/lsf_manager.rb', line 397

# Submits the given command to the LSF and registers the new job under a
# freshly generated id. Returns the attributes recorded for the job.
#
# options[:lsf_option_string] - extra switches appended to the command
# options[:depend], [:depends], [:dependent], [:dependents]
#                             - job(s) or job id(s) that this job depends on;
#                               all four spellings are accepted and combined
def submit_job(command, options = {})
  options = { lsf_option_string: '' }.merge(options)
  switches = [' ', options[:lsf_option_string], command_options(command)].flatten.compact.join(' ')
  id = generate_job_id
  requested = options.values_at(:depend, :depends, :dependent, :dependents)
  dependents_ids = extract_ids(requested.flatten.compact)
  dependents_lsf_ids = dependents_ids.map { |dep_id| remote_jobs[dep_id][:lsf_id] }
  full_command = command_prefix(id, dependents_ids) + command + switches
  lsf_id = lsf.submit(full_command, dependents: dependents_lsf_ids)
  remote_jobs[id] = {
    id:                 id,
    lsf_id:             lsf_id,
    command:            command,
    submitted_at:       Time.now,
    submissions:        1,
    switches:           switches,
    dependents_ids:     dependents_ids,
    dependents_lsf_ids: dependents_lsf_ids
  }
end

#submit_origen_job(cmd, options = {}) ⇒ Object



423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
# File 'lib/origen/application/lsf_manager.rb', line 423

# Submits an Origen command to the LSF.
#
# cmd              - the command, with or without a leading "origen "
# options[:action] - optional Origen command to place in front of cmd; the
#                    value :pattern is translated to "generate"
# The options are passed on to submit_job unchanged.
def submit_origen_job(cmd, options = {})
  action = case options[:action]
           when nil, false then ''
           when :pattern then ' generate'
           else " #{options[:action]}"
           end

  str = "#{action} #{cmd}".strip
  str = str.sub('origen ', '') if str =~ /^origen /

  # The --exec_remote switch is appended to all Origen commands, this allows
  # command processing to be altered based on whether it is running locally or
  # remotely by testing Origen.running_remotely?
  submit_job("origen #{str} --exec_remote", options)
end

#time_ago(time) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/origen/application/lsf_manager.rb', line 184

# Describes how long ago the given time was, in whole units of the largest
# fitting unit (seconds, minutes, hours or days), e.g. "3 minutes ago".
#
# time - the Time to describe
#
# The unit is only left singular for exactly 1, so zero reads "0 seconds ago".
def time_ago(time)
  seconds = (Time.now - time).to_i
  unit, number =
    if seconds < 60
      ['second', seconds]
    elsif seconds < 3600
      ['minute', seconds / 60]
    elsif seconds < 86_400
      ['hour', seconds / 3600]
    else
      ['day', seconds / 86_400]
    end
  "#{number} #{unit}#{number == 1 ? '' : 's'} ago"
end

#wait_for_completion(options = {}) ⇒ Object

Waits for all jobs to complete, will retry lost jobs (optionally failed jobs).

Alternatively supply an :id or an array of :ids to wait only for specific job(s) to complete.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/origen/application/lsf_manager.rb', line 38

# Waits for all jobs to complete, will retry lost jobs (optionally failed jobs).
#
# Alternatively supply an :id or an array of :ids to wait only for specific
# job(s) to complete.
#
# Options (defaults in brackets):
#   :max_lost_retries         (10)   - how many times a lost job is resubmitted
#   :max_fail_retries         (0)    - how many times a failed job is resubmitted
#   :poll_duration_in_seconds (10)   - sleep between two checks
#   :timeout_in_seconds       (3600) - once this much time has passed since
#                                      :start_time the method returns without
#                                      checking again
#   :start_time               (now)  - set on the first call and carried through
#                                      the recursive calls
#
# The method polls by calling itself again after each sleep, it does not raise
# when the timeout expires.
def wait_for_completion(options = {})
  options = {
    max_lost_retries:         10,
    max_fail_retries:         0,
    poll_duration_in_seconds: 10,
    timeout_in_seconds:       3600
  }.merge(options)
  options[:start_time] ||= Time.now
  if Time.now - options[:start_time] < options[:timeout_in_seconds]
    # When waiting for ids we will hold by monitoring for the result
    # files directly, rather than using the generic classify routine.
    # This is because the most common use case for this is when jobs
    # are idling remotely on the LSF and don't want to run into contention
    # issues when multiple processes try to classify/save the status.
    if options[:id] || options[:ids]
      ids = extract_ids([options[:id], options[:ids]].flatten.compact)
      if ids.any? { |id| job_running?(id) }
        sleep options[:poll_duration_in_seconds]
        wait_for_completion(options)
      end

    else
      classify_jobs
      print_status(print_insructions: false)
      sleep options[:poll_duration_in_seconds]
      # Re-classify after the sleep so that the retry decisions below are made
      # on the current state
      classify_jobs
      resumitted = false
      # A job is retried while its number of submissions so far does not
      # exceed the allowed number of retries
      lost_jobs.each do |job|
        if job[:submissions] < options[:max_lost_retries] + 1
          resubmit_job(job)
          resumitted = true
        end
      end
      failed_jobs.each do |job|
        if job[:submissions] < options[:max_fail_retries] + 1
          resubmit_job(job)
          resumitted = true
        end
      end
      # Classify once more so that outstanding_jobs? sees any job that was
      # just resubmitted
      classify_jobs
      if outstanding_jobs? || resumitted
        wait_for_completion(options)
      else
        print_status
      end
    end
  end
end