Class: Bio::SGE

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-sge.rb

Constant Summary collapse

@@slice =

Number of files per directory

1000
@@template =

Template string for script generation

<<'END'
#$ -S /usr/local/bin/ruby

work_dir = "%WORK_DIR%"

offset = ENV["SGE_TASK_ID"].to_i
# A step size below 1 would leave this task with nothing to do.
limit  = [ENV["SGE_TASK_STEPSIZE"].to_i, 1].max
last   = ENV["SGE_TASK_LAST"].to_i

slice = slice_old = nil

# This task owns the ids offset .. offset + limit - 1; the id offset + limit
# is the first id of the next array task and must not be run twice.
offset.upto(offset + limit - 1) do |task_id|
  break if task_id > last

  slice_old = slice
  slice = (task_id - 1) / %SLICE% + 1
  output_dir = "%OUTPUT_DIR%/#{slice}"
  error_dir = "%ERROR_DIR%/#{slice}"
  Dir.mkdir(output_dir) if slice_old != slice and ! File.directory?(output_dir)
  Dir.mkdir(error_dir)  if slice_old != slice and ! File.directory?(error_dir)

  input_file  = "%INPUT_DIR%/#{slice}/#{task_id}"
  output_file = "%OUTPUT_DIR%/#{slice}/#{task_id}"
  error_file  = "%ERROR_DIR%/#{slice}/#{task_id}"

  query = input_file
  target = "%TARGET%"

  if File.exist?(query)
    system("%COMMAND% > #{output_file} 2> #{error_file}")
  end
end
END

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query = nil, target = nil, command = nil, sge_opts = nil) {|_self| ... } ⇒ SGE

Returns a new instance of SGE.

Yields:

  • (_self)

Yield Parameters:

  • _self (Bio::SGE)

    the object that the method was called on



341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# File 'lib/bio-sge.rb', line 341

# Builds a job description rooted at the current working directory.
#
# @param query [String, nil] query file name; stored joined onto the working directory
# @param target [String, nil] target (database) name; stored joined onto the working directory
# @param command [String, nil] command text substituted into the generated job script
# @param sge_opts [String, nil] extra options placed on the qsub command line
# @yield [_self] the new object, so the caller can adjust any attribute
def initialize(query = nil, target = nil, command = nil, sge_opts = nil)
  @work_dir = Dir.pwd
  @query = "#{@work_dir}/#{query}"
  @target = "#{@work_dir}/#{target}"
  @command = command
  @sge_opts = sge_opts

  # Assign the defaults BEFORE yielding; otherwise a directory or file name
  # chosen inside the block would be overwritten right after the block returns.
  @log_dir = "log"
  @input_dir = "input"
  @output_dir = "output"
  @error_dir = "error"
  @script_file = "script.rb"
  @count_file = "count.txt"

  yield(self) if block_given?
end

Instance Attribute Details

#command ⇒ Object

Returns the value of attribute command.



337
338
339
# File 'lib/bio-sge.rb', line 337

# Reader for the command text that #script substitutes for %COMMAND%
# in the generated job script.
def command; @command; end

#count ⇒ Object

Returns the value of attribute count.



337
338
339
# File 'lib/bio-sge.rb', line 337

# Reader for the entry counter: incremented by #extract for every entry it
# sees, or read back from the count file by #submit.
def count; @count; end

#error_dir ⇒ Object

Returns the value of attribute error_dir.



339
340
341
# File 'lib/bio-sge.rb', line 339

# Reader for the directory under which the job script stores each task's
# standard error ("error" by default).
def error_dir; @error_dir; end

#input_dir ⇒ Object

Returns the value of attribute input_dir.



339
340
341
# File 'lib/bio-sge.rb', line 339

# Reader for the directory into which #extract writes one file per entry
# ("input" by default).
def input_dir; @input_dir; end

#log_dir ⇒ Object

Returns the value of attribute log_dir.



339
340
341
# File 'lib/bio-sge.rb', line 339

# Reader for the directory handed to qsub's -o and -e options by #submit
# ("log" by default).
def log_dir; @log_dir; end

#output_dir ⇒ Object

Returns the value of attribute output_dir.



339
340
341
# File 'lib/bio-sge.rb', line 339

# Reader for the directory under which the job script stores each task's
# standard output ("output" by default).
def output_dir; @output_dir; end

#query ⇒ Object

Returns the value of attribute query.



337
338
339
# File 'lib/bio-sge.rb', line 337

# Reader for the path of the query file that #extract splits into entries.
def query; @query; end

#sge_opts ⇒ Object

Returns the value of attribute sge_opts.



337
338
339
# File 'lib/bio-sge.rb', line 337

# Reader for the extra options that #submit places at the front of the
# qsub command line.
def sge_opts; @sge_opts; end

#target ⇒ Object

Returns the value of attribute target.



337
338
339
# File 'lib/bio-sge.rb', line 337

# Reader for the target path that #script substitutes for %TARGET% in the
# generated job script.
def target; @target; end

#task_max ⇒ Object

Returns the value of attribute task_max.



338
339
340
# File 'lib/bio-sge.rb', line 338

# Reader for the optional upper task number: #extract skips entries numbered
# above it and #submit uses it as the end of the qsub -t range.
def task_max; @task_max; end

#task_min ⇒ Object

Returns the value of attribute task_min.



338
339
340
# File 'lib/bio-sge.rb', line 338

# Reader for the optional lower task number: #extract skips entries numbered
# below it and #submit starts the qsub -t range there (1 when unset).
def task_min; @task_min; end

#task_step ⇒ Object

Returns the value of attribute task_step.



338
339
340
# File 'lib/bio-sge.rb', line 338

# Reader for the step of the qsub -t range built by #submit (1000 when unset).
def task_step; @task_step; end

#work_dir ⇒ Object

Returns the value of attribute work_dir.



339
340
341
# File 'lib/bio-sge.rb', line 339

# Reader for the working directory captured from Dir.pwd at construction.
def work_dir; @work_dir; end

Instance Method Details

#clean ⇒ Object



399
400
401
402
# File 'lib/bio-sge.rb', line 399

# Removes what #extract produced: the count file, then the input directory.
# Returns whatever the final #rmtree call returns.
def clean
  [@count_file, @input_dir].map { |path| rmtree(path) }.last
end

#clear ⇒ Object



392
393
394
395
396
397
# File 'lib/bio-sge.rb', line 392

# Removes the generated job script and the output, error and log
# directories, in that order. Returns whatever the final #rmtree call returns.
def clear
  [@script_file, @output_dir, @error_dir, @log_dir].map { |path| rmtree(path) }.last
end

#distclean ⇒ Object



404
405
406
407
# File 'lib/bio-sge.rb', line 404

# Runs #clear and then #clean, returning the result of #clean.
def distclean
  %i[clear clean].map { |step| send(step) }.last
end

#extract ⇒ Object



441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
# File 'lib/bio-sge.rb', line 441

# Splits the query flatfile into one file per entry.
#
# Entry number n is written to "<input_dir>/<slice>/<n>", where a slice holds
# @@slice consecutive entries, and a "n<TAB>entry_id" line is appended to the
# count file. Entries numbered outside task_min..task_max (when set) are
# counted but not written. Progress is reported on $stderr.
#
# Does nothing when the count file already exists.
def extract
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return if File.exist?(@count_file)

  slice = slice_old = nil
  @count = 0
  File.open(@count_file, "a") do |count_file|
    Bio::FlatFile.auto(@query) do |ff|
      ff.each do |entry|
        @count += 1
        $stderr.print "Extracting ... #{@count} (#{entry.entry_id}) "

        if (@task_min && @count < @task_min) || (@task_max && @count > @task_max)
          $stderr.puts "skip."
          next
        end

        slice_old = slice
        slice = (@count - 1) / @@slice + 1
        slice_dir = "#{@input_dir}/#{slice}"
        # Only attempt to create the directory when a new slice starts.
        mkpath(slice_dir) if slice_old != slice
        File.open("#{slice_dir}/#{@count}", "w") do |file|
          file.puts ff.entry_raw
        end
        count_file.puts [@count, entry.entry_id].join("\t")
        $stderr.puts "done."
      end
    end
  end
end

#mkpath(dir) ⇒ Object



409
410
411
412
413
414
415
416
417
# File 'lib/bio-sge.rb', line 409

# Creates +dir+ (with any missing parents) unless it is already a directory,
# reporting the outcome on $stderr.
def mkpath(dir)
  $stderr.print "Creating #{dir} ... "
  return $stderr.puts("skip (already exists).") if File.directory?(dir)

  FileUtils.mkpath(dir)
  $stderr.puts "done."
end

#prepare ⇒ Object



358
359
360
361
362
# File 'lib/bio-sge.rb', line 358

# Runs the preparation steps in order: #setup, #script, then #extract.
# Returns the result of #extract.
def prepare
  %i[setup script extract].map { |step| send(step) }.last
end

#rmtree(file) ⇒ Object



386
387
388
389
390
# File 'lib/bio-sge.rb', line 386

# Recursively and forcibly deletes +file+ (a file or a directory tree),
# reporting progress on $stderr. A missing path is not an error.
def rmtree(file)
  $stderr.print("Deleting #{file} ... ")
  FileUtils.rm_rf(file) # rmtree is an alias of rm_rf
  $stderr.puts("done.")
end

#script ⇒ Object



426
427
428
429
430
431
432
433
434
435
436
437
438
439
# File 'lib/bio-sge.rb', line 426

# Renders the job script from @@template by filling in every %PLACEHOLDER%
# and writes the result to the script file.
def script
  # Applied in this order, exactly one global substitution per placeholder.
  substitutions = [
    ['%WORK_DIR%',   @work_dir],
    ['%INPUT_DIR%',  @input_dir],
    ['%OUTPUT_DIR%', @output_dir],
    ['%ERROR_DIR%',  @error_dir],
    ['%TARGET%',     @target],
    ['%COMMAND%',    @command],
    ['%SLICE%',      @@slice.to_s]
  ]

  rendered = substitutions.inject(@@template) do |text, (placeholder, value)|
    text.gsub(placeholder, value)
  end

  File.open(@script_file, "w") { |file| file.puts rendered }
end

#setup ⇒ Object



419
420
421
422
423
424
# File 'lib/bio-sge.rb', line 419

# Creates the log, input, output and error directories, in that order.
# Returns whatever the final #mkpath call returns.
def setup
  [@log_dir, @input_dir, @output_dir, @error_dir].map { |dir| mkpath(dir) }.last
end

#submit ⇒ Object



364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/bio-sge.rb', line 364

# Submits the generated script to SGE as one or more array jobs via qsub.
#
# When the entry count is not known yet it is taken from the leading number
# on the last line of the count file. The task range task_min..task_max
# (defaults: 1 and the entry count) is submitted in chunks of 50000 task ids,
# each chunk as "qsub <sge_opts> -o <log_dir> -e <log_dir> -cwd -t a-b:step
# <script_file>". Every command line is echoed to $stderr before it is run.
def submit
  unless @count
    $stderr.puts "Reading #{@count_file} ..."
    # An empty count file has no last line; treat it as zero entries instead
    # of failing with NoMethodError on nil.
    @count = File.readlines(@count_file).last.to_s[/^\d+/].to_i
    $stderr.puts "done."
  end

  task_min = @task_min || 1
  task_max = @task_max || @count
  task_step = @task_step || 1000

  # system upper limit is 75000
  limit = 50000
  # The qsub options are the same for every chunk.
  opts = "#{@sge_opts} -o #{@log_dir} -e #{@log_dir} -cwd"
  task_min.step(task_max, limit) do |offset|
    span = "-t #{offset}-#{[offset + limit, task_max].min}:#{task_step}"
    qsub = "qsub #{opts} #{span} #{@script_file}"
    $stderr.puts "Submitting ... #{qsub}"
    system(qsub)
  end
end