Class: Bio::SGE
- Inherits:
-
Object
- Object
- Bio::SGE
- Defined in:
- lib/bio-sge.rb
Constant Summary collapse
- @@slice =
Number of files per directory
1000
- @@template =
Template string for script generation
# Source of the per-task Ruby script that SGE runs for each array task.
# The %WORK_DIR%, %INPUT_DIR%, %OUTPUT_DIR%, %ERROR_DIR%, %TARGET%, %COMMAND%
# and %SLICE% placeholders are filled in by #script. The heredoc is
# single-quoted, so the #{...} sequences stay literal and are interpolated
# only when the generated script itself runs.
<<'END'
#$ -S /usr/local/bin/ruby

work_dir = "%WORK_DIR%"

offset = ENV["SGE_TASK_ID"].to_i
# An unset or zero step size must still cover the task's own ID.
limit = [ENV["SGE_TASK_STEPSIZE"].to_i, 1].max
last = ENV["SGE_TASK_LAST"].to_i

slice = slice_old = nil

# This task owns the IDs offset .. offset + limit - 1; the ID at
# offset + limit is the first one handled by the next array task.
offset.upto(offset + limit - 1) do |task_id|
  break if task_id > last

  slice_old = slice
  slice = (task_id - 1) / %SLICE% + 1

  output_dir = "%OUTPUT_DIR%/#{slice}"
  error_dir = "%ERROR_DIR%/#{slice}"
  Dir.mkdir(output_dir) if slice_old != slice && !File.directory?(output_dir)
  Dir.mkdir(error_dir) if slice_old != slice && !File.directory?(error_dir)

  input_file = "%INPUT_DIR%/#{slice}/#{task_id}"
  output_file = "%OUTPUT_DIR%/#{slice}/#{task_id}"
  error_file = "%ERROR_DIR%/#{slice}/#{task_id}"

  query = input_file
  target = "%TARGET%"

  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  if File.exist?(query)
    system("%COMMAND% > #{output_file} 2> #{error_file}")
  end
end
END
Instance Attribute Summary collapse
-
#command ⇒ Object
Returns the value of attribute command.
-
#count ⇒ Object
Returns the value of attribute count.
-
#error_dir ⇒ Object
Returns the value of attribute error_dir.
-
#input_dir ⇒ Object
Returns the value of attribute input_dir.
-
#log_dir ⇒ Object
Returns the value of attribute log_dir.
-
#output_dir ⇒ Object
Returns the value of attribute output_dir.
-
#query ⇒ Object
Returns the value of attribute query.
-
#sge_opts ⇒ Object
Returns the value of attribute sge_opts.
-
#target ⇒ Object
Returns the value of attribute target.
-
#task_max ⇒ Object
Returns the value of attribute task_max.
-
#task_min ⇒ Object
Returns the value of attribute task_min.
-
#task_step ⇒ Object
Returns the value of attribute task_step.
-
#work_dir ⇒ Object
Returns the value of attribute work_dir.
Instance Method Summary collapse
- #clean ⇒ Object
- #clear ⇒ Object
- #distclean ⇒ Object
- #extract ⇒ Object
-
#initialize(query = nil, target = nil, command = nil, sge_opts = nil) {|_self| ... } ⇒ SGE
constructor
A new instance of SGE.
- #mkpath(dir) ⇒ Object
- #prepare ⇒ Object
- #rmtree(file) ⇒ Object
- #script ⇒ Object
- #setup ⇒ Object
- #submit ⇒ Object
Constructor Details
#initialize(query = nil, target = nil, command = nil, sge_opts = nil) {|_self| ... } ⇒ SGE
Returns a new instance of SGE.
341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 |
# File 'lib/bio-sge.rb', line 341

# Creates a job description rooted at the current working directory.
#
# The directory and file-name defaults are assigned BEFORE the optional block
# is yielded, so values set inside the block (e.g. a different log or output
# directory) are kept instead of being overwritten by the defaults.
#
# @param query [String, nil] query file name; stored as "<work_dir>/<query>"
# @param target [String, nil] target name; stored as "<work_dir>/<target>"
# @param command [String, nil] command line later substituted into the script
# @param sge_opts [String, nil] extra options placed on the qsub command line
# @yield [_self] the new instance, for further configuration
def initialize(query = nil, target = nil, command = nil, sge_opts = nil)
  @work_dir = Dir.pwd
  @query = "#{@work_dir}/#{query}"
  @target = "#{@work_dir}/#{target}"
  @command = command
  @sge_opts = sge_opts

  @log_dir = "log"
  @input_dir = "input"
  @output_dir = "output"
  @error_dir = "error"
  @script_file = "script.rb"
  @count_file = "count.txt"

  yield(self) if block_given?
end
Instance Attribute Details
#command ⇒ Object
Returns the value of attribute command.
337 338 339 |
# File 'lib/bio-sge.rb', line 337

# Reader for the command line given to the constructor; #script substitutes
# it for the %COMMAND% placeholder.
#
# @return [Object] the current value of @command
def command
  @command
end
#count ⇒ Object
Returns the value of attribute count.
337 338 339 |
# File 'lib/bio-sge.rb', line 337

# Reader for the entry counter, which is set by #extract while it iterates
# over the query and by #submit when it reads the count file.
#
# @return [Object] the current value of @count
def count
  @count
end
#error_dir ⇒ Object
Returns the value of attribute error_dir.
339 340 341 |
# File 'lib/bio-sge.rb', line 339

# Reader for the error directory ("error" by default); #script substitutes
# it for the %ERROR_DIR% placeholder.
#
# @return [Object] the current value of @error_dir
def error_dir
  @error_dir
end
#input_dir ⇒ Object
Returns the value of attribute input_dir.
339 340 341 |
# File 'lib/bio-sge.rb', line 339

# Reader for the input directory ("input" by default), into which #extract
# writes one file per entry.
#
# @return [Object] the current value of @input_dir
def input_dir
  @input_dir
end
#log_dir ⇒ Object
Returns the value of attribute log_dir.
339 340 341 |
# File 'lib/bio-sge.rb', line 339

# Reader for the log directory ("log" by default), which #submit passes to
# qsub through the -o and -e options.
#
# @return [Object] the current value of @log_dir
def log_dir
  @log_dir
end
#output_dir ⇒ Object
Returns the value of attribute output_dir.
339 340 341 |
# File 'lib/bio-sge.rb', line 339

# Reader for the output directory ("output" by default); #script substitutes
# it for the %OUTPUT_DIR% placeholder.
#
# @return [Object] the current value of @output_dir
def output_dir
  @output_dir
end
#query ⇒ Object
Returns the value of attribute query.
337 338 339 |
# File 'lib/bio-sge.rb', line 337

# Reader for the query path ("<work_dir>/<query>" as built by the
# constructor), which #extract opens with Bio::FlatFile.auto.
#
# @return [Object] the current value of @query
def query
  @query
end
#sge_opts ⇒ Object
Returns the value of attribute sge_opts.
337 338 339 |
# File 'lib/bio-sge.rb', line 337

# Reader for the extra qsub options given to the constructor; #submit places
# them at the front of the qsub option string.
#
# @return [Object] the current value of @sge_opts
def sge_opts
  @sge_opts
end
#target ⇒ Object
Returns the value of attribute target.
337 338 339 |
# File 'lib/bio-sge.rb', line 337

# Reader for the target path ("<work_dir>/<target>" as built by the
# constructor); #script substitutes it for the %TARGET% placeholder.
#
# @return [Object] the current value of @target
def target
  @target
end
#task_max ⇒ Object
Returns the value of attribute task_max.
338 339 340 |
# File 'lib/bio-sge.rb', line 338

# Reader for the optional upper task bound: #extract skips entries numbered
# above it and #submit uses it as the last task ID (falling back to the
# entry count when unset).
#
# @return [Object] the current value of @task_max
def task_max
  @task_max
end
#task_min ⇒ Object
Returns the value of attribute task_min.
338 339 340 |
# File 'lib/bio-sge.rb', line 338

# Reader for the optional lower task bound: #extract skips entries numbered
# below it and #submit uses it as the first task ID (falling back to 1 when
# unset).
#
# @return [Object] the current value of @task_min
def task_min
  @task_min
end
#task_step ⇒ Object
Returns the value of attribute task_step.
338 339 340 |
# File 'lib/bio-sge.rb', line 338

# Reader for the optional array-job step size used by #submit in the qsub
# -t option (1000 when unset).
#
# @return [Object] the current value of @task_step
def task_step
  @task_step
end
#work_dir ⇒ Object
Returns the value of attribute work_dir.
339 340 341 |
# File 'lib/bio-sge.rb', line 339

# Reader for the working directory (Dir.pwd at construction time); #script
# substitutes it for the %WORK_DIR% placeholder.
#
# @return [Object] the current value of @work_dir
def work_dir
  @work_dir
end
Instance Method Details
#clean ⇒ Object
399 400 401 402 |
# File 'lib/bio-sge.rb', line 399

# Deletes what #extract produced: the count file first, then the input
# directory. Returns whatever the final #rmtree call returns.
def clean
  [@count_file, @input_dir].map { |path| rmtree(path) }.last
end
#clear ⇒ Object
392 393 394 395 396 397 |
# File 'lib/bio-sge.rb', line 392

# Deletes the generated script and the output, error and log directories,
# in that order. Returns whatever the final #rmtree call returns.
def clear
  [@script_file, @output_dir, @error_dir, @log_dir].map { |path| rmtree(path) }.last
end
#distclean ⇒ Object
404 405 406 407 |
# File 'lib/bio-sge.rb', line 404

# Full cleanup: runs #clear and then #clean, returning the result of #clean.
def distclean
  clear
  clean
end
#extract ⇒ Object
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 |
# File 'lib/bio-sge.rb', line 441

# Splits the query into one input file per entry and records every written
# entry in the count file.
#
# Does nothing when the count file already exists. Otherwise each entry read
# through Bio::FlatFile.auto is numbered from 1 (kept in @count); entries
# outside the optional @task_min..@task_max window are reported as skipped.
# Written entries go to "<input_dir>/<slice>/<count>", where a slice groups
# @@slice consecutive entry numbers, and a "<count>\t<entry_id>" line is
# appended to the count file.
def extract
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  return if File.exist?(@count_file)

  slice = nil
  @count = 0
  File.open(@count_file, "a") do |count_file|
    Bio::FlatFile.auto(@query) do |ff|
      ff.each do |entry|
        @count += 1
        $stderr.print "Extracting ... #{@count} (#{entry.entry_id}) "

        if (@task_min && @count < @task_min) || (@task_max && @count > @task_max)
          $stderr.puts "skip."
          next
        end

        slice_old = slice
        slice = (@count - 1) / @@slice + 1
        slice_dir = "#{@input_dir}/#{slice}"
        # Only attempt to create the directory when a new slice begins.
        mkpath(slice_dir) if slice_old != slice
        File.open("#{slice_dir}/#{@count}", "w") do |file|
          file.puts ff.entry_raw
        end
        count_file.puts [@count, entry.entry_id].join("\t")
        $stderr.puts "done."
      end
    end
  end
end
#mkpath(dir) ⇒ Object
409 410 411 412 413 414 415 416 417 |
# File 'lib/bio-sge.rb', line 409

# Creates +dir+ (with parents) unless it is already a directory, reporting
# progress on $stderr.
#
# @param dir [String] directory path to create
def mkpath(dir)
  $stderr.print "Creating #{dir} ... "
  already_there = File.directory?(dir)
  FileUtils.mkpath(dir) unless already_there
  $stderr.puts(already_there ? "skip (already exists)." : "done.")
end
#prepare ⇒ Object
358 359 360 361 362 |
# File 'lib/bio-sge.rb', line 358

# Runs the three preparation steps in order: #setup (directories), #script
# (job script) and #extract (input files). Returns the result of #extract.
def prepare
  setup
  script
  extract
end
#rmtree(file) ⇒ Object
386 387 388 389 390 |
# File 'lib/bio-sge.rb', line 386

# Deletes +file+ via FileUtils.rmtree, reporting progress on $stderr.
#
# @param file [String] file or directory path to delete
def rmtree(file)
  $stderr.print("Deleting #{file} ... ")
  FileUtils.rmtree(file)
  $stderr.puts("done.")
end
#script ⇒ Object
426 427 428 429 430 431 432 433 434 435 436 437 438 439 |
# File 'lib/bio-sge.rb', line 426

# Writes the job script: a copy of @@template with every placeholder replaced
# by the matching setting, saved to @script_file.
#
# Substitution uses the block form of gsub! so that values are inserted
# verbatim. With a replacement string, backslash sequences such as "\\" or
# "\1" inside a command or path would be interpreted by gsub! and corrupt
# the script.
#
# @raise [TypeError] when a placeholder has no value (e.g. no command set)
def script
  substitutions = [
    ['%WORK_DIR%', @work_dir],
    ['%INPUT_DIR%', @input_dir],
    ['%OUTPUT_DIR%', @output_dir],
    ['%ERROR_DIR%', @error_dir],
    ['%TARGET%', @target],
    ['%COMMAND%', @command],
    ['%SLICE%', @@slice.to_s]
  ]

  sge_script = @@template.dup
  # Applied one after another, in this order, exactly like the original chain
  # of gsub! calls (a later placeholder inside an earlier value is expanded).
  substitutions.each do |placeholder, value|
    raise TypeError, "no value to substitute for #{placeholder}" if value.nil?
    sge_script.gsub!(placeholder) { value }
  end

  File.open(@script_file, "w") do |file|
    file.puts sge_script
  end
end
#setup ⇒ Object
419 420 421 422 423 424 |
# File 'lib/bio-sge.rb', line 419

# Creates the log, input, output and error directories, in that order.
# Returns whatever the final #mkpath call returns.
def setup
  [@log_dir, @input_dir, @output_dir, @error_dir].map { |dir| mkpath(dir) }.last
end
#submit ⇒ Object
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 |
# File 'lib/bio-sge.rb', line 364

# Submits the job script to SGE as one or more array jobs via qsub.
#
# When @count is not set, it is read from the leading number on the last
# line of the count file. Task IDs run from @task_min (default 1) to
# @task_max (default @count) with step @task_step (default 1000), and are
# submitted in batches of at most 50000 IDs per qsub call.
#
# Each batch covers offset .. offset + limit - 1. Ending a batch at
# offset + limit would submit that ID twice, because it is also the first ID
# of the following batch.
#
# @raise [RuntimeError] when the count file has to be read but is empty
def submit
  unless @count
    $stderr.puts "Reading #{@count_file} ..."
    last_line = File.readlines(@count_file).last
    raise "#{@count_file} is empty: no entry count to read" unless last_line
    @count = last_line[/^\d+/].to_i
    $stderr.puts "done."
  end

  task_min = @task_min || 1
  task_max = @task_max || @count
  task_step = @task_step || 1000

  # system upper limit is 75000
  limit = 50000
  opts = "#{@sge_opts} -o #{@log_dir} -e #{@log_dir} -cwd"

  task_min.step(task_max, limit) do |offset|
    batch_end = [offset + limit - 1, task_max].min
    span = "-t #{offset}-#{batch_end}:#{task_step}"
    qsub = "qsub #{opts} #{span} #{@script_file}"
    $stderr.puts "Submitting ... #{qsub}"
    system(qsub)
  end
end