Module: HPC::PBS
Class Method Summary
collapse
batch_dir_for_id, batch_options, batch_system_variables, cleanup_environment, coda, exec_cmd, execute, follow_job, header, hold_dependencies, job_queued, job_template, load_conda, load_modules, meta_data, prepare_environment, prepare_submision, rbbt_job_exec_cmd, run_job, sync_environment, wait_for_job
accumulate_rules, add_batch_deps, add_config_keys, add_rules_and_consolidate, chain_batches, check_chains, get_chains, get_job_dependencies, get_recursive_job_dependencies, job_batches, job_chains, job_dependencies, job_rules, job_rules, job_workload, merge_rules, orchestrate_job, orchestration_rules, parse_chains, pb, piggyback, prepare_for_execution, task_specific_rules, workflow_rules, workload
Class Method Details
.batch_system ⇒ Object
9
10
11
|
# File 'lib/rbbt/hpc/pbs.rb', line 9
# Name of the batch system this module generates scripts for.
#
# @return [String] the literal "PBS"
def self.batch_system
  'PBS'
end
|
.batch_system_variables ⇒ Object
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
# File 'lib/rbbt/hpc/pbs.rb', line 13
# Shell fragment that sets up the memory-related environment for a job.
#
# The emitted script:
# * counts processors from /proc/cpuinfo (TOTAL_PROCESORS),
# * derives MAX_MEMORY_DEFAULT from MemTotal in /proc/meminfo divided by
#   ((1024 * TOTAL_PROCESORS) / PBS_CPUS_PER_TASK),
# * overrides MAX_MEMORY with PBS_MEM_PER_CPU * PBS_CPUS_PER_TASK when
#   PBS_MEM_PER_CPU is set, and then with PBS_MEM_PER_NODE when that is set,
# * exports MAX_MEMORY_DEFAULT, MAX_MEMORY, BATCH_JOB_ID and BATCH_SYSTEM,
# * changes directory to PBS_O_WORKDIR.
#
# @return [String] the shell code, with the value of +batch_system+
#   interpolated into the BATCH_SYSTEM export
def self.batch_system_variables
  # Squiggly heredoc: the common leading indentation is stripped, so the
  # resulting text starts every line at column 0.
  <<~EOF
    let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
    let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $PBS_CPUS_PER_TASK )"
    MAX_MEMORY="$MAX_MEMORY_DEFAULT"
    [ ! -z $PBS_MEM_PER_CPU ] && let MAX_MEMORY="$PBS_MEM_PER_CPU * $PBS_CPUS_PER_TASK"
    [ ! -z $PBS_MEM_PER_NODE ] && MAX_MEMORY="$PBS_MEM_PER_NODE"
    export MAX_MEMORY_DEFAULT
    export MAX_MEMORY
    export BATCH_JOB_ID=$PBS_JOBID
    export BATCH_SYSTEM=#{batch_system}
    cd ${PBS_O_WORKDIR}
  EOF
end
|
.header(options = {}) ⇒ Object
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# File 'lib/rbbt/hpc/pbs.rb', line 29
# Builds the preamble of a PBS batch script: the bash shebang followed by one
# "#PBS ..." directive line per configured submission parameter.
#
# NOTE(review): the identifier `header` (method name and accumulator) was
# missing from the listing this was restored from; it matches the `header`
# entry in the module's method summary. Confirm against lib/rbbt/hpc/pbs.rb.
#
# Recognised options (the hash itself is not modified; a copy is consumed):
# * :batch_dir   - directory that receives std.out / std.err (required)
# * :queue       - emitted as `-q`
# * :account     - emitted as `-A`
# * :time        - emitted as `-l walltime=`; values without a ":" are passed
#                  through Misc.timespan and Misc.format_seconds first
# * :nodes       - emitted as `-l select=`
# * :place       - emitted as `-l place=`; 'scatter' is passed to
#                  Misc.process_options as the fallback (presumably a default)
# * :partition   - emitted as `-l system=`
# * :filesystems - emitted as `-l filesystems=`; "home" when absent, Arrays
#                  are joined with ","
#
# Other options the previous version read but never used (:workdir,
# :batch_name, :task_cpus, :exclusive, :highmem, :licenses, :constraint,
# :gres, :mem, :mem_per_cpu) are accepted and ignored.
# NOTE(review): assumes Misc.process_options only extracts keys from the
# hash it is given; verify it has no other side effects.
#
# Parameters whose value is nil or "" produce no directive.
#
# @param options [Hash] submission options as described above
# @return [String] the script header text
def self.header(options = {})
  options = options.dup

  batch_dir   = Misc.process_options options, :batch_dir
  queue       = Misc.process_options options, :queue
  account     = Misc.process_options options, :account
  time        = Misc.process_options options, :time
  nodes       = Misc.process_options options, :nodes
  place       = Misc.process_options options, :place, :place => 'scatter'
  system      = Misc.process_options options, :partition
  filesystems = Misc.process_options options, :filesystems

  filesystems = "home" if filesystems.nil?
  filesystems = filesystems * "," if Array === filesystems

  fout = File.join(batch_dir, 'std.out')
  ferr = File.join(batch_dir, 'std.err')

  # A missing :time simply yields no walltime directive instead of raising
  # NoMethodError on nil.
  time = Misc.format_seconds Misc.timespan(time) unless time.nil? || time.include?(":")

  # Keys carry their own separator ("=" or a trailing space) so that the
  # value can be appended directly; `true` marks a flag without a value.
  qsub_params = {
    "-l filesystems=" => filesystems,
    "-l system="      => system,
    "-l select="      => nodes,
    "-l place="       => place,
    "-l walltime="    => time,
    "-q "             => queue,
    "-A "             => account,
    "-o "             => fout,
    "-e "             => ferr,
    "-k doe"          => true,
  }

  # String.new keeps the accumulator mutable even under
  # `# frozen_string_literal: true`.
  header = String.new("#!/bin/bash\n")

  qsub_params.each do |name, value|
    next if value.nil? || value == ""

    case value
    when TrueClass
      header << "#PBS #{name}" << "\n"
    when Array
      value.each do |v|
        header << "#PBS #{name}\"#{v}\"" << "\n"
      end
    else
      header << "#PBS #{name}\"#{value}\"" << "\n"
    end
  end

  header
end
|
.job_status(job = nil) ⇒ Object
165
166
167
168
169
170
171
172
173
174
175
|
# File 'lib/rbbt/hpc/pbs.rb', line 165
# Queries the queue through `qstat`.
#
# Without a job id the full `qstat` listing is returned and any failure of
# the command propagates. With a job id, `qstat <job>` is run and any
# StandardError raised while doing so results in an empty string.
#
# @param job [Object, nil] job identifier interpolated into the command
# @return [String] output of the qstat command, or "" when the per-job query
#   fails
def self.job_status(job = nil)
  return CMD.cmd("qstat").read if job.nil?

  begin
    CMD.cmd("qstat #{job}").read
  rescue
    ""
  end
end
|
.run_template(batch_dir, dry_run) ⇒ Object
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
# File 'lib/rbbt/hpc/pbs.rb', line 111
# Submits the batch script stored in +batch_dir+ with `qsub`, unless the
# directory shows the job already ran or was already submitted.
#
# Files used inside +batch_dir+:
# * command.batch               - the script to submit (read for debug logging)
# * exit.status                 - if present, nothing is done and nil is returned
# * job.id                      - if present, its integer content is returned
#                                 without submitting; otherwise written after
#                                 a successful submission
# * dependencies.list           - one job id per line, joined into "afterok:"
# * canfail_dependencies.list   - one job id per line, joined into "afterany:"
# * std.out                     - if present (and no job.id), nil is returned
# * sync.log, std.err           - removed before submitting
#
# @param batch_dir [String] directory holding the batch files
# @param dry_run [Boolean] when truthy, print how to submit and monitor the
#   job to STDERR and raise instead of submitting
# @return [Integer, nil] the job id (first run of digits in the qsub output,
#   or the stored job.id), or nil when exit.status or std.out already exists
# @raise [HPC::BATCH_DRY_RUN] with +batch_dir+ as message on a dry run
def self.run_template(batch_dir, dry_run)
  fout   = File.join(batch_dir, 'std.out')
  ferr   = File.join(batch_dir, 'std.err')
  fjob   = File.join(batch_dir, 'job.id')
  fdep   = File.join(batch_dir, 'dependencies.list')
  fcfdep = File.join(batch_dir, 'canfail_dependencies.list')
  fexit  = File.join(batch_dir, 'exit.status')
  fsync  = File.join(batch_dir, 'sync.log')
  fcmd   = File.join(batch_dir, 'command.batch')

  return if Open.exists?(fexit)

  Log.info "Issuing PBS file: #{fcmd}"
  Log.debug Open.read(fcmd)

  # Already submitted: reuse the recorded id.
  return Open.read(fjob).to_i if File.exist?(fjob)

  dependencies         = File.exist?(fdep) ? Open.read(fdep).split("\n") : []
  canfail_dependencies = File.exist?(fcfdep) ? Open.read(fcfdep).split("\n") : []

  # "-W depend=afterok:a:b,afterany:c" with whichever clauses are non-empty.
  clauses = []
  clauses << "afterok:" + dependencies * ":" if dependencies.any?
  clauses << "afterany:" + canfail_dependencies * ":" if canfail_dependencies.any?
  dep_str = clauses.empty? ? "" : '-W depend=' + clauses * ","

  cmd = "qsub #{dep_str} '#{fcmd}'"

  return if File.exist?(fout)

  if dry_run
    # The hint must name the real submission command (`qsub`) so it can be
    # copy-pasted.
    STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "qsub '#{fcmd}'")
    STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt pbs tail '#{batch_dir}'")
    raise HPC::BATCH_DRY_RUN, batch_dir
  end

  # Clear leftovers from any previous attempt before submitting.
  [fsync, fexit, fout, ferr].each { |path| Open.rm path }

  job = CMD.cmd(cmd).read.scan(/\d+/).first.to_i
  Log.debug "PBS job id: #{job}"
  Open.write(fjob, job.to_s)
  job
end
|