Class: Step

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/workflow/step.rb,
lib/rbbt/util/python/step.rb,
lib/rbbt/workflow/step/run.rb,
lib/rbbt/workflow/step/info.rb,
lib/rbbt/workflow/step/status.rb,
lib/rbbt/workflow/step/produce.rb,
lib/rbbt/workflow/util/archive.rb,
lib/rbbt/workflow/step/accessor.rb,
lib/rbbt/workflow/util/provenance.rb,
lib/rbbt/workflow/step/dependencies.rb,
lib/rbbt/workflow/step/save_load_inputs.rb

Direct Known Subclasses

RemoteStep

Constant Summary collapse

# True when the RBBT_DEBUG_CLEAN environment variable is exactly 'true';
# evaluated once, when the constant is defined.
RBBT_DEBUG_CLEAN =
ENV["RBBT_DEBUG_CLEAN"] == 'true'
# Base rsync arguments used when archiving jobs into a directory.
MAIN_RSYNC_ARGS =
"-avztAXHP --copy-links"
# Serializer used to dump and load job info: the constant named by the
# RBBT_INFO_SERIALIZER environment variable when set, otherwise Marshal.
INFO_SERIALIZER =
begin
  if ENV["RBBT_INFO_SERIALIZER"]
    Kernel.const_get ENV["RBBT_INFO_SERIALIZER"]
  else
    Marshal
  end
end
# Streams already handed out by dup_stream; emptied by purge_stream_cache.
STREAM_CACHE =
{}
# Mutex held by dup_stream and purge_stream_cache while they touch STREAM_CACHE.
STREAM_CACHE_MUTEX =
Mutex.new

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil, clean_name = nil, &block) ⇒ Step

Returns a new instance of Step.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/rbbt/workflow/step.rb', line 33

# Builds a step bound to a result path.
#
# path         - a String (sanitized and set up as a Path), a Proc (called
#                right away to obtain the path) or any other object, which
#                is stored as given.
# task         - the task object; replaced by the block when one is given.
# inputs       - input values; set up as a NamedArray named after the task
#                inputs when the task exposes them.
# dependencies - nil, a single dependency or an Array; always stored as an Array.
# bindings     - stored unchanged.
# clean_name   - optional explicit clean name; when nil, #clean_name works it
#                out on first use.
def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil, clean_name = nil, &block)
  path = Path.setup(Misc.sanitize_filename(path)) if String === path
  path = path.call if Proc === path

  task = block if block_given?

  @path = path
  @task = task
  @bindings = bindings
  # This argument used to be accepted and then silently dropped.
  @clean_name = clean_name unless clean_name.nil?
  @dependencies = case
                  when dependencies.nil?
                    []
                  when Array === dependencies
                    dependencies
                  else
                    [dependencies]
                  end
  @mutex = Mutex.new
  @info_mutex = Mutex.new
  @inputs = inputs
  NamedArray.setup @inputs, task.inputs.collect{|s| s.to_s} if task and task.respond_to? :inputs and task.inputs
end

Class Attribute Details

.lock_dirObject

Returns the value of attribute lock_dir.



15
16
17
# File 'lib/rbbt/workflow/step.rb', line 15

# Returns the current value of @lock_dir.
def lock_dir
  @lock_dir
end

.log_relay_stepObject

Returns the value of attribute log_relay_step.



143
144
145
# File 'lib/rbbt/workflow/step.rb', line 143

# Returns the current value of @log_relay_step.
def log_relay_step
  @log_relay_step
end

Instance Attribute Details

#bindingsObject

Returns the value of attribute bindings.



2
3
4
# File 'lib/rbbt/workflow/step/accessor.rb', line 2

# Returns the bindings object stored in @bindings.
def bindings
  @bindings
end

#clean_nameObject

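Returns the clean name of the step: the :clean_name entry of its info or, when that is missing, the path with the first "_" followed by 32 lowercase alphanumeric characters removed. The value is memoized.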



26
27
28
29
30
# File 'lib/rbbt/workflow/step.rb', line 26

# Returns the clean name of the step, computing and caching it on first use:
# the :clean_name entry of info when present, otherwise the path without its
# "_" + 32 lowercase-alphanumeric suffix.
def clean_name
  return @clean_name if @clean_name

  @clean_name = info[:clean_name] || path.sub(/_[a-z0-9]{32}/, '')
end

#dependenciesObject

Returns the value of attribute dependencies.



2
3
4
# File 'lib/rbbt/workflow/step/accessor.rb', line 2

# Returns the dependencies stored in @dependencies.
def dependencies
  @dependencies
end

#duppedObject (readonly)

Returns the value of attribute dupped.



9
10
11
# File 'lib/rbbt/workflow/step/run.rb', line 9

# Returns the current value of @dupped.
def dupped
  @dupped
end

#exec(no_load = false) ⇒ Object

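Calls exec on every dependency, then runs this step through _exec while holding the step mutex. Returns the raw result when no_load is true or the RBBT_NO_STREAM environment variable is set, and the prepared result otherwise.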



111
112
113
114
115
116
117
118
# File 'lib/rbbt/workflow/step/run.rb', line 111

# Executes every dependency and then this step, storing the outcome in @result.
#
# no_load - when true (or when RBBT_NO_STREAM is set in the environment) the
#           raw result is returned; otherwise it goes through prepare_result
#           together with the task's result description.
def exec(no_load=false)
  dependencies.each do |dependency|
    dependency.exec(no_load)
  end

  @mutex.synchronize do
    @result = self._exec
    # A TSV::Dumper is swapped for the stream it exposes.
    if TSV::Dumper === @result
      @result = @result.stream
    end
  end

  return @result if no_load || ENV["RBBT_NO_STREAM"]

  prepare_result(@result, @task.result_description)
end

#inputsObject

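Returns the step inputs as a NamedArray, loading them from the step info first when they have not been set; returns an empty Array when no inputs are available.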



67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/workflow/step.rb', line 67

# Returns the inputs of the step.
#
# Inputs that are already a NamedArray are returned directly. Otherwise they
# are loaded from the info when still unset, and set up as a NamedArray using
# the task's input names when the task exposes them. Falls back to an empty
# Array when there are no inputs.
def inputs
  return @inputs if NamedArray === @inputs

  if @inputs.nil?
    load_inputs_from_info
  end

  field_names = task.respond_to?(:inputs) ? task.inputs : nil
  NamedArray.setup(@inputs, field_names) unless NamedArray === @inputs

  @inputs || []
end

#mutexObject

Returns the value of attribute mutex.



7
8
9
# File 'lib/rbbt/workflow/step/accessor.rb', line 7

# Returns the Mutex stored in @mutex.
def mutex
  @mutex
end

#original_task_nameObject

Returns the value of attribute original_task_name.



8
9
10
# File 'lib/rbbt/workflow/step/accessor.rb', line 8

# Returns the current value of @original_task_name.
def original_task_name
  @original_task_name
end

#original_workflowObject

Returns the value of attribute original_workflow.



8
9
10
# File 'lib/rbbt/workflow/step/accessor.rb', line 8

# Returns the current value of @original_workflow.
def original_workflow
  @original_workflow
end

#overridenObject

Returns the value of attribute overriden.



3
4
5
# File 'lib/rbbt/workflow/step/accessor.rb', line 3

# Returns the current value of @overriden.
def overriden
  @overriden
end

#pathObject

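Returns the path of the step. When the path is still stored as a Proc, it is called, sanitized and set up as a Path first, and the result replaces the Proc.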



134
135
136
137
138
139
140
# File 'lib/rbbt/workflow/step.rb', line 134

# Returns the path of the step. A path stored as a Proc is resolved on first
# access: the Proc is called, its result sanitized and set up as a Path, and
# that value replaces the Proc in @path.
def path
  return @path unless Proc === @path

  @path = Path.setup(Misc.sanitize_filename(@path.call))
end

#pidObject

Returns the value of attribute pid.



4
5
6
# File 'lib/rbbt/workflow/step/accessor.rb', line 4

# Returns the current value of @pid.
def pid
  @pid
end

#real_inputsObject

Returns the value of attribute real_inputs.



8
9
10
# File 'lib/rbbt/workflow/step/accessor.rb', line 8

# Returns the current value of @real_inputs.
def real_inputs
  @real_inputs
end

#relocatedObject

Returns the value of attribute relocated.



6
7
8
# File 'lib/rbbt/workflow/step/accessor.rb', line 6

# Returns the current value of @relocated.
def relocated
  @relocated
end

#resultObject

Returns the value of attribute result.



7
8
9
# File 'lib/rbbt/workflow/step/accessor.rb', line 7

# Returns the result stored in @result.
def result
  @result
end

#saved_streamObject (readonly)

Returns the value of attribute saved_stream.



9
10
11
# File 'lib/rbbt/workflow/step/run.rb', line 9

# Returns the current value of @saved_stream.
def saved_stream
  @saved_stream
end

#seenObject

Returns the value of attribute seen.



7
8
9
# File 'lib/rbbt/workflow/step/accessor.rb', line 7

# Returns the current value of @seen.
def seen
  @seen
end

#streamObject (readonly)

Returns the value of attribute stream.



9
10
11
# File 'lib/rbbt/workflow/step/run.rb', line 9

# Returns the current value of @stream.
def stream
  @stream
end

#taskObject

Returns the value of attribute task.



2
3
4
# File 'lib/rbbt/workflow/step/accessor.rb', line 2

# Returns the task stored in @task.
def task
  @task
end

#task_nameObject

Returns the value of attribute task_name.



3
4
5
# File 'lib/rbbt/workflow/step/accessor.rb', line 3

# Returns the current value of @task_name.
def task_name
  @task_name
end

#workflowObject

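Returns the workflow of the step: the non-empty :workflow entry of its info, else the workflow of its task, else the third-from-last component of its path. The value is memoized.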



56
57
58
59
60
61
62
63
# File 'lib/rbbt/workflow/step.rb', line 56

# Returns the workflow of the step, caching it on first use.
#
# Candidates are tried in order: the :workflow entry of info (an empty string
# counts as missing), the workflow reported by the task, and finally the
# third-from-last component of the path.
def workflow
  return @workflow if @workflow

  candidate = info[:workflow]
  candidate = nil if candidate == ""
  if @task && @task.respond_to?(:workflow)
    candidate ||= @task.workflow
  end
  candidate ||= path.split("/")[-3]

  @workflow = candidate
end

Class Method Details

.archive(files, target = nil, recursive = true) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/rbbt/workflow/util/archive.rb', line 109

# Archives the given jobs, together with their associated files, into target.
#
# files     - jobs to archive, in any form job_files_for_archive accepts.
# target    - destination; a String is expanded to an absolute path. When it
#             is an existing directory the linked tree is rsync'ed into it,
#             otherwise a gzipped tar file is written there.
# recursive - forwarded to job_files_for_archive.
#
# NOTE(review): when target is nil the default is built from self.path, but
# this is a class method and no class-level path is visible here -- confirm
# it exists, otherwise callers must always pass a target.
def self.archive(files, target = nil, recursive = true)
  target = self.path + '.tar.gz' if target.nil?
  target = File.expand_path(target) if String === target

  job_files = job_files_for_archive files, recursive
  # Link every collected file into a temporary tree, then copy or pack it.
  TmpFile.with_file do |tmpdir|
    job_files.each do |file|
      Step.link_job file, tmpdir
    end

    Misc.in_dir(tmpdir) do
      if File.directory?(target)
        CMD.cmd_log("rsync #{MAIN_RSYNC_ARGS} --copy-unsafe-links '#{ tmpdir }/' '#{ target }/'")
      else
        # The h flag makes tar store the files the symlinks point to.
        CMD.cmd_log("tar cvhzf '#{target}'  ./*")
      end
    end
    Log.debug "Archive finished at: #{target}"
  end
end

.clean(path) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rbbt/workflow/step/status.rb', line 2

# Removes everything a job left on disk: its result, info, md5 and pid files,
# its files directory and its temporary result file.
#
# path - result path of the job.
#
# Nothing is removed, and a warning is logged, when the result path or the
# info file is not writable. Raises "DO NOT CLEAN" when the RBBT_DEBUG_CLEAN
# environment variable is 'true'.
def self.clean(path)
  info_file = Step.info_file path
  pid_file = Step.pid_file path
  md5_file = Step.md5_file path
  files_dir = Step.files_dir path
  tmp_path = Step.tmp_path path

  unless Open.writable?(path) && Open.writable?(info_file)
    Log.warn "Could not clean #{path}: not writable"
    return
  end

  raise "DO NOT CLEAN" if ENV["RBBT_DEBUG_CLEAN"] == 'true'

  # Every file removed below takes part in the check. The original tested
  # pid_file twice and never looked at md5_file or tmp_path, so a job with
  # only those leftovers was never cleaned.
  leftovers = Open.exists?(path) || Open.broken_link?(path) ||
    Open.exists?(pid_file) || Open.exists?(info_file) ||
    Open.exists?(files_dir) || Open.broken_link?(files_dir) ||
    Open.exists?(md5_file) || Open.exists?(tmp_path)
  return unless leftovers

  # NOTE(review): in a class method these are class-level instance variables,
  # not the state of any particular step; kept for compatibility.
  @result = nil
  @pid = nil

  Misc.insist do
    Open.rm info_file if Open.exists?(info_file)
    Open.rm md5_file if Open.exists?(md5_file)
    Open.rm path if (Open.exists?(path) || Open.broken_link?(path))
    Open.rm_rf files_dir if Open.exists?(files_dir) || Open.broken_link?(files_dir)
    Open.rm pid_file if Open.exists?(pid_file)
    Open.rm tmp_path if Open.exists?(tmp_path)
  end
end

.dup_stream(stream) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/workflow/step/dependencies.rb', line 12

# Returns a stream for a consumer, duplicating it when the same underlying
# stream has already been handed out.
#
# The first request for a stream registers it in STREAM_CACHE and returns the
# stream itself. Later requests return a duplicate made with Misc.dup_stream,
# or a freshly opened handle when the cached entry is a File whose path still
# exists. Closed or done streams, and objects of any other type, are returned
# unchanged.
def self.dup_stream(stream)
  case stream
  when IO, File, Step
    return stream if stream.respond_to?(:closed?) and stream.closed?
    return stream if stream.respond_to?(:done?) and stream.done?

    STREAM_CACHE_MUTEX.synchronize do
      # These entries are keyed by the fingerprint of the stream.
      stream_key = Misc.fingerprint(stream)
      current = STREAM_CACHE[stream_key]
      case current
      when nil, Step
        # First request (or the cached entry is a Step): register and return it.
        Log.medium "Not duplicating stream #{stream_key}"
        STREAM_CACHE[stream_key] = stream
      when File
        if Open.exists?(current.path)
          Log.medium "Reopening file #{stream_key}"
          Open.open(current.path)
        else
          new = Misc.dup_stream(current)
          Log.medium "Duplicating file #{stream_key} #{current.inspect} => #{Misc.fingerprint(new)}"
          new
        end
      else
        new = Misc.dup_stream(current)
        Log.medium "Duplicating stream #{stream_key} #{ Misc.fingerprint(stream) } => #{Misc.fingerprint(new)}"
        new
      end
    end
  when TSV::Dumper, TSV::Parser
    orig_stream = stream
    stream = stream.stream
    return stream if stream.closed?

    STREAM_CACHE_MUTEX.synchronize do
      # Unlike the branch above, these entries are keyed by the stream object itself.
      if STREAM_CACHE[stream].nil?
        Log.high "Not duplicating #{Misc.fingerprint orig_stream} #{ stream.inspect }"
        STREAM_CACHE[stream] = stream
      else
        new = Misc.dup_stream(STREAM_CACHE[stream])
        Log.high "Duplicating #{Misc.fingerprint orig_stream} #{ stream.inspect } into #{new.inspect}"
        new
      end
    end
  else
    stream
  end
end

.files_dir(path) ⇒ Object



65
66
67
# File 'lib/rbbt/workflow/step/accessor.rb', line 65

# Returns the files directory of a job (its path plus '.files') set up as a
# Path, or nil when path is nil.
def self.files_dir(path)
  return nil if path.nil?

  Path.setup(path + '.files')
end

.info_file(path) ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/rbbt/workflow/step/accessor.rb', line 69

# Returns the info file of a job, or nil when path is nil.
#
# The regular location is path + '.info'. When that file does not exist and
# the path ends in '.gz', the info file of the same path without '.gz' is
# returned if it exists. The regular location is the fallback in every
# other case.
def self.info_file(path)
  return nil if path.nil?

  primary = path + '.info'
  unless Open.exist?(primary)
    if path.end_with?('.gz')
      without_gz = path.sub(/\.gz$/,'') + '.info'
      return without_gz if Open.exists?(without_gz)
    end
  end

  primary
end

.job_files_for_archive(files, recursive = false, skip_overriden = false) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/rbbt/workflow/util/archive.rb', line 55

# Collects the files that make up the given jobs.
#
# files          - Step objects or job paths; a trailing '.info' is stripped
#                  from paths before they are turned into steps.
# recursive      - when true, the result, info file and non-empty files
#                  directory of every recursive dependency are included too.
# skip_overriden - when true, jobs whose overriden flag is set are skipped.
#
# Jobs whose result path does not exist are skipped. Returns an Array of paths.
def self.job_files_for_archive(files, recursive = false, skip_overriden = false)
  job_files = Set.new

  jobs = files.collect do |file|  
    if Step === file
      file
    else
      file = file.sub(/\.info$/,'')
      Step.new(File.expand_path(file))
    end
  end.uniq

  jobs.each do |step|
    next unless File.exist?(step.path)
    next if skip_overriden && step.overriden

    job_files << step.path
    job_files << step.info_file if File.exist?(step.info_file)
    job_files << Step.md5_file(step.path) if File.exist?(Step.md5_file step.path)
    job_file_dir_content = Dir.glob(step.files_dir + '/**/*')
    job_files += job_file_dir_content
    job_files << step.files_dir if File.exist?(step.files_dir)
    rec_dependencies = Set.new

    next unless recursive

    # Walk the dependency graph using deps as a work queue; seen holds the
    # paths of the steps already loaded.
    deps = [step.path]
    seen = Set.new
    while deps.any?
      path = deps.shift

      dep = Workflow.load_step path
      seen << dep.path

      dep.load_dependencies_from_info

      dep.dependencies.each do |dep|
        next if seen.include? dep.path
        deps << dep.path
        rec_dependencies << dep.path
      end if dep.info[:dependencies]
    end

    rec_dependencies.each do |path|
      dep = Workflow.load_step path
      job_files << dep.path
      job_files << dep.files_dir if Dir.glob(dep.files_dir + '/*').any?
      job_files << dep.info_file if File.exist?(dep.info_file)
    end
  end

  job_files.to_a
end

.job_name_for_info_file(info_file, extension = nil) ⇒ Object



109
110
111
112
113
114
115
# File 'lib/rbbt/workflow/step/accessor.rb', line 109

# Returns the job name for an info file name.
#
# info_file - name of the info file, ending in '.info'.
# extension - optional extension of the job result; when given and not
#             empty, '.<extension>.info' is stripped instead of '.info'.
#
# The input is returned unchanged when the expected suffix is not present.
def self.job_name_for_info_file(info_file, extension = nil)
  if extension and not extension.empty?
    # The extension is literal text: escape it so that characters such as '.'
    # or '+' are not read as regexp syntax.
    info_file.sub(/\.#{Regexp.escape(extension)}\.info$/,'')
  else
    info_file.sub(/\.info$/,'')
  end
end


6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/workflow/util/archive.rb', line 6

# Symlinks a job result, plus its '.files' directory and '.info' file when
# they exist, under target_dir/<workflow>/<task>/<name>.
#
# path       - result path of the job.
# target_dir - root of the tree of links; set up as a Path.
# task       - task directory name; defaults to the parent directory of path.
# workflow   - workflow directory name; defaults to the grandparent directory of path.
#
# Nothing is done when the link location already exists or is already a symlink.
def self.link_job(path, target_dir, task = nil, workflow = nil)
  Path.setup(target_dir)

  name = File.basename(path)
  task = File.basename(File.dirname(path)) if task.nil?
  workflow = File.basename(File.dirname(File.dirname(path))) if workflow.nil?

  return if target_dir[workflow][task][name].exists? || File.symlink?(target_dir[workflow][task][name].find)
  Log.debug "Linking #{ path }"
  FileUtils.mkdir_p target_dir[workflow][task] unless target_dir[workflow][task].exists?
  FileUtils.ln_s path, target_dir[workflow][task][name].find if File.exist?(path)
  FileUtils.ln_s path + '.files', target_dir[workflow][task][name].find + '.files' if File.exist?(path + '.files')
  FileUtils.ln_s path + '.info', target_dir[workflow][task][name].find + '.info' if File.exist?(path + '.info')
end

.load_serialized_info(io) ⇒ Object



35
36
37
# File 'lib/rbbt/workflow/step/accessor.rb', line 35

# Reads a serialized info hash from io with INFO_SERIALIZER and returns it
# set up as an IndiferentHash.
def self.load_serialized_info(io)
  raw_info = INFO_SERIALIZER.load(io)
  IndiferentHash.setup(raw_info)
end

.log(status, message, path, &block) ⇒ Object



243
244
245
246
247
248
249
250
251
252
253
# File 'lib/rbbt/workflow/step/info.rb', line 243

# Logs a status line for a job, choosing the logger from the arguments:
#   no block             -> log_string
#   block + Hash message -> log_progress (the Hash holds the bar options)
#   block otherwise      -> log_block
def self.log(status, message, path, &block)
  return log_string(status, message, path) unless block
  return log_progress(status, message, path, &block) if Hash === message

  log_block(status, message, path, &block)
end

.log_block(status, message, path, &block) ⇒ Object



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/rbbt/workflow/step/info.rb', line 162

# Logs a status line, runs the block, then logs a second line with the time
# the block took.
#
# status  - status label; converted to a String.
# message - text appended to the first line; the special value :result makes
#           the second line show the value returned by the block instead.
# path    - job path appended to both lines when given.
#
# Returns the value of the block.
def self.log_block(status, message, path, &block)
  # A monotonic clock keeps the reported duration right even if the wall
  # clock is adjusted while the block runs.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  status = status.to_s
  status_color = self.status_color status

  Log.info do
    str = Log.color :reset
    str << "#{ Log.color status_color, status}"
    str << ": #{ message }" if message and message != :result
    str << " -- #{Log.color :blue, path.to_s}" if path
    str << " #{Log.color :yellow, Process.pid}"
    str
  end

  res = yield
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

  Log.info do
    str = "#{ Log.color :cyan, status.to_s } +#{Log.color :green, "%.2f" % elapsed}"
    str << ": #{ res }" if message == :result
    str << " -- #{Log.color :blue, path.to_s}" if path
    str << " #{Log.color :yellow, Process.pid}"
    str
  end

  res
end

.log_progress(status, options = {}, path = nil, &block) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/rbbt/workflow/step/info.rb', line 204

# Runs the block with a progress bar and returns the value of the block.
#
# status  - not used by this method.
# options - progress bar options; :severity defaults to Log::INFO and :file
#           to path (or nil when @exec is set). The :max entry is taken out
#           and passed as the size of the bar.
# path    - default value for the :file option.
#
# A block that returns an IO raises KeepBar wrapping that IO. Any error is
# logged and raised again.
def self.log_progress(status, options = {}, path = nil, &block)
  bar_file = @exec ? nil : path
  options = Misc.add_defaults(options, :severity => Log::INFO, :file => bar_file)
  max = Misc.process_options(options, :max)

  Log::ProgressBar.with_bar(max, options) do |bar|
    begin
      outcome = yield(bar)
      raise KeepBar.new(outcome) if IO === outcome
      outcome
    rescue
      Log.exception $!
      raise $!
    end
  end
end

.log_string(status, message, path) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/workflow/step/info.rb', line 189

# Logs one status line at info level: the colored status, the message when
# given, the job path when given, and the id of the current process. The
# line is built inside the Log.info block.
def self.log_string(status, message, path)
  Log.info do

    status = status.to_s
    color = self.status_color(status)

    line = Log.color(:reset)
    line << "#{ Log.color color, status}"
    if message
      line << ": #{ message }"
    end
    if path
      line << " -- #{Log.color :blue, path.to_s}"
    end
    line << " #{Log.color :yellow, Process.pid}"
    line
  end
end

.md5_file(path) ⇒ Object



90
91
92
# File 'lib/rbbt/workflow/step/accessor.rb', line 90

# Returns the md5 file of a job (its path plus '.md5'), or nil when path is nil.
def self.md5_file(path)
  return nil if path.nil?

  path + '.md5'
end

.migrate(paths, search_path, options = {}) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/rbbt/workflow/util/archive.rb', line 163

# Migrates the given jobs to the location selected by search_path.
#
# paths       - a job path or Step, or an Array of them.
# search_path - passed to Rbbt.migrate_target_path to choose the destination.
# options     - :resource (defaults to Rbbt), :target, :source and :recursive
#               are read here; the whole hash, plus :files, is forwarded to
#               Rbbt.migrate_files.
#
# Returns an Array with the target path of each job.
#
# NOTE(review): search_path is defaulted to 'user' inside the loop, after it
# has already been used to compute target, and it is not read afterwards --
# confirm whether the default was meant to apply before that call.
def self.migrate(paths, search_path, options = {})
  subpath_files = {}
  target_paths = []
  resource = options[:resource] || Rbbt

  target = Rbbt.migrate_target_path('var/jobs', search_path, resource, options[:target])

  (Array === paths ? paths : [paths]).each do |path|
    if Step === path
      if options[:source]
        path = Rbbt.identify(path.path)
      else
        path = path.path
      end
    end
    search_path = 'user' if search_path.nil?


    path, real_paths, lpath = self.migrate_source_paths(path, resource, options[:source], options[:recursive])

    # Group the files by base directory (everything above the last three
    # path components), reusing the most recently added base when it is a
    # prefix of the new one.
    real_paths.sort.each do |path|
      parts = path.split("/")
      subpath = parts[0..-4] * "/" + "/"

      if subpath_files.keys.any? && subpath.start_with?(subpath_files.keys.last)
        subpath = subpath_files.keys.last
      end

      source = path.chars[subpath.length..-1] * ""

      subpath_files[subpath] ||= []
      subpath_files[subpath] << source
    end


    target_paths << File.join(target, *path.split("/")[-3..-1])
  end


  subpath_files.each do |subpath, files|
    Rbbt.migrate_files([subpath], target, options.merge(:files => files))
  end

  target_paths
end

.migrate_source_paths(path, resource = Rbbt, source = nil, recursive = true) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/rbbt/workflow/util/archive.rb', line 130

# Resolves a job path into the list of files that have to be transferred.
#
# path      - job path to resolve.
# resource  - resource whose identify method is called in the remote script.
# source    - host to query through Misc.ssh_run; nil resolves locally.
# recursive - forwarded to Step.job_files_for_archive; nil counts as false.
#
# With a source, a Ruby script is run on that host: its first output line is
# the resolved path and the remaining lines are the job files, which are
# returned prefixed with "<source>:". Without a source the path is globbed
# locally.
#
# Returns [path, files, resolved_path].
#
# NOTE(review): path and resource are interpolated verbatim into the remote
# script; a path containing a double quote would break or alter that script
# -- confirm callers only pass trusted paths.
def self.migrate_source_paths(path, resource = Rbbt, source = nil, recursive = true)
  recursive = false if recursive.nil?
  if source
    lpath, *paths = Misc.ssh_run(source, <<-EOF).split("\n")
require 'rbbt-util'
require 'rbbt/workflow'

recursive = #{ recursive.to_s }
path = "#{path}"

if Open.exists?(path)
path = #{resource.to_s}.identify(path)
else
path = Path.setup(path)
end

files = path.glob_all.collect{|p| File.directory?(p) ? p + "/" : p }
files = Step.job_files_for_archive(files, recursive)

puts path
puts files * "\n"
    EOF

    [path, paths.collect{|p| [source, p] * ":"}, lpath]
  else
    path = Path.setup(path.dup)
    files = path.glob_all
    files = Step.job_files_for_archive(files, recursive)

    [path, files, path]
  end
end

.pid_file(path) ⇒ Object



94
95
96
# File 'lib/rbbt/workflow/step/accessor.rb', line 94

# Returns the pid file of a job (its path plus '.pid'), or nil when path is nil.
def self.pid_file(path)
  return nil if path.nil?

  path + '.pid'
end

.prepare_for_execution(job) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/rbbt/workflow/step/dependencies.rb', line 72

# Gets a job ready to run, cleaning stale or failed state first.
#
# Returns right away when the job is done and not dirty, when a remote step
# is not in a final state ('done', 'error' or 'aborted'), or when a local
# step is streaming and still running.
#
# Otherwise, while holding the job's status lock: if the job is in one of the
# states listed in the condition below it is cleaned (unless it is resumable,
# updated and not dirty) and its status is set to :cleaned; then its inputs
# are duplicated and its info initialized where the status calls for it.
#
# Finally calls Step.raise_dependency_error when the job is in error, unless
# it is a ComputeDependency that can fail.
def self.prepare_for_execution(job)
  return if job.done? && ! job.dirty?

  status = job.status.to_s

  if defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === job 
    return unless (status == 'done' or status == 'error' or status == 'aborted')
  else
    return if status == 'streaming' and job.running?
  end

  canfail = nil
  job.status_lock.synchronize do
    # Read the status again now that the lock is held.
    status = job.status.to_s

    if (status == 'error' && (job.recoverable_error? || job.dirty?)) ||
        (job.noinfo? && Open.exists?(job.pid_file)) ||
        job.aborted? ||
        (job.done? && ! job.updated?)  || (job.error? && ! job.updated?) ||
        (job.done? && job.dirty?)  || (job.error? && job.dirty?) ||
        (!(job.noinfo? || job.done? || job.error? || job.aborted? || job.running?))

      if ! (job.resumable? && (job.updated? && ! job.dirty?))
        # The diagnostic line is only logged when RBBT_DEBUG_CLEAN is set.
        Log.high "About to clean -- status: #{status}, present #{File.exist?(job.path)}, " +
          %w(done? error? recoverable_error? noinfo? updated? dirty? aborted? running? resumable?).
          collect{|v| [v, job.send(v)]*": "} * ", " if RBBT_DEBUG_CLEAN

        job.clean
      end
      job.set_info :status, :cleaned
    end

    job.dup_inputs unless status == 'done' or job.started?
    job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started? || ! Workflow.job_path?(job.path)

    canfail = ComputeDependency === job && job.canfail?
  end

  Step.raise_dependency_error(job) if job.error? and not canfail
end

.produce_dependencies(jobs, tasks, cpus) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/rbbt/workflow/step/produce.rb', line 2

# Produces the dependencies of the given jobs that belong to the given tasks.
#
# jobs  - a job or an Array of jobs.
# tasks - a task name or an Array of them; a dependency matches by its task
#         name or by "<workflow>#<task name>".
# cpus  - passed to TSV.traverse as :cpus; "max" means one per job, and any
#         other String is converted with to_i.
#
# Dependencies that are already done are skipped, and those in a recoverable
# error are cleaned first.
def self.produce_dependencies(jobs, tasks, cpus)
  jobs = [jobs] unless Array === jobs
  tasks = [tasks] unless Array === tasks
  tasks = tasks.map(&:to_s)

  pending = []
  jobs.each do |job|
    job.rec_dependencies.each do |dep|
      next if dep.done?
      dep.clean if dep.error? && dep.recoverable_error?

      selected = tasks.include?(dep.task_name.to_s) ||
        tasks.include?([dep.workflow.to_s, dep.task_name] * "#")
      pending << dep if selected
    end
  end

  cpus = jobs.length if cpus.to_s == "max"
  cpus = cpus.to_i if String === cpus

  TSV.traverse pending.collect{|dep| dep.path}, :type => :array, :cpus => cpus, :bar => "Prepare dependencies #{Misc.fingerprint tasks} for #{Misc.fingerprint jobs}" do |path|
    # Look the dependency up again by path: only the path is traversed.
    pending.find { |dep| dep.path == path }.produce
    nil
  end
end

.prov_report(step, offset = 0, task = nil, seen = [], expand_repeats = false, input = nil) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/rbbt/workflow/util/provenance.rb', line 86

# Builds the provenance report of a step as a String, one line per step, with
# dependencies indented one extra space per level.
#
# step           - step to report.
# offset         - indentation of the line for this step.
# task           - stored in the step info as :task_name and passed down to
#                  the dependencies.
# seen           - paths already reported; updated in place during recursion.
# expand_repeats - when true, dependencies already seen are reported in full
#                  again instead of as a single line.
# input          - [dependency, input name] pairs describing where this step
#                  is used as an input.
#
# The line for the step itself goes after its dependencies, unless the
# RBBT_ORIGINAL_STACK environment variable is 'true', in which case it goes
# first and the dependencies are listed in reverse order.
def self.prov_report(step, offset = 0, task = nil, seen = [], expand_repeats = false, input = nil)
  info = step.info  || {}
  info[:task_name] = task
  path  = step.path
  status = info[:status] || :missing
  status = "remote" if Open.remote?(path) || Open.ssh?(path)
  name = info[:name] || File.basename(path)
  status = :unsync if status == :done and not Open.exist?(path)
  status = :notfound if status == :noinfo and not Open.exist?(path)


  this_step_msg = prov_report_msg(status, name, path, info, input)

  # Map each input dependency to the [dependency, input name] pairs where it
  # is used as an input value.
  input_dependencies = {}
  step.dependencies.each do |dep|
    if dep.input_dependencies.any?
      dep.input_dependencies.each do |id|
        input_name, _dep = dep.recursive_inputs.fields.zip(dep.recursive_inputs).select{|f,d| 
          d == id || (String === d && d.start_with?(id.files_dir)) || (Array === d && d.include?(id))
        }.last
        if input_name
          input_dependencies[id] ||= []
          input_dependencies[id] << [dep, input_name]
        end
      end
    end
  end if step.dependencies

  str = ""
  str = " " * offset + this_step_msg if ENV["RBBT_ORIGINAL_STACK"] == 'true'

  step.dependencies.dup.tap{|l| 
    l.reverse! if ENV["RBBT_ORIGINAL_STACK"] == 'true'
  }.each do |dep|
    path = dep.path
    new = ! seen.include?(path)
    if new
      seen << path
      str << prov_report(dep, offset + 1, task, seen, expand_repeats, input_dependencies[dep])
    else
      if expand_repeats
        # The repeated subtree is reported in full, recolored with the
        # status color of the dependency.
        str << Log.color(Step.status_color(dep.status), Log.uncolor(prov_report(dep, offset+1, task)))
      else
        info = dep.info  || {}
        status = info[:status] || :missing
        status = "remote" if Open.remote?(path) || Open.ssh?(path)
        name = info[:name] || File.basename(path)
        status = :unsync if status == :done and not Open.exist?(path)
        status = :notfound if status == :noinfo and not Open.exist?(path)

        str << Log.color(Step.status_color(status), " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info, input_dependencies[dep])))
      end
    end
  end if step.dependencies

  str += (" " * offset) + this_step_msg unless ENV["RBBT_ORIGINAL_STACK"] == 'true'

  str
end

.prov_report_msg(status, name, path, info, input = nil) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/rbbt/workflow/util/provenance.rb', line 29

# Formats the provenance line for one step, ending in a newline.
#
# status - status to show.
# name   - not used by this method.
# path   - path of the step; the task and workflow names are taken from its
#          parent and grandparent directories.
# info   - info hash of the step.
# input  - optional [dependency, input name] pairs, rendered as "-> task:input".
#
# The globals $main_mtime, $inputs and $info_fields are read here: they flag
# results newer than the main job and add the chosen input values and info
# fields to the line.
def self.prov_report_msg(status, name, path, info, input = nil)
  parts = path.sub(/\{.*/,'').split "/"

  parts.pop

  task = Log.color(:yellow, parts.pop)
  workflow = Log.color(:magenta, parts.pop)
  #if status.to_s == 'noinfo' && parts.last != 'jobs'
  # Paths that are not job paths are reported as plain files.
  if ! Workflow.job_path?(path)
    task, status, workflow = Log.color(:yellow, info[:task_name]), Log.color(:green, "file"), Log.color(:magenta, "-")
  end

  path_mtime = begin
                 Open.mtime(path)
               rescue Exception
                 nil
               end

  if input.nil? || input.empty?
    input_str = nil
  else
    # Drop the pairs that also appear, with the same input name, for a
    # dependency or input dependency of the dependency in the pair.
    input = input.reject{|dep,name| (input & dep.dependencies.collect{|d| [d,name]}).any? }
    input = input.reject{|dep,name| (input & dep.input_dependencies.collect{|d| [d,name]}).any? }
    input_str = Log.color(:magenta, "-> ") + input.collect{|dep,name| Log.color(:yellow, dep.task_name.to_s) + ":" + Log.color(:yellow, name) }.uniq * " "
  end

  # A local result more than 2 seconds newer than $main_mtime is flagged.
  str = if ! (Open.remote?(path) || Open.ssh?(path)) && (Open.exists?(path) && $main_mtime && path_mtime && ($main_mtime - path_mtime) < -2)
          prov_status_msg(status.to_s) << " " << [workflow, task, path, input_str].compact * " " << " (#{Log.color(:red, "Mtime out of sync") })"
        else
          prov_status_msg(status.to_s) << " " << [workflow, task, path, input_str].compact * " " 
        end

  if $inputs and $inputs.any? 
    job_inputs = Workflow.load_step(path).recursive_inputs.to_hash
    IndiferentHash.setup(job_inputs)

    $inputs.each do |input|
      value = job_inputs[input]
      next if  value.nil?
      value_str = Misc.fingerprint(value)
      str << "\t#{Log.color :magenta, input}=#{value_str}"
    end
  end

  if $info_fields and $info_fields.any?
    $info_fields.each do |field|
      IndiferentHash.setup(info)
      value = info[field]
      next if value.nil?
      value_str = Misc.fingerprint(value)
      str << "\t#{Log.color :magenta, field}=#{value_str}"
    end
  end

  str << "\n"
end

.prov_status_msg(status) ⇒ Object



24
25
26
27
# File 'lib/rbbt/workflow/util/provenance.rb', line 24

# Returns the status as a String colored with the color status_color assigns to it.
def self.prov_status_msg(status)
  Log.color(status_color(status), status.to_s)
end

.purge(path, recursive = false, skip_overriden = true) ⇒ Object



209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/rbbt/workflow/util/archive.rb', line 209

# Deletes the files of the given jobs.
#
# path           - a job path or a list of jobs, as taken by job_files_for_archive.
# recursive      - forwarded to job_files_for_archive.
# skip_overriden - forwarded to job_files_for_archive.
#
# Files that cannot be removed are reported with a warning and skipped.
# Returns the list of files that were considered.
def self.purge(path, recursive = false, skip_overriden = true)
  path = [path] if String === path

  job_files_for_archive(path, recursive, skip_overriden).each do |file|
    begin
      Log.debug "Purging #{file}"
      Open.rm_rf file if Open.exists?(file)
    rescue
      Log.warn "Could not erase '#{file}': #{$!.message}"
    end
  end
end

.purge_stream_cacheObject



5
6
7
8
9
10
# File 'lib/rbbt/workflow/step/dependencies.rb', line 5

# Empties STREAM_CACHE while holding STREAM_CACHE_MUTEX.
def self.purge_stream_cache
  STREAM_CACHE_MUTEX.synchronize { STREAM_CACHE.clear }
end

.raise_dependency_error(job) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/rbbt/workflow/step/dependencies.rb', line 113

# Raises the dependency error that matches the failure recorded for a job.
#
# job - the failed job; it is passed as the argument of the raised exception.
#
# The exception class comes from job.get_exception or, when that returns
# nothing, from the class name stored under info[:exception][:class].
#
# Raises DependencyRbbtException when that class is RbbtException or one of
# its subclasses, and DependencyError otherwise, including when the class
# cannot be worked out.
def self.raise_dependency_error(job)
  begin
    exception = job.get_exception
    klass = if exception
              exception.class
            else
              # The info belongs to the job. A bare `info` is undefined in
              # this class method, so the fallback used to end in the rescue
              # below every time.
              Kernel.const_get(job.info[:exception][:class])
            end
  rescue
    Log.exception $!
    raise DependencyError, job
  end

  if klass <= RbbtException
    raise DependencyRbbtException, job
  else
    raise DependencyError, job
  end
end

.save_input(name, value, type, dir) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/workflow/step/save_load_inputs.rb', line 146

# Writes one job input into a file named after the input inside dir.
#
# name  - input name, used as the base of the file name.
# value - input value.
# type  - input type; only used in the debug log line.
# dir   - directory where the file is written.
#
# A suffix on the file name records how the value was encoded:
#   .as_path / .as_step_file - Path values (.as_step_file when the resource
#                              of the Path is a Step)
#   .as_io                   - IO values
#   .as_step                 - Step values; the path of the step is written
#   .as_*_array              - Arrays, chosen from the type of the first element
# A String that Misc.is_filename? accepts is extended with Path and saved
# again as a path. Arrays are written one element per line.
def self.save_input(name, value, type, dir)
  path = File.join(dir, name.to_s)

  case value
  when Path
    if Step === value.resource
      step = value.resource
      value = File.join('var/jobs', step.workflow.to_s, step.short_path + '.files', Misc.path_relative_to(step.files_dir, value))
      path = path + '.as_step_file'
    else
      path = path + '.as_path'
    end
  when String
    if Misc.is_filename?(value, true)
      # Work on a copy so the String of the caller is not extended.
      value = value.dup
      value.extend Path
      return save_input(name, value, type, dir)
    end
  when IO
    path = path + '.as_io'
  when Step
    value = value.path
    path = path + '.as_step'
  when Array
    case value.first
    when Path
      if Step === value.first.resource
        path = path + '.as_step_file_array'
      else
        path = path + '.as_path_array'
      end
    when String
      if Misc.is_filename?(value.first, true)
        path = path + '.as_path_array'
      end
    when IO
      path = path + '.as_io_array'
    when Step
      path = path + '.as_step_array'
      value = value.collect{|s| s.path }
    when Numeric
      path = path + '.as_number_array'
    end

    value = value * "\n"
  end

  Log.debug "Saving job input #{name} (#{type}) into #{path}"

  # An IO that knows its file name is saved as that name, not as its content.
  if IO === value && value.respond_to?(:filename) && value.filename
    Open.write(path, value.filename)
  elsif IO === value
    Open.write(path, value)
  else
    Open.write(path, value.to_s)
  end
end

.save_inputs(inputs, input_types, dir) ⇒ Object



204
205
206
207
208
209
210
211
212
# File 'lib/rbbt/workflow/step/save_load_inputs.rb', line 204

# Saves every input that has a value into dir through save_input.
#
# inputs      - input names mapped to values; nil values are skipped.
# input_types - input names mapped to types; the type is passed on as a
#               String when present.
# dir         - directory where the inputs are written.
#
# Returns true when inputs is not empty, even if every value was nil.
def self.save_inputs(inputs, input_types, dir)
  inputs.each do |name, value|
    next if value.nil?

    type = input_types[name]
    type = type.to_s if type

    save_input(name, value, type, dir)
  end.any?
end

.save_job_inputs(job, dir, options = nil) ⇒ Object



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/rbbt/workflow/step/save_load_inputs.rb', line 214

# Saves into dir the inputs that were explicitly given to a job.
#
# job     - job whose inputs are saved.
# dir     - directory where the inputs are written.
# options - optional Hash; when given, only the inputs named in it are
#           saved, and an 'override_dependencies' entry is saved as one
#           more input of type :array.
#
# An input is saved when it is an input of the job's task, appears in the
# job's real inputs, is not nil and differs from its default value.
#
# Returns the names of the saved inputs.
def self.save_job_inputs(job, dir, options = nil)
  options = IndiferentHash.setup options.dup if options

  task_name = job.original_task_name || job.task_name
  workflow = job.original_workflow || job.workflow
  workflow = Kernel.const_get workflow if String === workflow
  if workflow
    task_info = IndiferentHash.setup(workflow.task_info(task_name))
    input_types = IndiferentHash.setup(task_info[:input_types])
    input_options = IndiferentHash.setup(task_info[:input_options])
    task_inputs = IndiferentHash.setup(task_info[:inputs])
    input_defaults = IndiferentHash.setup(task_info[:input_defaults])
  else
    input_types = IndiferentHash.setup({})
    task_inputs = IndiferentHash.setup({})
    input_defaults = IndiferentHash.setup({})
  end

  inputs = IndiferentHash.setup({})
  real_inputs = job.real_inputs || job.info[:real_inputs]
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
    next unless task_inputs.include? name.to_sym
    next unless real_inputs.include? name.to_sym
    next if options && ! options.include?(name)
    next if value.nil?
    next if input_defaults[name] == value
    inputs[name] = value
  end

  if options && options.include?('override_dependencies')
    # The value comes from the options hash; the original read it from
    # `open`, which resolves to Kernel#open and raised an ArgumentError.
    inputs.merge!(:override_dependencies => options[:override_dependencies])
    input_types = IndiferentHash.setup(input_types.merge(:override_dependencies => :array))
  end

  save_inputs(inputs, input_types, dir)

  inputs.keys
end

.serialize_info(info) ⇒ Object



30
31
32
33
# File 'lib/rbbt/workflow/step/accessor.rb', line 30

# Serializes an info hash with the configured INFO_SERIALIZER.
#
# IndiferentHash instances are first reduced with +clean_version+; any other
# object is dumped as given.
def self.serialize_info(info)
  payload = IndiferentHash === info ? info.clean_version : info
  INFO_SERIALIZER.dump(payload)
end

.status_color(status) ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/rbbt/workflow/step/info.rb', line 148

# Maps a status string to the color used to display it.
#
# Only the text after the last ">" is considered. Unknown statuses (and an
# empty string) map to :cyan.
def self.status_color(status)
  palette = {
    "starting" => :yellow,
    "error"    => :red,
    "aborted"  => :red,
    "done"     => :green,
  }
  current = status.split(">").last
  palette.fetch(current, :cyan)
end

.step_info(path) ⇒ Object



98
99
100
101
102
103
104
105
106
107
# File 'lib/rbbt/workflow/step/accessor.rb', line 98

# Reads and deserializes the info file that belongs to the job at +path+.
#
# Any exception raised while opening or loading the file is logged and an
# empty hash is returned instead.
def self.step_info(path)
  Open.open(info_file(path), :mode => 'rb') { |stream| self.load_serialized_info(stream) }
rescue Exception
  Log.exception $!
  {}
end

.tmp_path(path) ⇒ Object



82
83
84
85
86
87
88
# File 'lib/rbbt/workflow/step/accessor.rb', line 82

# Returns the temporary path for +path+: the same directory with a dot
# prepended to the file name.
#
# Path objects are resolved with +find+ first; the result is always expanded
# to an absolute path.
def self.tmp_path(path)
  located = Path === path ? path.find : path
  absolute = File.expand_path(located)
  hidden_name = '.' << File.basename(absolute)
  File.join(File.dirname(absolute), hidden_name)
end

.wait_for_jobs(jobs) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rbbt/workflow/step/accessor.rb', line 40

# Waits for every job in +jobs+ (a single Step is also accepted).
#
# Each job is waited on in its own thread: +soft_grace+ is called and then
# +join+ unless the job is already done. If anything fails, all waiting
# threads are terminated, every job is aborted and the exception is raised
# again.
def self.wait_for_jobs(jobs)
  jobs = [jobs] if Step === jobs
  waiters = []
  begin
    waiters = jobs.map do |job|
      Thread.new do
        begin
          job.soft_grace
          job.join unless job.done?
        rescue Exception
          Log.error "Exception waiting for job: #{Log.color :blue, job.path}"
          raise $!
        end
      end
    end

    waiters.each(&:join)
  rescue Exception
    waiters.each(&:exit)
    jobs.each(&:abort)
    raise $!
  end
end

Instance Method Details

#_abortObject



655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
# File 'lib/rbbt/workflow/step/run.rb', line 655

# Internal abort routine: stops whatever this job has running.
#
# Only the first call on an instance does any work (@aborted guards
# re-entry). When the job is already done nothing is stopped; otherwise the
# job process (if running), its children, its result stream and its
# dependencies are stopped in that order. +_clean_finished+ is in the
# +ensure+ clause, so it also runs on the early +return if done?+.
def _abort
  return if @aborted
  @aborted = true
  Log.medium{"#{Log.color :red, "Aborting"} #{Log.color :blue, path}"}
  # Single retry budget shared by both rescue clauses below
  doretry = true
  begin
    return if done?
    abort_pid if running?
    kill_children
    abort_stream
    stop_dependencies
  rescue Aborted, Interrupt
    # Interrupted while aborting: retry the sequence once, then re-raise
    Log.medium{"#{Log.color :red, "Aborting ABORTED RETRY"} #{Log.color :blue, path}"}
    if doretry
      doretry = false
      retry
    end
    raise $!
  rescue Exception
    # Any other failure: retry once; after that the exception is dropped
    if doretry
      doretry = false
      retry
    end
  ensure
    _clean_finished
  end
end

#_clean_finishedObject



635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
# File 'lib/rbbt/workflow/step/run.rb', line 635

# Removes the result file and the temporary result file of a job whose
# status is not :done.
#
# Each removal is attempted independently; a failure to remove is logged as
# a warning and never raised.
def _clean_finished
  unless !Open.exists?(path) || status == :done
    Log.warn "Aborted job had finished. Removing result -- #{ path }"
    begin
      Open.rm path
    rescue Exception
      Log.warn "Exception removing result of aborted job: #{$!.message}"
    end
  end

  return unless Open.exists?(tmp_path) && status != :done

  Log.warn "Aborted job had finished. Removing tmp result -- #{ tmp_path }"
  begin
    Open.rm tmp_path
  rescue Exception
    Log.warn "Exception removing tmp result of aborted job: #{$!.message}"
  end
end

#_execObject



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rbbt/workflow/step/run.rb', line 93

# Executes the task with the step's inputs and returns its result.
#
# Input steps are resolved and inputs rewound first. A task that responds to
# +exec_in+ is run through it; otherwise the task is instance_exec'ed on the
# bindings (or on this step when there are none). Raises DependencyError
# when the step has no task.
def _exec
  resolve_input_steps
  rewind_inputs
  # Only set the flag when nobody assigned it before
  @exec = true if @exec.nil?
  begin
    # While the task runs, INT raises Aborted in the thread that runs the
    # handler; the previous handler is restored in the ensure below.
    old = Signal.trap("INT"){ Thread.current.raise Aborted }
    if @task.respond_to?(:exec_in)
      @task.exec_in((bindings || self), *@inputs)
    elsif @task
      (bindings || self).instance_exec *@inputs, &@task
    else
      raise DependencyError, "Dependency #{self.path} cannot be produced"
    end
  ensure
    Signal.trap("INT", old)
  end
end

#abortObject



683
684
685
686
687
688
# File 'lib/rbbt/workflow/step/run.rb', line 683

# Aborts the job unless it has already finished cleanly.
#
# Returns nil without doing anything when the result exists and the status
# is :done or :noinfo. Otherwise runs +_abort+, records the :aborted status
# unless the job already counts as aborted or errored, and returns self.
def abort
  finished = done? && [:done, :noinfo].include?(status)
  return if finished

  _abort
  log(:aborted, "Job aborted") if !aborted? && !error?
  self
end

#abort_pidObject



594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
# File 'lib/rbbt/workflow/step/run.rb', line 594

# Sends TERM to the process recorded for this job and waits for it.
#
# The pid comes from @pid, then the info file, then the pid file.
#
# Returns false when no signal was sent (no pid, or the pid is this very
# process) and true once a kill was attempted, even if the attempt failed.
def abort_pid
  # NOTE(review): the value read from the pid file is presumably a String,
  # while info[:pid] is written from Process.pid elsewhere (Integer); a String
  # would not match the `when Process.pid` branch below -- confirm.
  @pid ||= info[:pid] || Open.read(pid_file)

  case @pid
  when nil
    Log.medium "Could not abort #{path}: no pid"
    false
  when Process.pid
    Log.medium "Could not abort #{path}: same process"
    false
  else
    Log.medium "Aborting pid #{path}: #{ @pid } #{Process.pid}"
    begin
      Process.kill("TERM", @pid.to_i)
      s = Process.waitpid2 @pid.to_i
      Log.medium "Aborted pid #{path} #{s}"
    rescue Exception
      # Failing to signal or wait is only logged; the method still returns true
      Log.debug("Aborted job #{@pid} was not killed: #{$!.message}")
    end
    true
  end
end

#abort_streamObject



617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
# File 'lib/rbbt/workflow/step/run.rb', line 617

# Aborts the result stream of the job, if there is one.
#
# Only an IO held in @result is considered, and only when it responds to
# +abort+ and is not already aborted. @saved_stream is always cleared.
def abort_stream
  stream = @result if IO === @result
  @saved_stream = nil
  if stream and stream.respond_to? :abort and not stream.aborted?
    doretry = true
    begin
      Log.medium "Aborting job stream #{stream.inspect} -- #{Log.color :blue, path}"
      stream.abort 
    rescue Aborted, Interrupt
      # Interrupted while aborting the stream: try once more; a second
      # interruption is not re-raised.
      Log.medium "Aborting job stream #{stream.inspect} ABORTED RETRY -- #{Log.color :blue, path}"
      if doretry
        doretry = false
        retry
      end
    end
  end
end

#aborted?Boolean

Returns:

  • (Boolean)


142
143
144
145
# File 'lib/rbbt/workflow/step/status.rb', line 142

# Tells whether the job should be considered aborted.
#
# True when the status is :aborted. For any status other than :ending,
# :dependencies, :cleaned, :noinfo, :setup and :waiting the answer is
# whatever +nopid?+ returns.
def aborted?
  current = self.status
  return true if current == :aborted

  pidless_states = [:ending, :dependencies, :cleaned, :noinfo, :setup, :waiting]
  !pidless_states.include?(current) && nopid?
end

#accessObject



361
362
363
# File 'lib/rbbt/workflow/step/accessor.rb', line 361

# Runs `touch -c -h -a` on the result file and the info file of the job.
def access
  targets = "#{self.path} #{self.info_file}"
  CMD.cmd("touch -c -h -a #{targets}")
end

#archive(target = nil) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/rbbt/workflow/util/archive.rb', line 21

# Archives this job together with all of its recursive dependencies.
#
# target - destination; defaults to the job path plus '.tar.gz'. When it is
#          an existing directory the files are rsync'ed into it, otherwise a
#          gzipped tar file is written at that location.
def archive(target = nil)
  target = self.path + '.tar.gz' if target.nil?
  target = File.expand_path(target)
  TmpFile.with_file do |tmpdir|
    Step.link_job self.path, tmpdir
    rec_dependencies = Set.new
    # Worklist traversal of the dependency graph, starting at this job
    deps = [self.path]
    seen = Set.new
    while deps.any?
      path = deps.shift
      dep = Workflow.load_step path
      seen << dep.path
      # The block parameter below shadows the outer `dep`
      dep.dependencies.each do |dep|
        next if seen.include? dep.path
        deps << dep.path
        rec_dependencies << dep.path
      end if dep.dependencies
    end

    rec_dependencies.each do |path|
      Step.link_job path, tmpdir
    end

    Misc.in_dir(tmpdir) do
      if File.directory?(target)
        CMD.cmd_log("rsync #{MAIN_RSYNC_ARGS} --copy-unsafe-links '#{ tmpdir }/' '#{ target }/'")
      else
        CMD.cmd_log("tar cvhzf '#{target}'  ./*")
      end
    end
    Log.debug "Archive finished at: #{target}"
  end
end

#archive_depsObject



79
80
81
82
# File 'lib/rbbt/workflow/step/info.rb', line 79

# Stores the archived info of the dependencies and the current dependency
# list in the job info, under :archived_info and :archived_dependencies.
def archive_deps
  snapshot = archived_info
  set_info(:archived_info, snapshot)
  set_info(:archived_dependencies, info[:dependencies])
end

#archived_infoObject



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/rbbt/workflow/step/info.rb', line 84

# Returns a hash from dependency path to dependency info covering all
# recursive dependencies.
#
# A value already stored under info[:archived_info] is returned as is.
# Otherwise each dependency contributes its own info (or its +overriden+
# symbol when it has no info file) plus its own archived info.
def archived_info
  stored = info[:archived_info]
  return stored if stored

  (dependencies || []).each_with_object({}) do |dep, collected|
    collected[dep.path] =
      if Symbol === dep.overriden && ! Open.exists?(dep.info_file)
        dep.overriden
      else
        dep.info
      end
    collected.merge!(dep.archived_info)
  end
end

#archived_inputsObject



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/rbbt/workflow/step/info.rb', line 100

# Collects the inputs recorded in the archived info of all archived
# dependencies.
#
# Returns an empty hash when the job has no :archived_dependencies. When
# several dependencies define the same input, the first value encountered
# during the traversal is kept.
def archived_inputs
  return {} unless info[:archived_dependencies]
  known = self.archived_info

  merged = IndiferentHash.setup({})
  pending = info[:archived_dependencies].map { |entry| entry.last }
  visited = []
  while (current = pending.pop)
    entry = known[current]
    if Hash === entry
      recorded = entry[:inputs]
      if recorded
        recorded.each do |key, value|
          merged[key] = value unless merged.include?(key)
        end
      end
      [:dependencies, :archived_dependencies].each do |field|
        listed = entry[field]
        pending.concat(listed.map { |item| item.last } - visited) if listed
      end
    end
    visited << current
  end

  merged
end

#canfail_pathsObject



355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/rbbt/workflow/step/dependencies.rb', line 355

# Returns the Set of dependency paths that are allowed to fail.
#
# A finished job with no info file yields an empty Set (not memoized).
# Otherwise the value is memoized in @canfail_paths: it is read from
# info[:canfail] when present, or computed from the dependencies and then
# stored under :canfail in the info.
def canfail_paths
  return Set.new if done? && ! Open.exists?(info_file)

  @canfail_paths ||= begin 
                       if info[:canfail] 
                         paths = info[:canfail].uniq
                         paths = Workflow.relocate_array self.path, paths if relocated
                         Set.new(paths)
                       else
                         canfail_paths = Set.new
                         all_deps = dependencies || []
                         all_deps.each do |dep|
                           next if canfail_paths.include? dep.path
                           # Always inherit what the dependency itself tolerates
                           canfail_paths += dep.canfail_paths
                           # Only canfail ComputeDependency entries add themselves
                           # and their recursive dependencies
                           next unless ComputeDependency === dep && dep.canfail?
                           canfail_paths << dep.path
                           canfail_paths += dep.rec_dependencies.collect{|d| d.path }
                         end
                         # The bare expression on the next line has no effect
                         canfail_paths
                         begin
                           set_info :canfail, canfail_paths.to_a
                         rescue Errno::EROFS
                           # Read-only file system: keep the computed value unsaved
                         end
                         canfail_paths
                       end
                     end
end

#checksObject



144
145
146
# File 'lib/rbbt/workflow/step/run.rb', line 144

# Returns the dependency checks followed by the input checks, without
# duplicates.
def checks
  combined = dependency_checks + input_checks
  combined.uniq
end

#child(&block) ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
# File 'lib/rbbt/workflow/step.rb', line 241

# Forks a process that runs +block+ and registers its pid under
# :children_pids in the job info.
#
# Returns the pid of the forked process.
def child(&block)
  child_pid = Process.fork &block
  registered = info[:children_pids]
  registered = registered.nil? ? [child_pid] : (registered << child_pid)
  set_info :children_pids, registered
  child_pid
end

#cleanObject



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/rbbt/workflow/step/status.rb', line 34

# Cleans the job: aborts it when it is still running, removes it through
# Step.clean and resets the cached @done flag.
#
# Jobs without an info file are left untouched. Always returns self.
def clean
  unless Open.exists?(info_file)
    Log.high "Refusing to clean step with no .info file: #{path}"
    return self
  end

  # Short description of why the job is being cleaned, for the log line
  notes = []
  notes << "dirty" if done? && dirty?
  notes << "not running" unless done? || running?
  notes.unshift " " if notes.any?
  Log.high "Cleaning step: #{path}#{notes * " "}"
  Log.stack caller if RBBT_DEBUG_CLEAN

  abort unless done? || ! running?
  Step.clean(path)
  @done = false
  self
end

#cmd(*args) ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/rbbt/workflow/step.rb', line 253

# Runs a command through CMD.cmd as a child of this job, registering its pid
# under :children_pids and echoing its output to STDERR.
#
# args - arguments for CMD.cmd; a trailing Hash holds the options. Its :log
#        entry selects the log level used for the output (true means 0,
#        false means 10, default 0). :log and :pipe are then forced to true.
#
# Returns nil once the command has been joined.
def cmd(*args)
  all_args = *args

  # Make sure there is an options hash to update
  all_args << {} unless Hash === all_args.last

  level = all_args.last[:log] || 0
  level = 0 if TrueClass === level
  level = 10 if FalseClass === level
  level = level.to_i

  all_args.last[:log] = true
  all_args.last[:pipe] = true

  io = CMD.cmd(*all_args)
  child_pid = io.pids.first

  children_pids = info[:children_pids]
  if children_pids.nil?
    children_pids = [child_pid]
  else
    children_pids << child_pid
  end
  set_info :children_pids, children_pids

  # Copy the output one character at a time, writing a prefix after each
  # newline. NOTE(review): `pid` below is not a local of this method;
  # presumably it is the step's pid accessor -- confirm.
  while c = io.getc
    STDERR << c if Log.severity <= level
    if c == "\n"
      if pid
        Log.logn "STDOUT [#{pid}]: ", level
      else
        Log.logn "STDOUT: ", level
      end
    end
  end 

  io.join

  nil
end

#config(key, *tokens) ⇒ Object



346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/rbbt/workflow/step/accessor.rb', line 346

# Looks up a configuration value with Rbbt::Config.get, adding tokens that
# identify this job's workflow and task.
#
# key    - configuration key.
# tokens - caller tokens; a trailing Hash is taken as the options.
#
# The added tokens are "workflow:<wf>" and "task:<wf>#<task>" (only when the
# job has a workflow) followed by "task:<task>".
def config(key, *tokens)
  options = Hash === tokens.last ? tokens.pop : {}

  scoped = []
  if workflow
    wf_name = workflow.to_s
    scoped.push("workflow:#{wf_name}", "task:#{wf_name}##{task_name.to_s}")
  end
  scoped.push("task:#{task_name.to_s}")

  Rbbt::Config.get(key, tokens + scoped, options)
end

#copy_files_dirObject



78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rbbt/workflow/step.rb', line 78

# When the files directory is a symbolic link, replaces the link with a
# copy of the directory it points to.
#
# Errors during the replacement are logged as warnings, not raised.
def copy_files_dir
  return unless File.symlink?(self.files_dir)

  begin
    resolved = Open.realpath(self.files_dir)
    Open.rm self.files_dir
    Open.cp resolved, self.files_dir
  rescue
    Log.warn "Copy files_dir for #{self.workflow_short_path}: " + $!.message
  end
end

#dependency_checksObject



128
129
130
131
132
133
134
135
136
137
# File 'lib/rbbt/workflow/step/run.rb', line 128

# Returns the recursive dependencies that take part in update checks.
#
# Always empty unless the RBBT_UPDATE environment variable is "true".
# Remote steps, dependencies at remote paths and dependencies in error are
# left out; the rest are passed through Workflow.relocate_dependency.
def dependency_checks
  return [] if ENV["RBBT_UPDATE"] != "true"

  rec_dependencies(true).
    reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
    reject{|dependency| dependency.error? }.
    #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
    #select{|dependency| dependency.updatable? }.
    collect{|dependency| Workflow.relocate_dependency(self, dependency)}
end

#dirty?Boolean

Returns:

  • (Boolean)


76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/rbbt/workflow/step/status.rb', line 76

# Tells whether the job is in an inconsistent or outdated state.
#
# The checks run in order and the first one that applies decides:
# a pid file without info file or result is dirty; a result whose status is
# neither :done nor :noinfo is dirty; a job that is neither done nor marked
# :done is not dirty; without RBBT_UPDATE=true nothing else is checked.
# Otherwise the job is dirty when it has dirty files or is not updated.
def dirty?
  return true if Open.exists?(pid_file) && ! ( Open.exists?(info_file) || done? )
  return true if done? && ! (status == :done || status == :noinfo)
  return false unless done? || status == :done
  return false unless ENV["RBBT_UPDATE"] == "true"

  status = self.status

  if done? and not (status == :done or status == :ending or status == :producing) and not status == :noinfo
    return true 
  end

  # Info says :done but the result file is missing
  if status == :done and not done?
    return true 
  end

  if dirty_files.any?
    Log.low "Some dirty files found for #{self.path}: #{Misc.fingerprint dirty_files}"
    true
  else
    ! self.updated?
  end
end

#dirty_filesObject



63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/rbbt/workflow/step/status.rb', line 63

# Returns the recursive dependencies considered dirty.
#
# A dependency is NOT dirty when any of these holds: it is a remote step, it
# has no info file, its result exists (or its path is remote), or it is in
# error/aborted and either the error is not recoverable or the dependency is
# allowed to fail.
def dirty_files
  all_deps = self.rec_dependencies(true)
  return [] if all_deps.empty?
  tolerated = self.canfail_paths

  all_deps.reject do |dep|
    next true if defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dep
    next true unless Open.exists?(dep.info_file)
    next true if dep.path && (Open.exists?(dep.path) || Open.remote?(dep.path))

    (dep.error? || dep.aborted?) && (! dep.recoverable_error? || tolerated.include?(dep.path))
  end
end

#done?Boolean

Returns:

  • (Boolean)


100
101
102
# File 'lib/rbbt/workflow/step/status.rb', line 100

# Tells whether the result file of the job exists.
#
# Because `and` binds more loosely than `||=`, the expression is evaluated
# as `(@done ||= path) and Open.exists?(path)`: @done receives the path when
# it was unset and the existence check runs on every call.
# NOTE(review): the existence check was presumably meant to be memoized in
# @done -- confirm before changing, since that would alter caching.
def done?
  @done ||= path and Open.exists?(path)
end

#dup_inputsObject



60
61
62
63
64
65
66
67
68
69
70
# File 'lib/rbbt/workflow/step/dependencies.rb', line 60

# Replaces every input, in place, with the result of Step.dup_stream.
#
# Does nothing when there are no inputs, when the inputs were already
# dupped, when RBBT_NO_STREAM is 'true', or for a ComputeDependency whose
# compute mode is :produce.
def dup_inputs
  return if @inputs.nil? || @dupped || ENV["RBBT_NO_STREAM"] == 'true'
  return if ComputeDependency === self && self.compute == :produce

  Log.low "Dupping inputs for #{path}"
  @inputs.replace(@inputs.collect { |input| Step.dup_stream input })
  @dupped = true
end

#error?Boolean

Returns:

  • (Boolean)


134
135
136
# File 'lib/rbbt/workflow/step/status.rb', line 134

# Tells whether the status of the job is :error.
def error?
  current = status
  current == :error
end

#exception(ex, msg = nil) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/rbbt/workflow/step/info.rb', line 263

# Records an exception in the job info, logs it as an :error and aborts the
# job through +_abort+.
#
# ex  - the exception (backtrace and message are read only when supported).
# msg - optional text used instead of the exception class in the log line.
def exception(ex, msg = nil)
  ex_class = ex.class.to_s
  backtrace = ex.respond_to?(:backtrace) ? ex.backtrace : nil
  message = ex.respond_to?(:message) ? ex.message : nil

  set_info :backtrace, backtrace
  set_info :exception, {:class => ex_class, :message => message, :backtrace => backtrace}

  headline = msg.nil? ? ex_class : msg
  log :error, "#{headline} -- #{message}"

  self._abort
end

#execute_and_dup(step, dep_step, log = true) ⇒ Object



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/rbbt/workflow/step/dependencies.rb', line 227

# Executes the dependency +step+ and, when it ends up streaming and several
# steps consume it, splits its result stream into one copy per consumer.
#
# step     - the dependency to execute.
# dep_step - hash from dependency path to the steps that depend on it.
# log      - passed through to +execute_dependency+.
def execute_and_dup(step, dep_step, log = true)
  # Only consider duplicating when the step had no result before executing
  dup = step.result.nil?
  execute_dependency(step, log)
  if dup and step.streaming? and not step.result.nil?
    if dep_step[step.path] and dep_step[step.path].length > 1
      stream = step.result
      other_steps = dep_step[step.path].uniq.reject{|d| d.overriden }

      # Replace each consumer with those of its recursive dependencies that
      # take +step+ as an input, when there are any
      other_steps = other_steps.collect{|d|
        deps_using_step_input = d.rec_dependencies.select{|d| d.inputs.include? step  }
        deps_using_step_input.any? ? deps_using_step_input : d
      }.flatten.uniq

      return unless other_steps.length > 1

      log_dependency_exec(step, "duplicating #{other_steps.length}") 
      copies = Misc.tee_stream_thread_multiple(stream, other_steps.length)
      copies.extend StreamArray
      step.instance_variable_set("@result", copies)
    end
  end
end

#execute_dependency(dependency, log = true) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/rbbt/workflow/step/dependencies.rb', line 156

# Brings +dependency+ to a usable state: done, streaming, or failed with an
# error that is raised here.
#
# The body is a retry loop driven by TryAgain: each pass inspects the
# current state of the dependency, takes one action (clean, run, join) and
# raises TryAgain to re-evaluate from the top.
#
# dependency - the step to execute.
# log        - whether to log the :done and :streaming outcomes.
#
# Exceptions are re-raised unless the dependency is in +canfail_paths+.
def execute_dependency(dependency, log = true)
  task_name = self.task_name
  canfail_paths = self.canfail_paths
  # Paths already cleaned once in this call; their errors are not retried
  already_failed = []
  begin

    dependency.resolve_input_steps

    if dependency.done?
      # Drain input streams that the finished dependency will not read
      dependency.inputs.each do |v|
        Misc.consume_stream(v) if IO === v
        Misc.consume_stream(TSV.get_stream v) if Step === v and not v.done?  and  v.streaming?
      end
      log_dependency_exec(dependency, :done) if log
      return
    end

    dependency.status_lock.synchronize do
      if dependency.aborted? || (dependency.error? && dependency.recoverable_error? && ! canfail_paths.include?(dependency.path) && ! already_failed.include?(dependency.path)) || (!Open.remote?(dependency.path) && dependency.missing?)
        if dependency.resumable?
          dependency.status = :resume
        else
          Log.warn "Cleaning dep. on exec #{Log.color :blue, dependency.path} (missing: #{dependency.missing?}; error #{dependency.error?})"
          dependency.clean
          already_failed << dependency.path
          raise TryAgain
        end
      end
    end

    if dependency.status == :resume || ! (dependency.started? || dependency.error?)
      log_dependency_exec(dependency, :starting)
      dependency.run(true)
      raise TryAgain
    end

    dependency.grace

    if dependency.error?
      log_dependency_exec(dependency, :error)
      Step.raise_dependency_error dependency 
    end

    if dependency.streaming?
      log_dependency_exec(dependency, :streaming) if log
      return
    end

    begin
      log_dependency_exec(dependency, :joining)
      dependency.join
      raise TryAgain unless dependency.done?
    rescue Aborted
      raise TryAgain
    end

  rescue TryAgain
    #Log.low "Retrying dep. #{Log.color :yellow, dependency.task_name.to_s} -- [#{dependency.status}] #{(dependency.messages || ["No message"]).last}"
    retry
  rescue Aborted, Interrupt
    Log.error "Aborted dep. #{Log.color :red, dependency.task_name.to_s}"
    raise $!
  # NOTE(review): Interrupt is already handled by the clause above, so the
  # following clause can never be selected.
  rescue Interrupt
    Log.error "Interrupted while in dep. #{Log.color :red, dependency.task_name.to_s}"
    raise $!
  rescue Exception
    Log.error "Exception in dep. #{ Log.color :red, dependency.task_name.to_s } -- #{$!.message}"
    raise $! unless canfail_paths.include? dependency.path
  end
end

#file(name = nil) ⇒ Object



267
268
269
# File 'lib/rbbt/workflow/step/accessor.rb', line 267

# Returns the Path of the file +name+ inside the files directory of the job,
# set up with the job's workflow and the job itself.
def file(name=nil)
  location = File.join(files_dir, name.to_s)
  Path.setup(location, workflow, self)
end

#filesObject



260
261
262
263
264
265
# File 'lib/rbbt/workflow/step/accessor.rb', line 260

# Lists every regular entry found under the files directory (recursively,
# directories excluded) as paths relative to that directory.
def files
  pattern = File.join(files_dir, '**', '*')
  entries = Dir.glob(pattern).reject { |found| File.directory? found }
  entries.collect { |found| Misc.path_relative_to(files_dir, found) }
end

#files_dirObject

{{{ INFO



252
253
254
# File 'lib/rbbt/workflow/step/accessor.rb', line 252

# Returns the files directory of the job, computed once with Step.files_dir
# and cached in @files_dir.
def files_dir
  return @files_dir if @files_dir

  @files_dir = Step.files_dir(path)
end

#fork(no_load = false, semaphore = nil) ⇒ Object



514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
# File 'lib/rbbt/workflow/step/run.rb', line 514

# Runs the job in a forked process and returns self immediately.
#
# no_load   - passed to +run+ in the child; the value :stream additionally
#             pipes the result of the child back into @result of the parent.
# semaphore - optional semaphore name; the child waits on it before running
#             and posts it when finished.
#
# Raises when a previous fork of this step is still alive and not done.
def fork(no_load = false, semaphore = nil)
  raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil? and not Process.pid == @pid and Misc.pid_exists?(@pid) and not done? and info[:forked]
  Log.debug "Fork to run #{self.path}"
  # Pipe used to stream the result from the child to the parent
  sout, sin = Misc.pipe if no_load == :stream
  @pid = Process.fork do
    # --- Child process ---
    Signal.trap(:TERM) do
      raise Aborted, "Recieved TERM Signal on forked process #{Process.pid}"
    end
    sout.close if sout
    Misc.pre_fork
    Open.mkdir File.dirname(path) unless Open.exist?(File.dirname(path))
    Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)

    if semaphore
      init_info
      log :queue, "Queued over semaphore: #{semaphore}"
      ret = RbbtSemaphore.wait_semaphore(semaphore)
      raise SemaphoreInterrupted if ret == -1
    end

    begin
      begin
        @forked = true
        res = run no_load
        set_info :forked, true
        if sin
          # Stream mode: copy the result stream into the pipe
          io = TSV.get_stream res
          if io.respond_to? :setup
            io.setup(sin) 
            sin.pair = io
            io.pair = sin
          end
          begin
            Misc.consume_stream(io, false, sin)
          rescue 
            Log.warn "Could not consume stream (#{io.closed? ? 'closed' : 'open'}) into pipe for forked job: #{self.path}"
            Misc.consume_stream(io) unless io.closed?
          end
        end
      rescue Aborted, Interrupt
        Log.debug{"Forked process aborted: #{path}"}
        log :aborted, "Job aborted (#{Process.pid})"
        raise $!
      rescue Exception
        Log.debug("Exception '#{$!.message}' caught on forked process: #{path}")
        raise $!
      ensure
        join_stream
      end

      # Wait for the child processes registered by the job before exiting
      begin
        children_pids = info[:children_pids]
        if children_pids
          children_pids.each do |pid|
            if Misc.pid_exists? pid
              begin
                Process.waitpid pid
              rescue Errno::ECHILD
                Log.low "Waiting on #{ pid } failed: #{$!.message}"
              end
            end
          end
          set_info :children_done, Time.now
        end
      rescue Exception
        Log.debug("Exception waiting for children: #{$!.message}")
        RbbtSemaphore.post_semaphore(semaphore) if semaphore
        Kernel.exit! -1
      end
      #set_info :pid, nil
    ensure
      RbbtSemaphore.post_semaphore(semaphore) if semaphore
    end
  end
  # --- Parent process ---
  sin.close if sin
  @result = sout if sout 
  Process.detach(@pid)
  self
end

#get_exceptionObject



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/rbbt/workflow/step/info.rb', line 277

# Rebuilds the exception associated with the job.
#
# When the info holds an :exception entry, an instance of the recorded class
# is created with the recorded message and backtrace; if that fails the
# failure is logged and a plain Exception with the message is returned.
# Without a recorded exception: the Aborted class for aborted jobs, an
# Exception with the last message for jobs in error, and an Exception with
# an empty message otherwise.
def get_exception
  unless info[:exception].nil?
    ex_class, ex_message, ex_backtrace = info[:exception].values_at :class, :message, :backtrace
    begin
      rebuilt = Kernel.const_get(ex_class).new ex_message
      rebuilt.set_backtrace ex_backtrace unless ex_backtrace.nil? or ex_backtrace.empty?
      return rebuilt
    rescue
      Log.exception $!
      return Exception.new ex_message
    end
  end

  return Aborted if aborted?
  return Exception.new(messages.last) if error?

  Exception.new "" 
end

#get_streamObject



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/rbbt/workflow/step/run.rb', line 11

# Hands out the result stream of the job, under @mutex.
#
# For an IO result the stream is returned the first time and nil on later
# calls (while @saved_stream is set). For a non-empty StreamArray one stream
# is popped per call. Returns nil for any other kind of result.
def get_stream
  @mutex.synchronize do
    Log.low "Getting stream from #{path} #{!@saved_stream} [#{object_id}-#{Misc.fingerprint(@result)}]"
    begin
      if IO === @result 
        # A single stream can only be handed out once
        return nil if @saved_stream
        @saved_stream = @result 
      elsif StreamArray === @result and @result.any?
        @saved_stream = @result.pop 
      else
        nil
      end
    end
  end
end

#graceObject



711
712
713
714
715
716
# File 'lib/rbbt/workflow/step/run.rb', line 711

# Blocks, polling once per second, until the job shows some sign of
# activity: done, has a result, error, aborted, streaming, waiting or
# running. Returns self.
def grace
  settled = lambda do
    done? || result || error? || aborted? || streaming? || waiting? || running?
  end
  sleep 1 until settled.call
  self
end

#info(check_lock = true) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/rbbt/workflow/step/info.rb', line 2

# Loads the info hash of the job from its info file.
#
# check_lock - when true, a locked info file makes the read be retried.
#
# Returns {:status => :noinfo} when there is no info file. The loaded info
# is cached in @info_cache and reused while the ctime of the info file is
# older than the time the cache was filled. Errors are logged and re-raised.
def info(check_lock = true)
  return {:status => :noinfo} if info_file.nil? || ! Open.exists?(info_file)

  begin
    Misc.insist do

      begin
        return @info_cache if @info_cache and @info_cache_time and Open.ctime(info_file) < @info_cache_time 
      rescue Exception
        raise $!
      end


      begin
        # Nested Misc.insist calls retry the read
        @info_cache = Misc.insist(3, 1.6, info_file) do
          Misc.insist(2, 1, info_file) do
            Misc.insist(3, 0.2, info_file) do
              raise TryAgain, "Info locked" if check_lock and info_lock.locked?
              # `and false` keeps the explicit lock/unlock disabled
              info_lock.lock if check_lock and false
              begin
                Open.open(info_file, :mode => 'rb') do |file|
                  Step.load_serialized_info(file)
                end
              ensure
                info_lock.unlock if check_lock and false
              end
            end
          end
        end
        @info_cache_time = Time.now
        @info_cache
      end
    end
  rescue Exception
    Log.debug{"Error loading info file: " + info_file}
    Log.exception $!
    #Open.rm info_file
    #Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
    raise $!
  end
end

#info_fileObject

{{{ INFO



175
176
177
# File 'lib/rbbt/workflow/step/accessor.rb', line 175

# Returns the info file of the job, computed once with Step.info_file and
# cached in @info_file.
def info_file
  return @info_file if @info_file

  @info_file = Step.info_file(path)
end

#info_lockObject



183
184
185
186
187
188
189
190
# File 'lib/rbbt/workflow/step/accessor.rb', line 183

# Returns the Lockfile that protects the info file, created on first use and
# kept in @info_lock. The lock file lives under Step.lock_dir.
def info_lock
  return @info_lock unless @info_lock.nil?

  lock_path = Persist.persistence_path(info_file + '.lock', {:dir => Step.lock_dir})
  #Lockfile.new lock_path, :refresh => false, :dont_use_lock_id => true
  @info_lock = Lockfile.new lock_path
end

#init_info(force = false) ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/rbbt/workflow/step/accessor.rb', line 201

# Writes the initial info file of the job with status :waiting.
#
# force - rewrite the info file even when it already exists.
#
# Returns nil without writing when @exec is set, when there is no info file
# path, or when the info file exists and +force+ is false. The :batch_job
# and :batch_system entries of an existing info file are carried over.
def init_info(force = false)
  return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
  # Read the values to preserve before the file is rewritten
  batch_job = info[:batch_job] if Open.exists?(info_file)
  batch_system = info[:batch_system] if Open.exists?(info_file)
  Open.lock(info_file, :lock => info_lock) do
    i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs, :overriden => overriden}
    i[:batch_job] = batch_job if batch_job
    i[:batch_system] = batch_system if batch_system
    i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
    Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
    # Prime the cache used by #info with what was just written
    @info_cache = IndiferentHash.setup(i)
    @info_cache_time = Time.now
  end
end

#input_checksObject



139
140
141
142
# File 'lib/rbbt/workflow/step/run.rb', line 139

# Returns the steps found among the inputs: inputs that are Steps, followed
# by the resource of every Path input whose resource is a Step.
def input_checks
  step_inputs = inputs.select { |input| Step === input }
  path_inputs = inputs.select { |input| Path === input && Step === input.resource }
  step_inputs + path_inputs.collect { |input| input.resource }
end

#input_dependenciesObject



147
148
149
150
151
152
153
154
# File 'lib/rbbt/workflow/step/dependencies.rb', line 147

# Returns (and memoizes in @input_dependencies) the steps found among the
# flattened recursive inputs: inputs that are Steps or RemoteSteps, followed
# by the resource of every Path input whose resource is a Step.
def input_dependencies
  @input_dependencies ||= recursive_inputs(true).flatten.
    select{|i| Step === i || (defined?(RemoteStep) && RemoteStep === i) } + 
    recursive_inputs(true).flatten.
    select{|dep| Path === dep && Step === dep.resource }.
    #select{|dep| ! dep.resource.started? }. # Ignore input_deps already started
    collect{|dep| dep.resource }
end

#inspectObject



431
432
433
# File 'lib/rbbt/workflow/step/accessor.rb', line 431

# Returns the Misc.fingerprint of the step instead of the default
# Object#inspect output.
def inspect
  Misc.fingerprint(self)
end

#joinObject



718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
# File 'lib/rbbt/workflow/step/run.rb', line 718

# Waits for the job to finish and returns self.
#
# Returns early when there is no info file or the job was already joined.
# Otherwise it waits for the job process (when it is a different, live
# process), joins the dependencies and then polls until the result exists
# with status :done/:noinfo, or the job is in error, aborted or waiting.
# On the way out the job is marked :joined (best effort) and @result is
# cleared.
def join

  grace if Open.exists?(info_file) 

  if streaming?
    join_stream 
  end

  return self if not Open.exists? info_file

  return self if info[:joined]

  pid = @pid 

  # The pid may not be in the info yet; retry reading it a few times
  Misc.insist [0.1, 0.2, 0.5, 1] do
    pid ||= info[:pid]
  end

  begin

    if pid.nil? or Process.pid == pid
      # Same process (or unknown pid): there is no process to wait for
      dependencies.each{|dep| dep.join }
    else
      begin
        pid = pid.to_i if String === pid
        Log.debug{"Waiting for pid: #{pid}"}
        Process.waitpid pid
      rescue Errno::ECHILD
        Log.debug{"Process #{ pid } already finished: #{ path }"}
      end if Misc.pid_exists? pid
      pid = nil
      dependencies.each{|dep| dep.join }
    end

    until (Open.exists?(path) && (status == :done || status == :noinfo)) or error? or aborted? or waiting?
      sleep 1
      join_stream if streaming?
    end

    self
  ensure
    begin
      set_info :joined, true 
    rescue
      # Failing to record :joined is ignored
    end if Open.exists?(info_file) && writable?
    @result = nil
  end
end

#join_streamObject



690
691
692
693
694
695
696
697
698
699
700
701
702
# File 'lib/rbbt/workflow/step/run.rb', line 690

# Consumes the pending result stream of the job, if any, and clears @result.
#
# When consuming or joining the stream fails, the stream is aborted with the
# exception and the job is aborted through +_abort+.
def join_stream
  pending = @result ? get_stream : nil
  @result = nil
  return unless pending

  begin
    Misc.consume_stream pending 
    pending.join if pending.respond_to? :join
  rescue Exception
    pending.abort $!
    self._abort
  end
end

#kill_childrenObject



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/rbbt/workflow/step/run.rb', line 203

# Sends TERM to every child process registered under :children_pids in the
# job info.
#
# Failures are only logged: per child when the signal cannot be delivered,
# and once for any other error (for instance while reading the info).
def kill_children
  pids = info[:children_pids]
  return unless pids and pids.any?

  Log.medium("Killing children: #{ pids * ", " }")
  pids.each do |child_pid|
    Log.medium("Killing child #{ child_pid }")
    begin
      Process.kill "TERM", child_pid.to_i
    rescue Exception
      Log.medium("Exception killing child #{ child_pid }: #{$!.message}")
    end
  end
rescue
  Log.medium("Exception finding children")
end

#knowledge_base(organism = nil) ⇒ Object



424
425
426
427
428
429
# File 'lib/rbbt/workflow/step/accessor.rb', line 424

# Returns the KnowledgeBase stored in the 'knowledge_base' file directory of
# the job, created on first use and kept in @_kb.
#
# organism - only used when the knowledge base is first created.
def knowledge_base(organism = nil)
  @_kb ||= KnowledgeBase.new(self.file('knowledge_base'), organism)
end

#loadObject



294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/rbbt/workflow/step.rb', line 294

# Returns the result of the job.
#
# A result already held in memory (@result) is returned directly unless it
# is the job path itself or a StreamArray. Otherwise the job is joined when
# not done and the result is loaded from the result file, or produced with
# +run+ when the file does not exist. When the task declares a result
# description, the result is passed through +prepare_result+ with the job
# info (merged with its inputs).
#
# An IOError while a result is held in memory clears it and retries once
# from the file. A stream held in @result is always dropped on the way out.
def load
  res = begin
          # A closed stream is of no use; forget it
          @result = nil if IO === @result && @result.closed?
          # Parenthesized on purpose: `! StreamArray === @result` parses as
          # `(!StreamArray) === @result`, which is never true for a present
          # result and made this branch unreachable.
          if @result && @path != @result && !(StreamArray === @result)
            res = @result
          else
            join if not done?
            res = @path.exists? ? Persist.load_file(@path, result_type) : run
          end

          if result_description
            entity_info = info.dup
            # Also load entity_info in rbbt6
            entity_info.merge! info[:inputs] if Hash === info[:inputs]
            res = prepare_result res, result_description, entity_info 
          end

          res
        rescue IOError
          if @result
            @result = nil
            retry
          end
          raise $!
        ensure
          @result = nil if IO === @result
        end

  res
end

#load_dependencies_from_infoObject



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/workflow/step/info.rb', line 62

# Rebuilds @dependencies from the entries stored in info[:dependencies].
#
# Each entry is either a [task, name, path] triplet or a bare path. When
# neither the path nor its '.info' companion exists, the path is relocated
# relative to this step's path through Workflow.relocate. @relocated is set
# to true when a relocated path (or its '.info') was found, nil otherwise.
def load_dependencies_from_info
  found_relocated = nil
  loaded = []

  (self.info[:dependencies] || []).each do |dep_task, _dep_name, location|
    location = dep_task if location.nil?

    unless Open.exists?(location) || Open.exists?(location + '.info')
      next if FalseClass === found_relocated
      location = Workflow.relocate(path, location)
      found_relocated = true if Open.exists?(location) || Open.exists?(location + '.info')
    end

    step = Workflow._load_step(location)
    loaded << step unless step.nil?
  end

  @dependencies = loaded
  @relocated = found_relocated
end

#load_file(name, type = nil, options = {}) ⇒ Object



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/rbbt/workflow/step/accessor.rb', line 287

# Loads one of the files associated with this step.
#
# When +type+ is nil it is guessed from the extension of +name+ (tc, tsv,
# list/ary/array, yaml, marshal); anything else is read as plain text.
# +options+ is only used for :tsv files, where it is handed to TSV.open.
def load_file(name, type = nil, options = {})
  if type.nil? and name =~ /.*\.(\w+)$/
    extension = name.match(/.*\.(\w+)$/)[1]
    type = {
      "tc"      => :tc,
      "tsv"     => :tsv,
      "list"    => :array,
      "ary"     => :array,
      "array"   => :array,
      "yaml"    => :yaml,
      "marshal" => :marshal,
    }.fetch(extension, :other)
  else
    type ||= :other
  end

  kind = type.to_sym
  target = file(name)

  if kind == :tc
    Persist.open_tokyocabinet(target, false)
  elsif kind == :tsv
    TSV.open Open.open(target), options
  elsif kind == :array
    #Open.read(file(name)).split /\n|,\s*/
    Open.read(target).split "\n"
  elsif kind == :yaml
    Misc.load_yaml(target)
  elsif kind == :marshal
    Marshal.load(Open.open(target))
  else
    Open.read(target)
  end
end

#load_inputs_from_infoObject



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/rbbt/workflow/step/info.rb', line 44

# Restores @inputs from info[:inputs]; returns nil when no inputs are stored.
#
# When the task declares its inputs, the stored values are picked in the
# task's order. Otherwise a stored Hash becomes a NamedArray keyed by its own
# keys, and any other value is used as is.
def load_inputs_from_info
  return nil unless info[:inputs]

  stored = info[:inputs]
  if task && task.respond_to?(:inputs) && task.inputs
    IndiferentHash.setup stored
    input_names = task.inputs.collect{|input_name| input_name.to_s}
    @inputs = NamedArray.setup stored.values_at(*input_names), task.inputs
  elsif Hash === stored
    @inputs = NamedArray.setup stored.values, stored.keys
  else
    @inputs = stored
  end
end

#log(status, message = nil, &block) ⇒ Object



255
256
257
258
259
260
261
# File 'lib/rbbt/workflow/step/info.rb', line 255

# Updates the step status, records +message+ (with colors stripped) when one
# is given, and forwards everything to Step.log along with the step path.
def log(status, message = nil, &block)
  self.status = status
  self.message(Log.uncolor(message)) if message
  Step.log(status, message, path, &block)
end

#log_dependency_exec(dependency, action) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/rbbt/workflow/step/dependencies.rb', line 132

# Logs, at info level, a colored line of the form
# "<this task> <action> <dependency task> -- <dependency path>".
def log_dependency_exec(dependency, action)
  own_task = self.task_name

  pieces = [
    Log.color(:reset, ""),
    Log.color(:yellow, own_task.to_s || ""),
    " ",
    Log.color(:magenta, action.to_s),
    " ",
    Log.color(:yellow, dependency.task_name.to_s || ""),
    " -- ",
    "#{Log.color :blue, dependency.path}",
  ]

  Log.info pieces.join
end

#log_progress(status, options = {}, &block) ⇒ Object



219
220
221
# File 'lib/rbbt/workflow/step/info.rb', line 219

# Delegates to Step.log_progress using this step's :progress file.
def log_progress(status, options = {}, &block)
  progress_file = file(:progress)
  Step.log_progress(status, options, progress_file, &block)
end

#merge_info(hash) ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/rbbt/workflow/step/accessor.rb', line 235

# Merges +hash+ into the step info while holding the info file lock.
#
# Does nothing and returns nil when @exec is set, when there is no
# info_file, or when the step is not writable. The merged info replaces
# @info_cache, and the serialized info is written back only if the info
# file already exists.
def merge_info(hash)
  return nil if @exec or info_file.nil?
  return nil if ! writable?
  # NOTE(review): `value` is not a parameter of this method, so it is nil on
  # the right-hand side here; this line looks copied from a single-key
  # setter and may have been meant to purge +hash+ -- confirm.
  value = Annotated.purge value if defined? Annotated
  Open.lock(info_file, :lock => info_lock) do
    i = info(false)
    i.merge! hash
    dump = Step.serialize_info(i)
    @info_cache = IndiferentHash.setup(i)
    # :lock => false because the surrounding Open.lock already holds the lock
    Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
    @info_cache_time = Time.now
    value
  end
end

#message(message) ⇒ Object



143
144
145
146
# File 'lib/rbbt/workflow/step/info.rb', line 143

# Appends +message+, stripped of colors, to the :messages entry of the info.
def message(message)
  plain = Log.uncolor(message)
  history = messages || []
  history << plain
  set_info(:messages, history)
end

#messagesObject



135
136
137
138
139
140
141
# File 'lib/rbbt/workflow/step/info.rb', line 135

# Returns the messages stored in the info.
#
# When none are stored, an empty list is registered through set_info (if the
# object responds to it) and that result, or [] as a fallback, is returned.
def messages
  stored = info[:messages]
  return stored if stored

  registered = set_info(:messages, []) if self.respond_to?(:set_info)
  registered || []
end

#missing?Boolean

Returns:

  • (Boolean)


130
131
132
# File 'lib/rbbt/workflow/step/status.rb', line 130

# True when the status says :done but the result file is not there.
def missing?
  return false unless status == :done

  ! Open.exists?(path)
end

#monitor_stream(stream, options = {}, &block) ⇒ Object



372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/rbbt/workflow/step/accessor.rb', line 372

# Wraps +stream+ in a line monitor, optionally ticking a progress bar and
# optionally compressing the output.
#
# options[:bar] may be true (default bar), a Hash (bar options), a Numeric
# (used as :max) or an already built bar. The block, when given, receives
# the line (arity 1) or the line and the bar (arity 2); with arity 2 the bar
# is not ticked here. options[:compress] / options[:gzip] request gzip, and
# the value 'bgzip' or options[:bgzip] requests bgzip instead.
def monitor_stream(stream, options = {}, &block)
  bar_spec = options[:bar]
  bar = if TrueClass === bar_spec
          progress_bar
        elsif Hash === bar_spec
          progress_bar bar_spec
        elsif Numeric === bar_spec
          progress_bar :max => bar_spec
        else
          bar_spec
        end

  out = if bar.nil?
          Misc.line_monitor_stream stream, &block
        else
          case (block.nil? ? 0 : block.arity)
          when 0
            Misc.line_monitor_stream(stream) do
              bar.tick
            end
          when 1
            Misc.line_monitor_stream(stream) do |line|
              bar.tick
              block.call line
            end
          when 2
            Misc.line_monitor_stream(stream) do |line|
              block.call line, bar
            end
          end
        end

  on_abort = Proc.new{
    Log::ProgressBar.remove_bar(bar, true) if bar
  }
  on_finish = Proc.new{
    Log::ProgressBar.remove_bar(bar) if bar
  }
  ConcurrentStream.setup(out, :abort_callback => on_abort, :callback => on_finish)

  compression = options[:compress] || options[:gzip]

  if options[:bgzip] || compression.to_s == 'bgzip'
    Open.bgzip(out)
  elsif compression
    Open.gzip(out)
  else
    out
  end
end

#nameObject



152
153
154
# File 'lib/rbbt/workflow/step/accessor.rb', line 152

# Name of the job: the part of the path that follows the task name
# directory. Memoized in @name.
def name
  return @name if @name

  task_dir_pattern = /.*\/#{Regexp.quote task_name.to_s}\/(.*)/
  @name = path.sub(task_dir_pattern, '\1')
end

#noinfo?Boolean

Returns:

  • (Boolean)


108
109
110
# File 'lib/rbbt/workflow/step/status.rb', line 108

# True when the step status is :noinfo.
def noinfo?
  current = status
  current == :noinfo
end

#nopid?Boolean

Returns:

  • (Boolean)


138
139
140
# File 'lib/rbbt/workflow/step/status.rb', line 138

# True when there is no pid file although the status is not one of the
# states that are expected to have none (nil, :aborted, :done, :error, :cleaned).
def nopid?
  return false if Open.exists?(pid_file)

  current = status
  ! (current.nil? || [:aborted, :done, :error, :cleaned].include?(current))
end

#out_of_dateObject



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/rbbt/workflow/step/run.rb', line 157

# Returns the checked dependencies that make this step out of date.
#
# Two groups are reported, in this order:
# * dependencies that are done and that Persist.newer? flags when compared
#   with this step's path
# * dependencies that are not done, except those that ended in a
#   non-recoverable error and are listed in canfail_paths
#
# Returns [] when there is nothing to check.
def out_of_date
  checks = self.checks
  return [] if checks.empty?

  canfail_paths = self.canfail_paths

  # The modification time of this step's own file used to be read here but
  # was never used; Persist.newer? does the comparison.
  outdated_time = checks.select{|dep| dep.done? && Persist.newer?(path, dep.path) }
  outdated_dep = checks.reject{|dep| dep.done? || (dep.error? && ! dep.recoverable_error? && canfail_paths.include?(dep.path)) }

  Log.medium "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
  Log.medium "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?

  outdated_time + outdated_dep
end

#overriden?Boolean

Returns:

  • (Boolean)


538
539
540
541
542
543
544
545
546
547
# File 'lib/rbbt/workflow/step/dependencies.rb', line 538

# Returns the value of @overriden.
#
# NOTE: this method used to carry additional checks after an unconditional
# `return @overriden` (any dependency flagged as overriden, or an
# :overriden entry in info[:archived_info]). Those lines could never run,
# so they have been dropped; the effective behaviour is unchanged.
def overriden?
  @overriden
end

#overriden_depsObject



559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
# File 'lib/rbbt/workflow/step/dependencies.rb', line 559

# Collects the dependencies whose `overriden` value is a Symbol.
#
# Dependencies with `overriden == false` are skipped together with their
# subtree; for any other value the search continues into their own
# dependencies.
def overriden_deps
  collected = []
  pending = dependencies.dup

  loop do
    current = pending.shift
    break unless current

    flag = current.overriden
    if FalseClass === flag
      next
    elsif Symbol === flag
      collected << current
    else
      pending.concat current.dependencies
    end
  end

  collected
end

#persist_checksObject



148
149
150
151
152
153
154
155
# File 'lib/rbbt/workflow/step/run.rb', line 148

# Paths of the checked dependencies, leaving out those that are not done and
# are allowed to fail.
def persist_checks
  tolerated = self.canfail_paths

  checks.each_with_object([]) do |dep, kept|
    dep_path = dep.path
    next if ! dep.done? && tolerated.include?(dep_path)
    kept << dep_path unless dep_path.nil?
  end
end

#pid_fileObject



179
180
181
# File 'lib/rbbt/workflow/step/accessor.rb', line 179

# Pid file for this step, as given by Step.pid_file for its path. Memoized.
def pid_file
  return @pid_file if @pid_file

  @pid_file = Step.pid_file(path)
end

#prepare_result(value, description = nil, entity_info = nil) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/rbbt/workflow/step.rb', line 167

# Turns the raw +value+ produced by a step into its final form.
#
# * IO values are consumed according to result_type: :array reads chomped
#   lines, :tsv opens a TSV (an empty TSV on IOError) and anything else
#   reads the whole stream. If consuming fails, the stream and the step are
#   aborted and the error is re-raised.
# * Values are returned untouched when Entity is not loaded, when there is
#   no +description+ or when +description+ is not a known entity format.
# * Annotated values get their annotations filled in from +entity_info+.
# * Any other value is set up as an entity of format +description+.
#
# +entity_info+ defaults to the step info merged with its inputs.
#
# Finally, annotations of the result that are still nil are filled with the
# loaded result of the recursive dependency whose task has the same name.
def prepare_result(value, description = nil, entity_info = nil)
  # Built lazily: only the entity branches need it
  default_entity_info = lambda do
    defaults = info.dup
    defaults.merge! info[:inputs] if info[:inputs]
    defaults
  end

  res = case 
        when IO === value
          begin
            loaded = case result_type
                     when :array
                       array = []
                       while line = value.gets
                         array << line.chomp
                       end
                       array
                     when :tsv
                       begin
                         TSV.open(value)
                       rescue IOError
                         TSV.setup({})
                       end
                     else
                       value.read
                     end
            value.join if value.respond_to? :join
            loaded
          rescue Exception
            value.abort if value.respond_to? :abort
            self.abort
            raise $!
          end
        when (! defined?(Entity) || description.nil? || ! Entity.formats.include?(description))
          value
        when (Annotated === value and info.empty?)
          value
        when Annotated === value
          annotations = value.annotations
          entity_info ||= default_entity_info.call
          entity_info.each do |k,v|
            # The setter is named after the annotation key itself
            value.send("#{k}=", v) if annotations.include? k
          end

          value
        else
          entity_info ||= default_entity_info.call
          Entity.formats[description].setup(value, entity_info.merge(:format => description))
        end

  if Annotated === res
    dep_hash = nil
    res.annotations.each do |a|
      a = a.to_s
      varname = "@" + a
      next unless res.instance_variable_get(varname).nil? 

      # Index the recursive dependencies by task name the first time one is needed
      dep_hash ||= begin
                     h = {}
                     rec_dependencies.each{|dep| h[dep.task_name.to_s] ||= dep }
                     h
                   end
      dep = dep_hash[a]
      next if dep.nil?
      res.send(a.to_s+"=", dep.load)
    end 
  end

  res
end

#produce(force = false, dofork = false) ⇒ Object



476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
# File 'lib/rbbt/workflow/step/run.rb', line 476

# Makes sure the step has been produced and returns self.
#
# Returns immediately when the step is done and not dirty. Under the status
# lock, a step in error, aborted or stalled state is either cleaned (when
# +force+ is set, or it was aborted, or its error is recoverable) or its
# stored exception is raised. Afterwards the step is run, or forked when
# +dofork+ is set, and joined; with dofork == :nowait the fork is not joined.
def produce(force=false, dofork=false)
  return self if done? and not dirty?

  self.status_lock.synchronize do
    if error? || aborted? || stalled?
      if stalled?
        Log.warn "Aborting stalled job #{self.path}"
        abort
      end
      # The predicates are evaluated again here, after the abort above may
      # have changed the status
      if force or aborted? or recoverable_error?
        clean
      else
        e = get_exception
        if e
          Log.error "Raising exception in produced job #{self.path}: #{e.message}" 
          raise e
        else
          raise "Error in job: #{self.path}"
        end
      end
    end
  end

  update if done?

  if dofork
    fork(true) unless started?

    join unless done? or dofork == :nowait
  else
    run(true) unless started?

    join unless done?
  end

  self
end

#progress_bar(msg = "Progress", options = nil, &block) ⇒ Object



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/rbbt/workflow/step/info.rb', line 223

# Creates a progress bar tied to this step's :progress file (no file when
# @exec is set).
#
# The first argument may be the options Hash itself, in which case there is
# no description. With a block the bar is initialized, yielded and removed,
# and the block's value is returned; without one the bar is returned.
def progress_bar(msg = "Progress", options = nil, &block)
  options, msg = msg, nil if Hash === msg and options.nil?
  options = {} if options.nil?

  max = options[:max]
  defaults = {:desc => msg, :file => (@exec ? nil : file(:progress))}
  bar = Log::ProgressBar.new_bar(max, defaults.merge(options))

  return bar unless block_given?

  bar.init
  res = yield bar
  bar.remove
  res
end

#provenanceObject



325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/rbbt/workflow/step/accessor.rb', line 325

# Returns {:inputs => ..., :provenance => ...} for this step.
#
# The :provenance Hash maps the path of each dependency whose path exists
# to that dependency's own provenance, or to nil when the dependency has no
# info file.
def provenance
  by_path = dependencies.each_with_object({}) do |dep, acc|
    next unless dep.path.exists?

    unless Open.exists? dep.info_file
      acc[dep.path] = nil
      next
    end

    acc[dep.path] = dep.provenance if Open.exists? dep.path
  end

  {:inputs => info[:inputs], :provenance => by_path}
end

#provenance_pathsObject



338
339
340
341
342
343
344
# File 'lib/rbbt/workflow/step/accessor.rb', line 338

# Maps the path of every dependency whose path exists to that dependency's
# own provenance_paths.
def provenance_paths
  dependencies.each_with_object({}) do |dep, tree|
    tree[dep.path] = dep.provenance_paths if Open.exists? dep.path
  end
end

#python(python = nil, options = {}, &block) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/util/python/step.rb', line 12

# Runs python code in the context of this step.
#
# The workflow's libdir python directory is first added to the python path
# (a failure to do so is only logged). Then, depending on +python+:
# * a Path is resolved with find and run as a file with +options+
# * a String naming an existing file is run as a file
# * any other String is taken as source code: it is written as the
#   __init__.py of a throw-away package inside a temporary directory, the
#   directory is added to the python path and the block is yielded the
#   package name from inside that directory
# * anything else is handed to python_block together with the block
def python(python = nil, options = {}, &block)
  begin
    RbbtPython.add_path self.workflow.libdir.python.find
  rescue
    Log.warn "Error loading libdir python for workflow '#{Misc.fingerprint self.workflow}'"
  end
  case python
  when Path
    python_file python.find, options
  when String
    if Open.exists?(python)
      python_file python
    else
      TmpFile.with_file do |dir|
        pkg = "pkg#{rand(100)}"
        # The source to write is the +python+ argument itself
        Open.write File.join(dir, "#{pkg}/__init__.py"), python

        RbbtPython.add_path dir

        Misc.in_dir dir do
          yield pkg
        end
      end
    end
  else
    python_block(python, &block)
  end
end

#python_block(options = {}, &block) ⇒ Object



8
9
10
# File 'lib/rbbt/util/python/step.rb', line 8

# Hands +options+ and the block over to RbbtPython.run.
def python_block(options = {}, &block)
  RbbtPython.run(options, &block)
end

#python_file(file, options = {}) ⇒ Object



4
5
6
# File 'lib/rbbt/util/python/step.rb', line 4

# Runs +file+ through CMD.cmd_log with the :python tool and +options+.
def python_file(file, options = {})
  tool = :python
  CMD.cmd_log(tool, file, options)
end

#rec_accessObject



365
366
367
368
369
370
# File 'lib/rbbt/workflow/step/accessor.rb', line 365

# Calls access on this step and then on each of its recursive dependencies.
def rec_access
  access
  rec_dependencies.each(&:access)
end

#rec_dependencies(connected = false, seen = []) ⇒ Object

connected = true means that dependency searching ends when a result is done but dependencies are absent, meaning that the file could have been dropped in



583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
# File 'lib/rbbt/workflow/step/dependencies.rb', line 583

# Returns the dependencies of this step together with theirs, recursively
# and without duplicates.
#
# connected:: when true, dependencies that are done and not updatable are
#             not followed
# seen::      steps that must be skipped
#
# Dependencies whose path is a key of info[:archived_info] are skipped
# unless this step is overriden.
def rec_dependencies(connected = false, seen = [])
  # `defined? A && B` is `defined?(A && B)`, which is always truthy, so each
  # constant has to be tested on its own for the check below to work.
  # NOTE(review): the top-level RemoteStep name is checked as well because
  # RemoteStep is documented as a direct subclass of Step -- confirm which
  # of the two names is current.
  remote_step = (defined?(WorkflowRemoteClient::RemoteStep) && WorkflowRemoteClient::RemoteStep === self) ||
    (defined?(RemoteStep) && RemoteStep === self)

  # A step result with no info_file means that it was manually
  # placed. In that case, do not consider its dependencies
  return [] if ! remote_step && ! Open.exists?(self.info_file) && Open.exists?(self.path.to_s) 

  return [] if dependencies.nil? or dependencies.empty?

  if self.overriden?
    archived_deps = []
  else
    archived_deps = self.info[:archived_info] ? self.info[:archived_info].keys : []
  end

  new_dependencies = []
  dependencies.each{|step| 
    next if archived_deps.include? step.path
    next if seen.include? step
    next if step.done? && connected && ! step.updatable?

    r = step.rec_dependencies(connected, new_dependencies)
    new_dependencies.concat r
    new_dependencies << step
  }

  new_dependencies.uniq
end

#recoverable_error?Boolean

Returns:

  • (Boolean)


296
297
298
299
300
301
302
303
304
305
306
# File 'lib/rbbt/workflow/step/info.rb', line 296

# Tells whether the step failed in a way worth retrying.
#
# Aborted steps are always recoverable and steps without error never are.
# An error is recoverable unless the recorded exception class is
# RbbtException or one of its subclasses; when the class cannot be worked
# out the error is taken as recoverable.
def recoverable_error?
  return true if aborted?
  return false unless error?

  begin
    recorded = info[:exception]
    return true unless recorded

    error_class = Kernel.const_get(info[:exception][:class])
    expected_failure = error_class <= RbbtException
    ! expected_failure
  rescue Exception
    true
  end
end

#recursive_cleanObject



336
337
338
339
340
341
342
# File 'lib/rbbt/workflow/step.rb', line 336

# Cleans the dependencies first (recursively) and then this step, provided
# its info file exists. Returns self.
def recursive_clean
  (dependencies || []).each { |step| step.recursive_clean }
  clean if Open.exists?(self.info_file)
  self
end

#recursive_inputs(connected = false) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/rbbt/workflow/step.rb', line 90

def recursive_inputs(connected = false)
  if NamedArray === inputs
    i = {}
    inputs.zip(inputs.fields).each do |v,f|
      i[f] = v
    end
  else
    i = {}
  end
  rec_dependencies(connected).each do |dep|
    next unless NamedArray === dep.inputs

    dep.inputs.zip(dep.inputs.fields).each do |v,f|
      if i.include?(f) && i[f] != v
        next
        #Log.debug "Variable '#{f}' reused with different values: #{[Misc.fingerprint(i[f]), Misc.fingerprint(v)] * " <-> "}"
      else 
        i[f] = v
      end
    end

    dep.archived_inputs.each do |k,v|
      i[k] = v unless i.include? k
    end
  end

  self.archived_inputs.each do |k,v|
    i[k] = v unless i.include? k
  end

  #dependencies.each do |dep|
  #  di = dep.recursive_inputs
  #  next unless NamedArray === di
  #  di.fields.zip(di).each do |k,v|
  #    i[k] = v unless i.include? k
  #  end
  #end
  
  v = i.values
  NamedArray.setup v, i.keys 
  v
end

#relay_log(step) ⇒ Object

May be deprecated: this is no longer used



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/rbbt/workflow/step.rb', line 148

# Makes this step forward its log calls to +step+. Returns self.
#
# Nothing is done when the step has no task name. The first time it is
# called, the singleton class of this object gets a relay_step accessor and
# a replacement for log; the previous log stays reachable as original_log.
def relay_log(step)
  return self if self.task_name.nil?

  # respond_to?(:original_log) doubles as the "already patched" marker
  if not self.respond_to? :original_log
    class << self
      attr_accessor :relay_step
      alias original_log log 
      # Sets the status, records the message and relays
      # "<task_name>><status>" to the relay step, unless that step is
      # already done, in error or aborted
      def log(status, message = nil)
        self.status = status
        message Log.uncolor message
        relay_step.log([self.task_name.to_s, status.to_s] * ">", message.nil? ? nil : message ) unless (relay_step.done? or relay_step.error? or relay_step.aborted?)
      end
    end
  end
  @relay_step = step

  self
end

#relocated?Boolean

Returns:

  • (Boolean)


420
421
422
# File 'lib/rbbt/workflow/step/accessor.rb', line 420

# Truthy when the step is done and the path recorded in its info differs
# from its current path.
def relocated?
  finished = done?
  return finished unless finished

  recorded = info[:path]
  return recorded unless recorded

  info[:path] != path
end

#resolve_input_stepsObject



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/workflow/step/run.rb', line 27

def resolve_input_steps
  step = false
  pos = 0

  input_options = Workflow === workflow ? workflow.task_info(task_name)[:input_options] : {}
  new_inputs = inputs.collect do |i| 
    begin
      if Step === i
        if i.error?
          e = i.get_exception
          if e
            raise e
          else
            raise DependencyError, "Error in dep. #{Log.blue e.path}"
          end
        end
        step = true
        i.produce unless i.done? || i.error? || i.started?
        if i.done?
          if (task.input_options[task.inputs[pos]] || {})[:stream]
            TSV.get_stream i
          else
            if (task.input_options[task.inputs[pos]] || {})[:nofile]
              i.path
            else
              i.load
            end
          end
        elsif i.streaming? and (task.input_options[task.inputs[pos]] || {})[:stream]
          TSV.get_stream i
        else
          i.join
          if (task.input_options[task.inputs[pos]] || {})[:stream]
            TSV.get_stream i
          else
            if (task.input_options[task.inputs[pos]] || {})[:nofile]
              i.path
            else
              i.load
            end
          end
        end
      else
        i
      end
    ensure
      pos += 1
    end
  end
  @inputs.replace new_inputs if step
end

#result_descriptionObject



140
141
142
143
144
145
146
# File 'lib/rbbt/workflow/step/accessor.rb', line 140

# Result description declared by the task, if the task provides one.
# Memoized in @result_description.
def result_description
  @result_description ||= (@task.result_description if @task.respond_to?(:result_description))
end

#result_description=(description) ⇒ Object



148
149
150
# File 'lib/rbbt/workflow/step/accessor.rb', line 148

# Overrides the result description of this step.
def result_description=(description)
  instance_variable_set(:@result_description, description)
end

#result_typeObject



128
129
130
131
132
133
134
# File 'lib/rbbt/workflow/step/accessor.rb', line 128

# Result type of the step: the one declared by the task when it provides
# one, otherwise the :result_type stored in the info. Memoized in
# @result_type.
def result_type
  return @result_type if @result_type

  declared = @task.result_type if @task.respond_to?(:result_type)
  @result_type = declared || info[:result_type]
end

#result_type=(type) ⇒ Object



136
137
138
# File 'lib/rbbt/workflow/step/accessor.rb', line 136

# Overrides the result type of this step.
def result_type=(type)
  instance_variable_set(:@result_type, type)
end

#resumable?Boolean

Returns:

  • (Boolean)


51
52
53
# File 'lib/rbbt/workflow/step/status.rb', line 51

# Truthy when the task declares itself resumable, or when the status is
# :waiting or :cleaned.
def resumable?
  declared = task && task.resumable
  return declared if declared

  status == :waiting || status == :cleaned
end

#rewind_inputsObject



79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/workflow/step/run.rb', line 79

# Rewinds every input that can be rewound so it can be read again.
#
# TSV::Parser inputs also get their first_line reset. Errors while rewinding
# an input are ignored.
def rewind_inputs
  return if @inputs.nil?

  Log.debug "Rewinding inputs for #{path}"
  @inputs.each do |candidate|
    if candidate.respond_to? :rewind
      begin
        candidate.rewind
        candidate.first_line = nil if TSV::Parser === candidate
        Log.debug "Rewinded #{Misc.fingerprint candidate}"
      rescue
      end
    end
  end
end

#run(no_load = false) ⇒ Object



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# File 'lib/rbbt/workflow/step/run.rb', line 222

# Runs the step and persists its result at +path+.
#
# When the workflow lists this task in its relay_tasks, the job is relayed
# through RemoteWorkflow::SSH.relay_job and nothing is run here.
#
# Otherwise, inside @mutex and a Persist.persist block, the method
# initializes the info, prepares :directory inputs, records the inputs,
# runs the dependencies and executes the task with _exec. A result that is
# a stream (IO or TSV::Dumper) is set up as a ConcurrentStream whose
# callbacks finish or abort the step; any other result marks the step as
# done right away.
#
# no_load:: when truthy the result is not loaded: @result keeps the raw
#           result and self is returned. Otherwise the prepared result is
#           stored in @result and returned.
#
# DependencyError and DependencyRbbtException are recorded with exception
# and not re-raised; LockInterrupted is re-raised as is; Aborted and
# Interrupt abort the step; any other exception is recorded. In the last two
# cases dependencies are stopped and the error is re-raised.
def run(no_load = false)
  result = nil

  # Tasks configured for relay are run on the server given in relay_tasks
  if Workflow === workflow && workflow.relay_tasks && workflow.relay_tasks.include?(task_name)
    server, options = workflow.relay_tasks[task_name]
    options[:migrate] = true
    return RemoteWorkflow::SSH.relay_job(self, server, options) 
  end

  begin
    # Any truthy no_load is passed to Persist as :stream
    no_load = :stream if no_load
    result_type = self.result_type || info[:result_type]

    res = @mutex.synchronize do
      time_elapsed = total_time_elapsed = nil
      Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)

      result = Persist.persist "Job", result_type, :file => path, :check => persist_checks, :no_load => no_load do 

        if Step === Step.log_relay_step and not self == Step.log_relay_step
          relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
        end

        Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file

        @exec = false
        init_info(true)

        #workflow           = @workflow || @task.respond_to?(:workflow)  ?  @task.workflow : nil
        result_type        = @task.respond_to?(:result_type)            ?  @task.result_type : nil
        result_description = @task.respond_to?(:result_description)     ?  @task.result_description : nil

        log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task_name}"

        merge_info({
          :issued             => (issue_time = Time.now),
          :name               => name,
          :pid                => Process.pid.to_s,
          :pid_hostname       => Socket.gethostname,
          :clean_name         => clean_name,
          :workflow           => workflow.to_s,
          :task_name          => task_name,
          :result_type        => result_type,
          :result_description => result_description,
          :dependencies       => dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]},
          :versions           => Rbbt.versions
        })

        # Inputs of type :directory are turned into directories under the
        # 'directory_inputs' file: tar streams are unpacked into '.source'
        # and linked; every other input is kept as is
        new_inputs = []
        @inputs.each_with_index do |input,i|
          name = @task.respond_to?(:inputs)       ?  @task.inputs[i] : nil
          type = @task.respond_to?(:input_types)  ?  @task.input_types[i] : nil

          if type == :directory
            directory_inputs = file('directory_inputs')
            input_source = directory_inputs['.source'][name].find
            input_dir = directory_inputs[name].find

            case input
            when Path
              if input.directory?
                new_inputs << input
              else
                input.open do |io|
                  begin
                    Misc.untar(io, input_source)
                  rescue
                    raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
                  end
                end
                tar_1 = input_source.glob("*")
                raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
                FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
                new_inputs << input_dir
              end
            when File, IO, Tempfile
              begin
                Misc.untar(Open.gunzip(input), input_source)
              rescue
                raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
              end
              tar_1 = input_source.glob("*")
              raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
              FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
              new_inputs << input_dir
            else
              raise ParameterException, "Format of directory input '#{name}' not understood: #{Misc.fingerprint input}"
            end
          else
            new_inputs << input
          end
        end if @inputs

        @inputs = new_inputs if @inputs

        # Record the inputs in the info; Path inputs are stored as strings
        if @inputs && task.respond_to?(:inputs) && ! task.inputs.nil?
          info_inputs = @inputs.collect do |i| 
            if Path === i 
              i.to_s
            else 
              i 
            end
          end
          set_info :inputs, Misc.remove_long_items(Misc.zip2hash(task.inputs, info_inputs)) 
        end

        begin
          run_dependencies
        rescue Exception
          Open.rm pid_file if Open.exists?(pid_file)
          stop_dependencies
          raise $!
        end

        set_info :started, (start_time = Time.now)
        log :started, "Starting step #{Log.color :yellow, task_name}"

        # Snapshot of the config keys seen so far; the ones added while the
        # task runs are stored under :config_keys further down
        config_keys_pre = Rbbt::Config::GOT_KEYS.dup
        begin

          result = _exec
        rescue Aborted, Interrupt
          log(:aborted, "Aborted")
          raise $!
        rescue Exception
          backtrace = $!.backtrace

          # HACK: This fixes an strange behaviour in 1.9.3 where some
          # backtrace strings are coded in ASCII-8BIT
          backtrace = backtrace.collect{|l| l.dup.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
          set_info :backtrace, backtrace 
          log(:error, "#{$!.class}: #{$!.message}")
          stop_dependencies
          raise $!
        end

        # Streams are consumed right away when the result has to be loaded
        # or when RBBT_NO_STREAM is set to "true"
        if not no_load or ENV["RBBT_NO_STREAM"] == "true" 
          result = prepare_result result, @task.description, info if IO === result 
          result = prepare_result result.stream, @task.description, info if TSV::Dumper === result 
        end

        stream = case result
                 when IO
                   result
                 when TSV::Dumper
                   result.stream
                 end

        if stream
          log :streaming, "Streaming step #{Log.color :yellow, task_name.to_s || ""}"

          # Callback for when the stream finishes: marks the step as done
          # unless it already reached a final status
          callback = Proc.new do
            if AbortedStream === stream
              if stream.exception
                raise stream.exception 
              else
                raise Aborted
              end
            end
            begin
              status = self.status
              if status != :done and status != :error and status != :aborted
                Misc.insist do
                  merge_info({
                    :done => (done_time = Time.now),
                    :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
                    :time_elapsed => (time_elapsed = done_time - start_time),
                    :versions => Rbbt.versions
                  })
                  log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec."
                end
              end
            rescue
              Log.exception $!
            ensure
              Step.purge_stream_cache
              Open.rm pid_file if Open.exist?(pid_file)
            end
          end

          # Callback for when the stream is aborted: records the exception,
          # or logs the abort, and cleans up
          abort_callback = Proc.new do |exception|
            begin
              if exception
                self.exception exception
              else
                log :aborted, "#{Log.color :red, "Aborted"} step #{Log.color :yellow, task_name.to_s || ""}" if status == :streaming
              end
              _clean_finished
            rescue
              stop_dependencies
              Open.rm pid_file if Open.exist?(pid_file)
            end
          end

          ConcurrentStream.setup stream, :callback => callback, :abort_callback => abort_callback

          if AbortedStream === stream 
            exception = stream.exception || Aborted.new("Aborted stream: #{Misc.fingerprint stream}")
            self.exception exception
            _clean_finished
            raise exception
          end
        else
          merge_info({
            :done => (done_time = Time.now),
            :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
            :time_elapsed => (time_elapsed = done_time - start_time),
            :versions => Rbbt.versions
          })
          log :ending
          Step.purge_stream_cache
        end

        set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}

        config_keys = Rbbt::Config::GOT_KEYS[config_keys_pre.length..-1]
        set_info :config_keys, config_keys.uniq

        # A task that returns nil but left a file at tmp_path has that file
        # moved into place as the result
        if result.nil? && File.exist?(self.tmp_path) && ! File.exist?(self.path)
          Open.mv self.tmp_path, self.path
        end
        Open.rm pid_file if Open.exist?(pid_file) unless stream
        result
      end # END PERSIST
      # time_elapsed is only set when the persist block finished a
      # non-streaming run
      log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec." unless stream or time_elapsed.nil?

      if no_load
        @result ||= result
        self
      else
        Step.purge_stream_cache
        @result = prepare_result result, result_description
      end
    end # END SYNC
    res
  rescue DependencyError, DependencyRbbtException
    exception $!
  rescue LockInterrupted
    raise $!
  rescue Aborted, Interrupt
    abort
    stop_dependencies
    raise $!
  rescue Exception
    exception $!
    stop_dependencies
    raise $!
  ensure 
    # The pid file is kept only while a stream result is still being consumed
    no_load = false unless IO === result
    Open.rm pid_file if Open.exist?(pid_file) unless no_load
    #set_info :pid, nil unless no_load
  end
end

#run_compute_dependencies(type, list, dep_step = {}) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/rbbt/workflow/step/dependencies.rb', line 250

# Runs a group of dependency steps according to a compute +type+.
#
# @param type [Symbol, Array] the compute strategy (:canfail, :produce,
#   :no_dup, :bootstrap, or anything else for plain execution). When an Array
#   is given, its first element is the strategy and the remaining elements
#   are options (e.g. :canfail, a cpu count or config key, :respawn flags).
# @param list [Array] the dependency steps to run.
# @param dep_step [Hash] passed through to execute_and_dup in the default branch.
# @return [Object, nil] nil for :produce / :no_dup; otherwise the value of the
#   last expression of the selected branch (list.each or Misc.bootstrap).
def run_compute_dependencies(type, list, dep_step = {})
  # Split "[type, option, option, ...]" into the strategy and its options.
  # +rest+ stays nil when +type+ was not an Array.
  if Array === type
    type, *rest = type
  end

  # Failures are tolerated when requested as an option or as the strategy itself.
  canfail = (rest && rest.include?(:canfail)) || type == :canfail

  case type
  when :canfail
    # Produce each dependency; an RbbtException is logged and ignored, any
    # other exception is logged and re-raised.
    list.each do |dep|
      begin
        dep.produce
      rescue RbbtException
        Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last if dep.messages}"
      rescue Exception
        Log.warn "Not Allowing failing of #{dep.path} because #{$!.class} not RbbtException"
        raise $!
      end
      nil
    end
  when :produce, :no_dup
    # NOTE(review): run_dependencies compares step_dep.compute against :nodup
    # while this branch matches :no_dup -- confirm which spelling is intended.
    list.each do |step|
      # Presumably Misc.insist retries the block on failure and StopInsist
      # tells it to give up -- TODO confirm against Misc.insist.
      Misc.insist do
        begin
          step.produce
        rescue RbbtException
          # Silently ignored when the failure is allowed.
          raise $! unless canfail || step.canfail?
        rescue Exception
          # Record the exception on the step before deciding whether to
          # re-raise it as-is or wrapped in StopInsist.
          step.exception $!
          if step.recoverable_error?
            raise $!
          else
            raise StopInsist.new($!)
          end
        end
      end
    end
    nil
  when :bootstrap
    # First option, if any, is either a cpu count or a Symbol config key.
    cpus = rest.nil? ? nil : rest.first 

    if cpus.nil? 
      # No explicit value: look up 'dep_cpus' using 'bootstrap' plus the task
      # names and signatures of the dependencies as keys.
      keys = ['bootstrap'] + list.collect{|d| [d.task_name, d.task_signature] }.flatten.uniq
      cpus = config('dep_cpus', *keys, :default => [5, list.length / 2].min)
    elsif Symbol === cpus
      cpus = config('dep_cpus', cpus, :default => [5, list.length / 2].min)
    end

    # NOTE(review): the third assignment below unconditionally overwrites the
    # results of the first two, so :respawn and :norespawn have no effect; the
    # final value is true/false depending only on :always_respawn, or :always
    # when no options were given. This looks unintended -- confirm the desired
    # precedence before changing it.
    respawn = rest && rest.include?(:respawn)
    respawn = false if rest && rest.include?(:norespawn)
    respawn = rest && rest.include?(:always_respawn)
    respawn = :always if respawn.nil?

    Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{self.short_path} [#{cpus}]", :respawn => respawn) do |dep|
      begin
        # On SIGINT abort the dependency being processed and raise Aborted.
        Signal.trap(:INT) do
          dep.abort
          raise Aborted
        end

        Misc.insist do
          begin
            dep.produce 
            Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?

          rescue Aborted
            # Keep the original exception: the inner rescue below may reset $!.
            ex = $!
            begin
              dep.abort
              Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
            rescue
              # Best effort: failures while aborting are ignored.
            end
            raise StopInsist.new(ex)

          rescue RbbtException
            if canfail || dep.canfail?
              Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
            else
              Log.warn "NOT Allowing failing of #{dep.path}: #{dep.messages.last}"
              dep.exception $!
              if dep.recoverable_error?
                begin
                  dep.abort
                rescue
                  # Best effort: failures while aborting are ignored.
                end
                raise $!
              else
                raise StopInsist.new($!)
              end
            end
          end
        end
      rescue
        # Anything that escapes the insist block aborts the dependency and
        # is propagated.
        dep.abort
        raise $!
      end
      nil
    end
  else
    # Any other strategy: execute each step through execute_and_dup with
    # false as its third argument.
    list.each do |step|
      execute_and_dup(step, dep_step, false)
    end
  end
end

#run_dependencies ⇒ Object



383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
# File 'lib/rbbt/workflow/step/dependencies.rb', line 383

# Prepares and runs the dependencies this step needs before it can execute.
#
# The method:
# 1. gathers the recursive dependencies plus the not-yet-started input
#    dependencies and prepares each of them for execution;
# 2. records, in +dep_step+, which steps depend on each pending dependency;
# 3. directly produces ComputeDependency entries whose compute is :produce;
# 4. classifies the remaining required dependencies into compute groups and
#    simple dependencies, separating those whose inputs refer to another
#    required dependency already seen ("internal") so they run last;
# 5. runs, in order: compute groups without internal inputs, simple
#    dependencies, and finally compute groups with internal inputs.
#
# Failures of steps listed in +canfail_paths+ are tolerated.
#
# @return [nil]
def run_dependencies

  rec_dependencies = self.rec_dependencies(true) + input_dependencies.reject{|d| d.started? }

  return if rec_dependencies.empty?

  all_deps = rec_dependencies + [self]

  # Steps reachable through a ComputeDependency; they are skipped in the
  # simple-dependency pass below.
  compute_deps = rec_dependencies.collect do |dep|
    next unless ComputeDependency === dep
    dep.rec_dependencies + dep.inputs.flatten.select{|i| Step === i}
  end.compact.flatten.uniq

  canfail_paths = self.canfail_paths

  # dep_step maps a dependency path to the steps that depend on it.
  dep_step = {}
  seen_paths = Set.new
  all_deps.uniq.each do |step|
    next if seen_paths.include? step.path
    seen_paths << step.path

    begin
      Step.prepare_for_execution(step) unless step == self 
    rescue DependencyError, DependencyRbbtException
      raise $! unless canfail_paths.include? step.path
    end

    next unless step.dependencies and step.dependencies.any?

    # ToDo is this really necessary
    #(step.dependencies + step.input_dependencies).each do |step_dep|
    step.dependencies.each do |step_dep|
      next unless step.dependencies.include?(step_dep)
      # NOTE(review): run_compute_dependencies matches :no_dup while this
      # checks :nodup -- confirm which spelling is intended.
      next if step_dep.done? or step_dep.running? or 
        (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
      dep_step[step_dep.path] ||= []
      dep_step[step_dep.path] << step
    end

  end

  # Direct :produce compute dependencies are produced right away and
  # remembered so they are not scheduled again below.
  produced = []
  (dependencies + input_dependencies).each do |dep|
    next if dep.started?
    next unless ComputeDependency === dep
    if dep.compute == :produce
      dep.produce 
      produced << dep.path
    end
  end

  self.dup_inputs

  required_dep_paths = []
  dep_step.each do |path,list|
    required_dep_paths << path if (list & dependencies).any?
  end

  required_dep_paths.concat dependencies.collect{|dep| dep.path}

  required_dep_paths.concat input_dependencies.collect{|dep| dep.path}

  # NOTE(review): dep.input_dependencies appears to contribute step objects
  # rather than paths here (elsewhere they are mapped through #path) --
  # confirm whether their paths were intended.
  required_dep_paths.concat(dependencies.collect do |dep| 
    [dep.path] + dep.input_dependencies
  end.flatten)


  # pre_deps is never populated in this method, so its loop below is
  # currently a no-op.
  pre_deps = []
  simple_dependencies = []
  compute_simple_dependencies = {}
  compute_last_deps = {}
  seen_paths = Set.new
  rec_dependencies.uniq.reverse.each do |step| 
    next if seen_paths.include? step.path
    seen_paths << step.path
    next unless required_dep_paths.include? step.path
    required_seen_paths = seen_paths & required_dep_paths

    inputs = step.inputs
    inputs = inputs.values if Hash === inputs
    # "internal": one of the step's inputs is a required dependency already seen.
    internal = inputs.select{|i| i.respond_to?(:path) && required_seen_paths.include?(i.path) }.any?

    if ComputeDependency === step 
      next if produced.include? step.path
      if internal
        compute_last_deps[step.compute] ||= []
        compute_last_deps[step.compute] << step
      else
        compute_simple_dependencies[step.compute] ||= []
        compute_simple_dependencies[step.compute] << step
      end
    else
      if internal
        simple_dependencies << step
      else
        simple_dependencies.prepend(step)
      end
    end
  end

  log :dependencies, "Processing dependencies for #{Log.color :yellow, task_name.to_s || ""}" if compute_simple_dependencies.any? || simple_dependencies.any? || compute_last_deps.any?

  Log.debug "compute_simple_dependencies: #{Misc.fingerprint(compute_simple_dependencies)} - #{Log.color :blue, self.path}" if compute_simple_dependencies.any?
  compute_simple_dependencies.each do |type,list|
    run_compute_dependencies(type, list, dep_step)
  end

  Log.low "pre_deps: #{Misc.fingerprint(pre_deps)} - #{Log.color :blue, self.path}" if pre_deps.any?
  pre_deps.each do |step|
    next if compute_deps.include? step
    begin
      execute_and_dup(step, dep_step, false)
    rescue Exception
      raise $! unless canfail_paths.include?(step.path)
    end
  end

  Log.debug "simple_dependencies: #{Misc.fingerprint(simple_dependencies)} - #{Log.color :blue, self.path}" if simple_dependencies.any?
  simple_dependencies.each do |step|
    next if compute_deps.include? step
    # A stray, no-op `Exception` constant used to follow `begin` here; the
    # bare rescue (StandardError and subclasses) is unchanged.
    begin
      execute_and_dup(step, dep_step) 
    rescue 
      raise $! unless canfail_paths.include?(step.path)
    end
  end

  # Compute dependencies with internal inputs run last. This loop previously
  # iterated compute_simple_dependencies a second time, so these groups were
  # never executed.
  Log.low "compute_last_deps: #{Misc.fingerprint(compute_last_deps)} - #{Log.color :blue, self.path}" if compute_last_deps.any?
  compute_last_deps.each do |type,list|
    run_compute_dependencies(type, list, dep_step)
  end

  dangling_deps = all_deps.reject{|dep| dep.done? || canfail_paths.include?(dep.path) }.
    select{|dep| dep.waiting? }

  # Dangling dependencies are only reported; aborting them is disabled.
  Log.medium "Aborting (actually not) waiting dangling dependencies #{Misc.fingerprint dangling_deps}" if dangling_deps.any?
  #dangling_deps.each{|dep| dep.abort }

end

#running? ⇒ Boolean

Returns:

  • (Boolean)


112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rbbt/workflow/step/status.rb', line 112

# Reports whether the process recorded for this step is still alive.
#
# @return [Integer, Boolean, nil] false when the step has not started (and
#   its status is not :ending) or has already finished, failed or been
#   aborted; nil when there is no pid file; the pid when that process
#   exists; otherwise the result of re-checking done?/error?/aborted?.
def running?
  return false unless started? || status == :ending
  return nil unless Open.exist?(pid_file)

  recorded_pid = Open.read(pid_file).to_i

  return false if done? || error? || aborted?
  return recorded_pid if Misc.pid_exists?(recorded_pid)

  # The process is gone: check the final states once more.
  done? || error? || aborted?
end

#save_file(name, content) ⇒ Object



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# File 'lib/rbbt/workflow/step/accessor.rb', line 271

# Serializes +content+ to text and writes it to the step file called +name+.
#
# Strings are written unchanged, Arrays are joined with newlines, TSV objects
# use their own #to_s, Hashes become one tab-separated "key<TAB>value" line
# per entry, and anything else is converted with #to_s.
#
# @param name [Object] passed to #file to resolve the destination.
# @param content [Object] the data to save.
# @return [Object] whatever Open.write returns.
def save_file(name, content)
  text =
    case content
    when String then content
    when Array  then content.join("\n")
    when TSV    then content.to_s # checked before Hash, as in the original order
    when Hash   then content.map { |key, value| [key, value].join("\t") }.join("\n")
    else content.to_s
    end

  Open.write(file(name), text)
end

#set_info(key, value) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/rbbt/workflow/step/accessor.rb', line 216

# Stores +value+ under +key+ in the step's info, updating both the in-memory
# cache and, when the info file already exists, the file itself.
#
# Nothing is done (and nil is returned) when @exec is set, when there is no
# info file path, or when the step is not writable.
#
# @param key [Object] the info key.
# @param value [Object] the value; purged through Annotated.purge when the
#   Annotated constant is defined.
# @return [Object, nil] the stored value as returned from the Open.lock
#   block, or nil when nothing was done.
def set_info(key, value)
  return nil if @exec or info_file.nil?
  return nil if ! writable?

  # Purge once, and only when Annotated is available. A second, unguarded
  # purge used to run inside the lock; it was redundant and raised NameError
  # when Annotated was not defined, defeating this guard.
  value = Annotated.purge value if defined? Annotated

  Open.lock(info_file, :lock => info_lock) do
    # Work on a copy of the current info so the cache is replaced atomically.
    i = info(false).dup

    i[key] = value 

    dump = Step.serialize_info(i)

    @info_cache = IndiferentHash.setup(i)
    # Only rewrite the file when it already exists; locking is handled by the
    # enclosing Open.lock, hence :lock => false.
    Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
    @info_cache_time = Time.now
    value
  end
end

#short_path ⇒ Object



156
157
158
# File 'lib/rbbt/workflow/step/accessor.rb', line 156

# Short identifier of the step: "<task_name>/<name>".
#
# @return [String]
def short_path
  [task_name, name].join("/")
end

#short_path_real ⇒ Object



160
161
162
# File 'lib/rbbt/workflow/step/accessor.rb', line 160

# Like #short_path, but uses the value of +overriden+ in place of the task
# name when +overriden+ is a Symbol.
#
# @return [String] "<overriden or task_name>/<name>"
def short_path_real
  label = overriden
  label = task_name unless Symbol === label
  [label.to_s, name].join("/")
end

#soft_grace ⇒ Object



704
705
706
707
708
709
# File 'lib/rbbt/workflow/step/run.rb', line 704

# Blocks, polling once per second, until the step is done or its info file
# exists with a status other than :noinfo.
#
# @return [Step] self
def soft_grace
  sleep 1 until done? || (Open.exist?(info_file) && info[:status] != :noinfo)
  self
end

#stalled? ⇒ Boolean

Returns:

  • (Boolean)


126
127
128
# File 'lib/rbbt/workflow/step/status.rb', line 126

# A step is stalled when it has started but is neither done, running, in
# error nor aborted.
#
# done? is deliberately evaluated again after running?, exactly as before;
# presumably this catches a step that finishes between the two checks.
#
# @return [Boolean] (or the falsy value of started? when not started)
def stalled?
  started? && !done? && !running? && !done? && !error? && !aborted?
end

#started? ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/rbbt/workflow/step/status.rb', line 55

# A step counts as started when its result path exists, or when both its pid
# file and its info file exist.
#
# @return [Boolean]
def started?
  result_present = Open.exists?(path)
  return result_present if result_present

  Open.exists?(pid_file) && Open.exists?(info_file)
end

#status ⇒ Object



122
123
124
125
126
127
128
129
# File 'lib/rbbt/workflow/step/info.rb', line 122

# Reads the :status entry of the step info.
#
# Any exception raised while reading the info is logged and reported as the
# :error status instead of being propagated.
#
# @return [Object] the stored status, or :error when the info cannot be read.
def status
  info[:status]
rescue Exception => failure
  Log.error "Exception reading status: #{failure.message}" 
  :error
end

#status=(status) ⇒ Object



131
132
133
# File 'lib/rbbt/workflow/step/info.rb', line 131

# Stores +status+ as the :status entry of the step info through #set_info.
#
# @param status [Object] the new status.
def status=(status)
  set_info :status, status
end

#status_lock ⇒ Object



192
193
194
195
196
197
198
199
# File 'lib/rbbt/workflow/step/accessor.rb', line 192

# Lock used around status changes: the step's own @mutex.
#
# A disabled earlier implementation built a Lockfile from the info file path
# (suffix '.status.lock', under Step.lock_dir) and memoized it in
# @status_lock; it is no longer used.
#
# @return [Object] the value of @mutex.
def status_lock
  @mutex
end

#step(name) ⇒ Object



344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/rbbt/workflow/step.rb', line 344

# Finds the dependency step that runs the task called +name+.
#
# All recursive dependencies whose task name matches are considered; direct
# dependencies of this step take precedence, and the last match is returned.
# Results are memoized per +name+ exactly as it was passed in.
#
# @param name [String, Symbol] the task name.
# @return [Object] the matching dependency step.
# @raise [RuntimeError] when no dependency runs that task.
def step(name)
  @steps ||= {}
  cached = @steps[name]
  return cached if cached

  task = name.to_sym
  candidates = rec_dependencies.select do |dep|
    dep.task_name && dep.task_name.to_sym == task
  end
  raise "Dependency step not found: #{ task }" if candidates.empty?

  direct = candidates & self.dependencies
  @steps[name] = direct.any? ? direct.last : candidates.last
end

#stop_dependencies ⇒ Object



523
524
525
526
527
528
529
530
531
532
533
534
535
536
# File 'lib/rbbt/workflow/step/dependencies.rb', line 523

# Aborts every direct dependency that is still running and then kills this
# step's child processes via #kill_children.
#
# Dependencies that are done or already aborted are left alone; nil entries
# are reported with a warning and skipped. Nothing happens when there is no
# dependency list.
#
# @return [Object, nil] the result of kill_children, or nil when
#   dependencies is nil.
def stop_dependencies
  return if dependencies.nil?
  dependencies.each do |dep|
    if dep.nil?
      # Fingerprint this step. The message used to reference `step`, which
      # here resolves to the one-argument method #step and therefore raised
      # ArgumentError instead of logging the warning.
      Log.warn "Dependency is nil #{Misc.fingerprint self} -- #{Misc.fingerprint dependencies}"
      next
    end

    next if dep.done? or dep.aborted?

    dep.abort if dep.running?
  end
  kill_children
end

#streaming? ⇒ Boolean

Returns:

  • (Boolean)


104
105
106
# File 'lib/rbbt/workflow/step/status.rb', line 104

# True when the step's result is an IO, when a saved stream is held in
# @saved_stream, or when the step status is :streaming.
#
# @return [Boolean]
def streaming?
  return true if IO === @result
  return true unless @saved_stream.nil?

  status == :streaming
end

#task_signature ⇒ Object



169
170
171
# File 'lib/rbbt/workflow/step/accessor.rb', line 169

# Signature of the task in the form "<workflow>#<task_name>".
#
# @return [String]
def task_signature
  [workflow.to_s, task_name].join("#")
end

#tmp_path ⇒ Object



256
257
258
# File 'lib/rbbt/workflow/step/accessor.rb', line 256

# Temporary path for this step, obtained from Step.tmp_path for the step's
# path and memoized in @tmp_path.
#
# @return [Object] the value returned by Step.tmp_path.
def tmp_path
  return @tmp_path if @tmp_path

  @tmp_path = Step.tmp_path(path)
end

#updatable? ⇒ Boolean

Returns:

  • (Boolean)


120
121
122
123
124
125
126
# File 'lib/rbbt/workflow/step/run.rb', line 120

# Decides whether this step may be checked for updates.
#
# Always true when RBBT_UPDATE_ALL_JOBS is 'true'. Otherwise RBBT_UPDATE must
# be "true" and the info file must exist; the step is then updatable when its
# status is not :noinfo and it is not both relocated and done.
#
# @return [Boolean]
def updatable?
  return true if ENV["RBBT_UPDATE_ALL_JOBS"] == 'true'
  return false if ENV["RBBT_UPDATE"] != "true" || !Open.exists?(info_file)

  status != :noinfo && !(relocated? && done?)
end

#update ⇒ Object



325
326
327
328
329
330
# File 'lib/rbbt/workflow/step.rb', line 325

# When the step is dirty, updates each of its dependencies (if any) and then
# cleans the step itself.
#
# @return [Object, nil] the result of #clean, or nil when the step is not dirty.
def update
  return unless dirty?

  deps = dependencies
  deps.each { |dep| dep.update } if deps
  clean
end

#updated? ⇒ Boolean

Returns:

  • (Boolean)


196
197
198
199
200
201
# File 'lib/rbbt/workflow/step/run.rb', line 196

# Tells whether the step is up to date.
#
# Always true unless RBBT_UPDATE is "true". A step that is neither done nor
# in error, and that is writable, is also reported as updated. Otherwise the
# step is updated when #out_of_date is empty; a truthy answer is memoized in
# @updated.
#
# @return [Boolean]
def updated?
  return true unless ENV["RBBT_UPDATE"] == "true"
  return true if !done? && !error? && writable?

  @updated ||= out_of_date.empty?
end

#waiting? ⇒ Boolean

Returns:

  • (Boolean)


59
60
61
# File 'lib/rbbt/workflow/step/status.rb', line 59

# A step is waiting when its info file exists but it has not started yet.
#
# @return [Boolean]
def waiting?
  Open.exists?(info_file) && !started?
end

#workflow_short_path ⇒ Object



164
165
166
167
# File 'lib/rbbt/workflow/step/accessor.rb', line 164

# Short path prefixed with the workflow: "<workflow>#<short_path>". Falls
# back to the plain short path when the step has no workflow.
#
# @return [String]
def workflow_short_path
  owner = workflow
  if owner
    owner.to_s + "#" + short_path
  else
    short_path
  end
end

#writable? ⇒ Boolean

Returns:

  • (Boolean)


332
333
334
# File 'lib/rbbt/workflow/step.rb', line 332

# The step is writable when both its result path and its info file are
# writable according to Open.writable?. The info file is only checked when
# the path is writable.
#
# @return [Boolean]
def writable?
  path_writable = Open.writable?(path)
  path_writable && Open.writable?(info_file)
end