Class: Cnvrg::Helpers::Agent

Inherits:
Object
  • Object
show all
Defined in:
lib/cnvrg/helpers/agent.rb

Defined Under Namespace

Modules: LogLevel, Status

Instance Method Summary collapse

Constructor Details

#initialize(executer: nil, slug: nil, command: nil, container_name: nil, send_log_interval: 60, timeout: -1, logs_regex: [], async: false, send_logs: false, files_exist: [], retries: 0, sleep_before_retry: 30, single_quotes: false, docker_user: nil, use_bash: false, **kwargs) ⇒ Agent

This class represents a single command in the system. It runs under an executer (machine_activity), so it should have all the executer params.


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/cnvrg/helpers/agent.rb', line 19

# Stores the command configuration and resets all run-time state.
#
# Reads CNVRG_JOB_ID and MAIN_CONTAINER_PORT from the environment. When
# MAIN_CONTAINER_PORT is set the main container is named "main", otherwise
# "slave"; @run_in_main records whether container_name matches that name
# (case-insensitively).
#
# @param executer [Object, nil] stored as-is in @executer
# @param slug [String, nil] identifier of this command
# @param command [String, nil] command line to execute
# @param container_name [String, nil] target container; nil never matches the main container
# @param send_log_interval [Numeric, nil] stored in @log_interval
# @param timeout [Numeric, nil] blank or negative values are stored as 0
# @param logs_regex [Array, nil] nil is replaced by an empty array
# @param async [Boolean] stored in @async
# @param send_logs [Boolean] stored in @send_logs
# @param files_exist [Array, nil] stored in @files_exist
# @param retries [#to_i, nil] converted with to_i; nil (or a value without to_i) becomes 0
# @param sleep_before_retry [Numeric] stored in @sleep_before_retry
# @param single_quotes [Boolean] stored in @single_quotes
# @param docker_user [String, nil] stored in @docker_user
# @param use_bash [Boolean] stored in @use_bash
# @param kwargs [Hash] any additional options; accepted and ignored
def initialize(executer: nil, slug: nil, command: nil, container_name: nil, send_log_interval: 60, timeout: -1, logs_regex: [], async: false, send_logs: false, files_exist: [], retries: 0, sleep_before_retry: 30, single_quotes: false, docker_user: nil, use_bash: false, **kwargs)
  @executer = executer
  @job_id = ENV["CNVRG_JOB_ID"]
  @slug = slug
  @files_exist = files_exist
  @container_name = container_name
  @execute_command_completed = false
  @is_new_main = !ENV["MAIN_CONTAINER_PORT"].blank?
  @main_name = @is_new_main ? "main" : "slave"
  # to_s keeps the default container_name (nil) from raising NoMethodError on downcase.
  @run_in_main = @container_name.to_s.downcase == @main_name
  @log_interval = send_log_interval
  # https://ruby-doc.org/stdlib-2.5.1/libdoc/timeout/rdoc/Timeout.html timeout should be 0 for running forever
  @timeout = timeout.blank? || timeout.negative? ? 0 : timeout
  @logs_regex = logs_regex || []
  @async = async
  @command = command
  @send_logs = send_logs
  # How many times the user asked to try to execute the command again.
  # Falls back to 0 so the retry bookkeeping never has to subtract from nil.
  @retries = retries.try(:to_i) || 0
  @sleep_before_retry = sleep_before_retry
  @real_execution_retries = 0 # How many times the command really executed until success
  @single_quotes = single_quotes
  @docker_user = docker_user
  @use_bash = use_bash
  @output = []
  @errors = []
  @exit_status = nil
  @is_running = true
  @pid = nil
end

Instance Method Details

#base_urlObject


53
54
55
# File 'lib/cnvrg/helpers/agent.rb', line 53

# Builds the URL of this command under its executer's activity URL:
# "<activity_url>/commands/<slug>".
#
# @return [String]
def base_url
  segments = [@executer.activity_url, "commands", @slug]
  segments.join("/")
end

#exec!Object


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/cnvrg/helpers/agent.rb', line 69

# Runs the command once (including any retries performed by execute_command)
# and reports the outcome through send_logs.
#
# * blank command        -> exit status 0, Status::ABORTED
# * should_run? is false -> exit status 127, Status::ABORTED
# * otherwise            -> announces Status::STARTED, starts the periodic log
#                           thread, executes, and reports Status::FINISHED
#
# The periodic thread, when one was started, is joined before returning.
def exec!
  log_internal("Command: #{@command} with slug: #{@slug} started!")
  log_thread = nil
  final_status =
    if @command.blank?
      @exit_status = 0
      Status::ABORTED
    elsif should_run?
      send_logs(status: Status::STARTED)
      log_thread = periodic_thread
      execute_command
      Status::FINISHED
    else
      @exit_status = 127
      Status::ABORTED
    end
  # Tells the periodic thread's loop to stop on its next check.
  @execute_command_completed = true
  summary = "Command: #{@command} with slug: #{@slug} finished"
  summary = "#{summary} after #{@real_execution_retries} retries" if @real_execution_retries > 0
  log_internal(summary)
  send_logs(exit_status: @exit_status, status: final_status)
  log_thread.join if log_thread.present?
end

#execute_commandObject


152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/cnvrg/helpers/agent.rb', line 152

# Executes @command and records its result in @exit_status.
#
# When @run_in_main is set the work is delegated to execute_command_on_slave.
# Otherwise the command is spawned in a pseudo-terminal under
# Timeout.timeout(@timeout); every output line is logged, stripped of ANSI
# colour sequences and appended to @output with a timestamp.
#
# Exit status conventions set here:
# * the child's own exit status on normal completion
# * 124 when the timeout expires (the child is sent TERM)
# * 129 on any other error
#
# After each attempt, retry_command is invoked while retries remain and the
# exit status is non-zero.
def execute_command
  return execute_command_on_slave if @run_in_main

  # Forget the pid of any previous attempt so a timeout that fires before the
  # spawn cannot signal a stale (possibly reused) pid.
  @pid = nil
  Timeout.timeout(@timeout) do
    PTY.spawn(@command) do |stdout, _stdin, pid, stderr|
      @pid = pid
      begin
        if stdout.present?
          stdout.each do |line|
            log_internal(line, level: LogLevel::INFO)
            line = line.strip.gsub(/\e\[([;\d]+)?m/, '')
            @output << {log: line, timestamp: Time.now}
          end
        end

        # PTY.spawn yields (reader, writer, pid); a fourth block parameter is
        # never supplied, so this branch only runs if that ever changes.
        if stderr.present?
          stderr.each do |line|
            line = line.strip.gsub(/\e\[([;\d]+)?m/, '')
            log_internal(line, level: LogLevel::ERROR)
            @errors << {log: line, timestamp: Time.now}
          end
        end
      rescue Errno::EIO
        # Reading from the pty after the child has closed its side raises EIO;
        # that is the normal end of output. Fall through so the child is still
        # reaped below and $? reflects this command.
      rescue => e
        log_internal(e.message, level: LogLevel::ERROR)
        log_internal(e.backtrace.join("\n"), level: LogLevel::ERROR)
        @errors << {log: e.message, timestamp: Time.now}
      end
      ::Process.wait pid
    end
  end
  @exit_status = $?.exitstatus
rescue NoMethodError => e
  log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
  @exit_status = 129
rescue Timeout::Error
  begin
    # Signal 0 only probes for existence; TERM actually stops the timed-out child.
    ::Process.kill("TERM", @pid) if @pid
  rescue Errno::ESRCH, Errno::EPERM
    # The child is already gone (or no longer ours); nothing left to stop.
  end
  @errors << {log: "Command timed out!", timestamp: Time.now}
  log_internal("Command timed out!", level: LogLevel::ERROR)
  @exit_status = 124
rescue => e
  log_internal("Error: #{e}", level: LogLevel::ERROR)
  @exit_status = 129
ensure
  retry_command if @retries != 0 && @exit_status != 0
end

#execute_command_on_slaveObject


117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/cnvrg/helpers/agent.rb', line 117

# Sends @command to the main-container connection and follows its result file.
#
# The command is posted (as JSON, with async: true) together with a result
# file path of the form "/conf/result-<slug>-<two random capital letters>".
# That file is then tailed: every line is appended to @output (re-encoded to
# UTF-8 with invalid/undefined bytes replaced by "_") until a line containing
# "cnvrg-exit-code" arrives; the integer after "=" on that line becomes
# @exit_status. Lines are also passed to log_internal unless @is_new_main.
#
# A non-200 response sets @exit_status to 129 and raises StandardError.
# A timeout sets @exit_status to 124. After each attempt, retry_command is
# invoked while retries remain and the exit status is non-zero.
def execute_command_on_slave
  suffix = Array.new(2) { (65 + rand(26)).chr }.join
  result_file = "/conf/result-#{@slug}-#{suffix}"
  Timeout.timeout(@timeout) do
    payload = {
      cmd: @command,
      async: true,
      file_name: result_file,
      use_script: true,
      use_bash: @use_bash,
      use_sh: !@use_bash,
      docker_user: @docker_user
    }
    response = Cnvrg::Helpers::Executer.get_main_conn.post('command', payload.to_json)
    unless response.to_hash[:status].to_i == 200
      @exit_status = 129
      raise StandardError, "Cant send command to slave"
    end
    tail = FileWatch::Tail.new
    tail.tail(result_file)
    tail.subscribe do |_path, line|
      if line.include?("cnvrg-exit-code")
        @exit_status = line.split("=")[1].to_i
        break
      end
      log_internal(line, level: LogLevel::PURE) unless @is_new_main
      sanitized = line.encode('utf-8', invalid: :replace, undef: :replace, replace: '_')
      @output << {log: sanitized, timestamp: Time.now}
    end
  end
rescue Timeout::Error
  @errors << {log: "Command timed out!", timestamp: Time.now}
  log_internal("Command timed out!", level: LogLevel::ERROR)
  @exit_status = 124
ensure
  retry_command if @retries != 0 && @exit_status != 0
end

#get_logs_to_sendObject


93
94
95
96
97
# File 'lib/cnvrg/helpers/agent.rb', line 93

# Drains the buffered output and error entries.
#
# Both @output and @errors are emptied; their previous contents are returned
# in original order.
#
# @return [Array(Array, Array)] [drained output entries, drained error entries]
def get_logs_to_send
  [@output, @errors].map { |buffer| buffer.pop(buffer.length) }
end

#periodic_threadObject


100
101
102
103
104
105
106
107
108
# File 'lib/cnvrg/helpers/agent.rb', line 100

# Starts a background thread that calls send_logs every @log_interval seconds
# until @execute_command_completed becomes true. The thread exits right away
# when @log_interval is blank.
#
# @return [Thread] the started thread
def periodic_thread
  reporter = Thread.new do
    until @execute_command_completed
      Thread.exit if @log_interval.blank?
      sleep(@log_interval)
      send_logs
    end
  end
  reporter
end

#retry_commandObject


110
111
112
113
114
115
# File 'lib/cnvrg/helpers/agent.rb', line 110

# Performs one retry: consumes one of the remaining @retries, waits
# @sleep_before_retry seconds, counts the attempt in @real_execution_retries
# and runs execute_command again.
#
# @return whatever execute_command returns
def retry_command
  @retries = @retries - 1
  sleep(@sleep_before_retry)
  @real_execution_retries = @real_execution_retries + 1
  execute_command
end

#should_run?Boolean

Returns:

  • (Boolean)

57
58
59
60
61
62
63
64
65
66
# File 'lib/cnvrg/helpers/agent.rb', line 57

# Tells whether the command's file preconditions are met.
#
# With no @files_exist entries the command may always run. Otherwise the first
# listed path that does not exist on disk is looked up, and the command may run
# only when that lookup comes back blank.
#
# NOTE: because the lookup result is tested with blank?, a missing entry that
# is itself blank (e.g. "") does not block the run.
#
# @return [Boolean]
def should_run?
  return true unless @files_exist.present?

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  first_missing = @files_exist.find { |file| !File.exist?(file) }
  first_missing.blank?
end