Class: Jkr::DataUtils

Inherits:
Object
  • Object
show all
Defined in:
lib/jkr/userutils.rb

Constant Summary collapse

BLOCKSIZE =

256MB

268435456

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.read_blockseq(io_or_filepath, separator = "\n\n", &proc) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/jkr/userutils.rb', line 24

# Reads the whole input, splits it on +separator+ and maps every chunk
# through the given block.
#
# io_or_filepath:: an object responding to +read+ (IO, StringIO, ...) or a
#                  path to open for reading. A file opened here is closed
#                  before returning; a caller-supplied IO is left open.
# separator::      delimiter handed to String#split (default: a blank line).
# proc::           optional block called with each chunk; +nil+ results are
#                  dropped. Without a block, whitespace-only chunks are
#                  skipped and every other chunk is split into
#                  whitespace-separated fields.
#
# Returns an Array holding the non-nil block results, in input order.
def self.read_blockseq(io_or_filepath, separator = "\n\n", &proc)
  proc ||= lambda do |blockstr|
    blockstr.strip.empty? ? nil : blockstr.split
  end

  content =
    if io_or_filepath.respond_to?(:read)
      io_or_filepath.read
    else
      # Block form guarantees the handle is released even if reading fails.
      File.open(io_or_filepath, "r") { |file| file.read }
    end

  content.split(separator).map { |blockstr| proc.call(blockstr) }.compact
end

.read_csv(io_or_filepath, fs = ",", rs = nil, &proc) ⇒ Object



332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/jkr/userutils.rb', line 332

# Parses CSV data and returns the rows, optionally transformed by a block.
#
# io_or_filepath:: a String naming an existing file (opened and closed
#                  here), any other String (parsed as CSV text), or an IO.
# fs::             field separator, passed to CSV as :col_sep.
# rs::             record separator, passed to CSV as :row_sep; +nil+ keeps
#                  CSV's own default.
# proc::           optional block called with each row (an Array of fields);
#                  falsy results are dropped from the returned Array.
#
# Returns an Array of the truthy block results (the rows themselves when no
# block is given).
def self.read_csv(io_or_filepath, fs = ",", rs = nil, &proc)
  proc ||= lambda { |row| row }

  csv_opts = {}
  csv_opts[:col_sep] = fs if fs
  csv_opts[:row_sep] = rs if rs

  parse = lambda do |source|
    CSV.parse(source, **csv_opts).map { |row| proc.call(row) }.select { |ret| ret }
  end

  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  if io_or_filepath.is_a?(String) && File.exist?(io_or_filepath)
    File.open(io_or_filepath, "r") { |file| parse.call(file) }
  else
    parse.call(io_or_filepath)
  end
end

.read_iostat(io_or_filepath, &block) ⇒ Object

This function parses io_or_filepath as an iostat log and returns the parsed result.

block

If given, it is invoked for each iostat record as block.call(t, record), where t is the wallclock time of the record and record is a Hash keyed by device name, e.g. {"sda" => {"rrqm/s" => 0.0, …}, …}.



249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# File 'lib/jkr/userutils.rb', line 249

# Parses an iostat log (blank-line separated blocks) into timestamped
# per-device records.
#
# io_or_filepath:: forwarded unchanged to read_blockseq.
# block::          optional; called as block.call(time, record) per record.
#
# Returns an Array of [time, record] pairs, where record maps a device name
# to a Hash of column label => Float value.
#
# Raises RuntimeError when a data block appears before any dated banner, and
# StandardError when a block has no recognisable timestamp, no "Device:"
# label row, or a row whose column count differs from the label row.
def self.read_iostat(io_or_filepath, &block)
  date = nil
  # The banner block is recognised by the local kernel name (`uname -s`).
  sysname_regex = Regexp.new(Regexp.quote(`uname -s`.strip))

  self.read_blockseq(io_or_filepath) do |blockstr|
    # Whitespace-only blocks (e.g. trailing blank lines) carry no record.
    next if blockstr.strip.empty?

    if blockstr =~ sysname_regex
      # Banner block: only the date (MM/DD/YY or MM/DD/YYYY) is taken.
      if (banner = blockstr.match(/(\d{2})\/(\d{2})\/(\d{2,4})/))
        year = banner[3].to_i
        year += 2000 if banner[3].size == 2
        date = Date.new(year, banner[1].to_i, banner[2].to_i)
      end
      next
    end

    unless date
      raise RuntimeError.new("Cannot detect date: #{io_or_filepath}")
    end

    rows = blockstr.lines.map(&:strip)
    timestamp = rows.shift

    time =
      if (md = timestamp.match(/(\d{2})\/(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2})/))
        # The timestamp carries its own date; using it (rather than the
        # banner date) keeps records correct when the log spans midnight.
        Time.local(2000 + md[3].to_i, md[1].to_i, md[2].to_i,
                   md[4].to_i, md[5].to_i, md[6].to_i)
      elsif (md = timestamp.match(/Time: (\d{2}):(\d{2}):(\d{2}) (AM|PM)/))
        # 12-hour clock: 12 AM is hour 0, 12 PM stays 12.
        hour = md[1].to_i % 12
        hour += 12 if md[4] == "PM"
        Time.local(date.year, date.month, date.day, hour, md[2].to_i, md[3].to_i)
      elsif (md = timestamp.match(/Time: (\d{2}):(\d{2}):(\d{2})/))
        Time.local(date.year, date.month, date.day,
                   md[1].to_i, md[2].to_i, md[3].to_i)
      end

    unless time
      raise StandardError.new("Cannot find timestamp in your iostat log: #{io_or_filepath}")
    end

    # A missing label row yields an empty list and fails the check below.
    labels = (rows.shift || "").split
    unless labels.shift =~ /Device:/
      raise StandardError.new("Invalid iostat log: #{io_or_filepath}")
    end

    record = {}
    rows.each do |row|
      dev, *vals = row.split
      unless vals.size == labels.size
        raise StandardError.new("Invalid iostat log: #{io_or_filepath}")
      end
      record[dev] = Hash[labels.zip(vals.map(&:to_f))]
    end

    block.call(time, record) if block

    [time, record]
  end
end

.read_mpstat(io_or_filepath) ⇒ Object

Format of returned value:

[{
   :time => <Time>,
   :labels => [:cpu, :usr, :nice, :sys, :iowait, :irq, ...],
   :data => [{:cpu => "all", :usr => 0.11, :nice => 0.00, ...},
             {:cpu => 0, :usr => 0.12, :nice => 0.00, ...},
             ...]
 },
 ...]


159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/jkr/userutils.rb', line 159

# Parses an mpstat log (blank-line separated blocks) into one Hash per
# sample block.
#
# io_or_filepath:: forwarded unchanged to read_blockseq.
#
# Returns an Array of Hashes with the keys
#   :time   => Time of the block's first row,
#   :labels => column labels (known "%xxx" names mapped to Symbols),
#   :data   => one Hash per row, label => Integer / Float / String.
# "Average" blocks are skipped. When no dated banner was seen, a warning is
# written to $stderr and today's date is assumed.
#
# Raises RuntimeError when a row has no HH:MM:SS wallclock or its column
# count differs from the header.
def self.read_mpstat(io_or_filepath)
  label_map = {
    "CPU" => :cpu, "%usr" => :usr, "%user" => :usr,
    "%nice" => :nice, "%sys" => :sys, "%iowait" => :iowait,
    "%irq" => :irq, "%soft" => :soft, "%steal" => :steal,
    "%guest" => :guest, "%idle" => :idle
  }
  # Grouped so that both alternatives are anchored to the whole token.
  meridian = /\A(?:AM|PM)\z/

  date = nil
  last_time = nil
  self.read_blockseq(io_or_filepath) do |blockstr|
    next if blockstr.strip.empty?

    if blockstr =~ /Linux/ && (banner = blockstr.match(/(\d{2})\/(\d{2})\/(\d{2,4})/))
      # Banner block: accept both MM/DD/YY and MM/DD/YYYY.
      year = banner[3].to_i
      year += 2000 if banner[3].size == 2
      date = Date.new(year, banner[1].to_i, banner[2].to_i)
      next
    end

    # it's a data block, maybe
    unless date
      $stderr.puts "Cannot find date in your mpstat log. It was assumed today."
      date = Date.today
    end

    result = {}
    rows = blockstr.lines.map(&:strip)
    header = rows.shift.split
    next if header.shift =~ /Average/
    header.shift if header.first =~ meridian
    result[:labels] = header.map { |label| label_map[label] || label }

    time = nil
    result[:data] = rows.map do |row|
      vals = row.split
      wallclock = vals.shift
      ampm = vals.shift if vals.first =~ meridian

      # Only the first row of a block determines the block's time.
      unless time
        clock = wallclock && wallclock.match(/(\d{2}):(\d{2}):(\d{2})/)
        unless clock
          raise RuntimeError.new("Cannot extract wallclock time from mpstat data")
        end
        hour = clock[1].to_i
        if ampm
          # 12-hour clock: 12 AM is hour 0, 12 PM stays 12.
          hour %= 12
          hour += 12 if ampm == "PM"
        end
        min = clock[2].to_i
        sec = clock[3].to_i

        time = Time.local(date.year, date.month, date.day, hour, min, sec)
        if last_time && time < last_time
          # The clock went backwards: the log crossed midnight.
          date += 1
          time = Time.local(date.year, date.month, date.day, hour, min, sec)
        end
        result[:time] = time
        last_time = time
      end

      if vals.size != result[:labels].size
        raise RuntimeError.new("Invalid mpstat data")
      end

      record = {}
      vals.each_with_index do |val, idx|
        record[result[:labels][idx]] =
          if val =~ /\A\d+\z/
            val.to_i
          else
            begin
              Float(val)
            rescue ArgumentError
              val
            end
          end
      end
      record
    end

    result
  end
end

.read_mpstat_avg(io_or_filepath) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/jkr/userutils.rb', line 122

# Extracts the "Average:" summary from an mpstat log.
#
# Every block containing a line that starts with "Average:" is parsed; the
# result of the last such block is returned (nil when there is none).
#
# Returns a Hash with
#   :labels => header columns after the leading "Average:" token,
#   :data   => one Array per row, each value converted with Float() when
#              possible and kept as a String otherwise.
#
# Raises RuntimeError when a row's column count differs from the header.
def self.read_mpstat_avg(io_or_filepath)
  summaries = self.read_blockseq(io_or_filepath) do |blockstr|
    next unless blockstr =~ /^Average:/

    header_line, *data_lines = blockstr.lines.map(&:strip)
    # The first token of every line is the "Average:" marker itself.
    labels = header_line.split.drop(1)

    data = data_lines.map do |line|
      fields = line.split.drop(1)
      raise RuntimeError.new("Invalid mpstat data") if fields.size != labels.size

      fields.map do |field|
        begin
          Float(field)
        rescue ArgumentError
          field
        end
      end
    end

    { :labels => labels, :data => data }
  end

  summaries.last
end

.read_rowseq(io_or_filepath, &block) ⇒ Object



57
58
59
# File 'lib/jkr/userutils.rb', line 57

# Line-oriented variant of read_blockseq: the input is split on single
# newlines and the block (if any) is forwarded unchanged.
def self.read_rowseq(io_or_filepath, &block)
  row_separator = "\n"
  read_blockseq(io_or_filepath, row_separator, &block)
end

.read_sar(sar_filepath) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/jkr/userutils.rb', line 61

# Parses a textual sar log whose data columns are all numeric.
#
# sar_filepath:: forwarded unchanged to read_rowseq.
#
# Row handling:
# * a line starting with "Linux" that contains MM/DD/YY or MM/DD/YYYY sets
#   the current date;
# * a row containing any non-numeric column replaces the current labels;
# * an all-numeric row becomes a record, unless its first token contains
#   "Average", in which case it is skipped.
# An "AM"/"PM" token directly after the timestamp is honoured.
#
# Returns an Array of Hashes:
#   :time   => Time of the row (date advanced when the clock wraps),
#   :data   => label => Float,
#   :labels => the labels in effect for the row.
#
# Raises StandardError when a data row precedes the date banner or any label
# row, or when its first token is neither HH:MM:SS nor "Average".
def self.read_sar(sar_filepath)
  labels = nil
  date = nil
  last_time = nil
  self.read_rowseq(sar_filepath) do |rowstr|
    if rowstr =~ /^Linux/ && (banner = rowstr.match(/(\d{2})\/(\d{2})\/(\d{2,4})/))
      year = banner[3].to_i
      year += 2000 if banner[3].size == 2
      date = Date.new(year, banner[1].to_i, banner[2].to_i)
      next
    end

    time, *vals = rowstr.split
    # 12-hour logs put AM/PM in its own column; it is not a data value.
    meridian = vals.shift if vals.first =~ /\A(?:AM|PM)\z/
    next if vals.empty?

    unless vals.all? { |val| val =~ /\A-?\d+(?:\.\d+)?\z/ }
      # label line
      labels = vals
      next
    end
    vals = vals.map(&:to_f)

    unless date
      raise StandardError.new("cannot find date information in sar log")
    end
    unless labels
      raise StandardError.new("no label information")
    end

    clock = time.match(/(\d{2}):(\d{2}):(\d{2})/)
    unless clock
      next if time =~ /Average/
      raise StandardError.new("Invalid time format: #{time}")
    end

    hour = clock[1].to_i
    if meridian
      # 12-hour clock: 12 AM is hour 0, 12 PM stays 12.
      hour %= 12
      hour += 12 if meridian == "PM"
    end
    min = clock[2].to_i
    sec = clock[3].to_i

    stamp = Time.local(date.year, date.month, date.day, hour, min, sec)
    if last_time && stamp < last_time
      # The clock went backwards: the log crossed midnight.
      date += 1
      stamp = Time.local(date.year, date.month, date.day, hour, min, sec)
    end
    last_time = stamp

    { :time => stamp, :data => Hash[labels.zip(vals)], :labels => labels }
  end
end

.read_top(io_or_filepath, opt = {}, &proc) ⇒ Object



347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
# File 'lib/jkr/userutils.rb', line 347

# Parses the output of top in batch mode, where samples are separated by two
# blank lines.
#
# io_or_filepath:: an object responding to +read+, or a path to open. A file
#                  opened here is closed before returning.
# opt::            options Hash (not modified):
#                  :start_time => Time supplying the date for every sample
#                                 (default: Time.now),
#                  :filter     => :kernel_process or a Regexp; records whose
#                                 :command matches are dropped,
#                  :top_k      => keep only the first k records per sample.
# proc::           accepted for interface compatibility; currently unused.
#
# Returns an Array (one entry per sample) of Arrays of record Hashes.
#
# Raises ArgumentError when a sample has no HH:MM:SS in its first line or no
# "PID ..." header row, or when :filter is invalid.
#
# Note: block_filter and parse_block are (re)defined as methods whenever
# read_top runs; this mirrors the original structure.
def read_top(io_or_filepath, opt = {}, &proc)
  # Work on a copy so the caller's Hash is left untouched.
  opt = opt.merge(:start_time => opt[:start_time] || Time.now)

  # Drops the records whose :command matches the filter.
  def block_filter(filter, block)
    if filter.is_a? Symbol
      filters = {
        :kernel_process => /\A(kworker|ksoftirqd|migration|watchdog|kintegrityd|kblockd|events|kondemand|aio|crypto|ata|kmpathd|kconservative|rpciod|xfslogd|xfsdatad|xfsconvertd)\//
      }
      raise ArgumentError.new("Invalid filter: #{filter.inspect}") unless filters[filter]
      filter = filters[filter]
    elsif !filter.is_a?(Regexp)
      raise ArgumentError.new("Invalid filter: #{filter.inspect}")
    end

    block.reject { |record| record[:command] =~ filter }
  end

  # Turns one top sample into an Array of record Hashes.
  def parse_block(block, opt)
    start = opt[:start_time]
    lines = block.lines.map(&:strip)
    head_line = lines.shift

    clock = head_line && head_line.match(/(\d{2}):(\d{2}):(\d{2})/)
    raise ArgumentError.new("Invalid top(3) data") unless clock
    time = Time.local(start.year, start.month, start.day,
                      clock[1].to_i, clock[2].to_i, clock[3].to_i)

    # Locate the process-table header; without it the sample is malformed
    # (the previous shift-until-found loop never terminated in that case).
    header_idx = lines.index { |line| line =~ /\APID/ }
    raise ArgumentError.new("Invalid top(3) data") unless header_idx

    label_map = {
      "PID" => :pid, "USER" => :user, "PR" => :pr, "NI" => :ni,
      "VIRT" => :virt, "RES" => :res, "SHR" => :shr, "S" => :s,
      "%CPU" => :cpu, "%MEM" => :mem, "TIME+" => :time_plus,
      "COMMAND" => :command
    }
    labels = lines[header_idx].split.map { |key| label_map[key] || key }
    # Size suffix multipliers; unsuffixed values are kept as-is.
    units = { 'g' => 2**20, 'm' => 2**10 }

    records = lines[(header_idx + 1)..-1].reject(&:empty?).map do |line|
      record = { :time => time }
      line.split.each_with_index do |val, idx|
        record[labels[idx]] =
          if (num = val.match(/\A(\d+)([mg]?)\z/))
            # Base 10 is explicit so leading zeros are not read as octal.
            Integer(num[1], 10) * (units[num[2]] || 1)
          elsif (num = val.match(/\A(\d+\.\d+)([mg]?)\z/))
            Float(num[1]) * (units[num[2]] || 1)
          elsif (num = val.match(/\A(\d+):(\d+\.\d+)\z/))
            # minutes:seconds -> seconds
            Integer(num[1], 10) * 60 + Float(num[2])
          else
            val
          end
      end
      record
    end

    records = block_filter(opt[:filter], records) if opt[:filter]
    records = records.first(opt[:top_k]) if opt[:top_k]
    records
  end

  content =
    if io_or_filepath.respond_to?(:read)
      io_or_filepath.read
    else
      File.open(io_or_filepath, "r") { |file| file.read }
    end

  content.split("\n\n\n").reject { |sample| sample.strip.empty? }.map do |sample|
    parse_block(sample, opt)
  end
end

Instance Method Details

#block_filter(filter, block) ⇒ Object



350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/jkr/userutils.rb', line 350

# Returns the records of +block+ whose :command does NOT match +filter+.
#
# filter:: either a Regexp, or the Symbol :kernel_process, which selects a
#          built-in pattern for kernel thread names.
# block::  collection of record Hashes with a :command entry.
#
# Raises ArgumentError for an unknown Symbol or any other filter type.
def block_filter(filter, block)
  named_filters = {
    :kernel_process => /\A(kworker|ksoftirqd|migration|watchdog|kintegrityd|kblockd|events|kondemand|aio|crypto|ata|kmpathd|kconservative|rpciod|xfslogd|xfsdatad|xfsconvertd)\//
  }

  pattern =
    case filter
    when Symbol
      named = named_filters[filter]
      raise ArgumentError.new("Invalid filter: #{filter.inspect}") unless named
      named
    when Regexp
      filter
    else
      raise ArgumentError.new("Invalid filter: #{filter.inspect}")
    end

  block.reject { |record| record[:command] =~ pattern }
end

#parse_block(block, opt) ⇒ Object



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/jkr/userutils.rb', line 368

# Turns one top sample (summary lines followed by the process table) into an
# Array of record Hashes.
#
# block:: text of a single sample; its first line must contain HH:MM:SS.
# opt::   Hash with
#         :start_time => Time supplying year/month/day (required),
#         :filter     => optional, passed to block_filter,
#         :top_k      => optional, keep only the first k records.
#
# Each record maps the column labels (known names as Symbols) to values and
# also holds :time. Plain integers and decimals become Integer / Float, an
# "m" or "g" suffix multiplies by 2**10 / 2**20, and "M:SS.ss" becomes
# seconds; anything else stays a String.
#
# Raises ArgumentError when the first line has no HH:MM:SS or when no
# "PID ..." header row exists.
def parse_block(block, opt)
  start = opt[:start_time]
  lines = block.lines.map(&:strip)
  head_line = lines.shift

  clock = head_line && head_line.match(/(\d{2}):(\d{2}):(\d{2})/)
  raise ArgumentError.new("Invalid top(3) data") unless clock
  time = Time.local(start.year, start.month, start.day,
                    clock[1].to_i, clock[2].to_i, clock[3].to_i)

  # Locate the process-table header; without it the sample is malformed
  # (the previous shift-until-found loop never terminated in that case).
  header_idx = lines.index { |line| line =~ /\APID/ }
  raise ArgumentError.new("Invalid top(3) data") unless header_idx

  label_map = {
    "PID" => :pid, "USER" => :user, "PR" => :pr, "NI" => :ni,
    "VIRT" => :virt, "RES" => :res, "SHR" => :shr, "S" => :s,
    "%CPU" => :cpu, "%MEM" => :mem, "TIME+" => :time_plus,
    "COMMAND" => :command
  }
  labels = lines[header_idx].split.map { |key| label_map[key] || key }
  # Size suffix multipliers; unsuffixed values are kept as-is.
  units = { 'g' => 2**20, 'm' => 2**10 }

  records = lines[(header_idx + 1)..-1].reject(&:empty?).map do |line|
    record = { :time => time }
    line.split.each_with_index do |val, idx|
      record[labels[idx]] =
        if (num = val.match(/\A(\d+)([mg]?)\z/))
          # Base 10 is explicit so leading zeros are not read as octal.
          Integer(num[1], 10) * (units[num[2]] || 1)
        elsif (num = val.match(/\A(\d+\.\d+)([mg]?)\z/))
          Float(num[1]) * (units[num[2]] || 1)
        elsif (num = val.match(/\A(\d+):(\d+\.\d+)\z/))
          # minutes:seconds -> seconds
          Integer(num[1], 10) * 60 + Float(num[2])
        else
          val
        end
    end
    record
  end

  records = block_filter(opt[:filter], records) if opt[:filter]
  records = records.first(opt[:top_k]) if opt[:top_k]
  records
end