Class: Readorder::Commands::Test

Inherits:
Readorder::Command show all
Defined in:
lib/readorder/commands/test.rb

Overview

Test reading all the contents of a subset of the files and report summary information on how long it takes to read the files given different reading orders.

Instance Attribute Summary

Attributes inherited from Readorder::Command

#analyzer, #filelist, #options, #output

Instance Method Summary collapse

Methods inherited from Readorder::Command

#after, #command_name, command_name, commands, #error, find, #get_physical?, inherited, #initialize, #logger, #results, #results_dbfile, #shutdown

Constructor Details

This class inherits a constructor from Readorder::Command

Instance Method Details

#beforeObject

call-seq:

test.before -> nil

Part of the Command lifecycle. In the Test command this makes sure we are on a Linux machine and running as root.



18
19
20
21
22
23
24
25
26
# File 'lib/readorder/commands/test.rb', line 18

# Command lifecycle hook. Runs the inherited hook first, then refuses to go
# any further unless the host is Linux and the effective user id is 0.
#
# Raises Error when either requirement is not met; returns nil otherwise.
def before
  super
  unless Datum.is_linux?
    raise Error, "Only able to perform testing on linux.  I know how to dump the file sysem cache there."
  end
  # Effective uid 0 is root.
  raise Error, "Must be root to perform testing." unless Process.euid == 0
end

#drop_cachesObject

call-seq:

test.drop_caches -> nil

Drop the caches on a linux filesystem.

See proc(5) and /proc/sys/vm/drop_caches



198
199
200
201
202
203
204
205
# File 'lib/readorder/commands/test.rb', line 198

# Flush dirty pages to disk and then ask the kernel to drop its caches by
# writing 3 to /proc/sys/vm/drop_caches (see proc(5) for the meaning of the
# value written).
def drop_caches
  logger.info "  dropping caches"
  # old habits die hard: sync three times before dropping
  sync_passes = 3
  sync_passes.times { %x[ /bin/sync ] }
  File.open( "/proc/sys/vm/drop_caches", "w" ) { |control| control.puts 3 }
end

#dump_to_dev_null(datum) ⇒ Object

call-seq:

test.dump_to_dev_null( Datum ) -> Integer

Write the contents of the file info in Datum to /dev/null and return the number of bytes written.



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/readorder/commands/test.rb', line 214

# Copy the file named by datum.filename to /dev/null in block-sized chunks and
# return the number of bytes written.
#
# datum - responds to #filename and #stat (whose #blksize picks the chunk size)
#
# Reading is best effort: end of file stops the copy quietly, any other error
# raised while copying is logged as a warning and stops the copy for this file.
# In both cases the bytes copied so far are returned. Errors from opening
# either file are not rescued here.
def dump_to_dev_null( datum )
  bytes = 0
  File.open( "/dev/null", "w+" ) do |writer|
    File.open( datum.filename, "r") do |reader|
      # blksize may be nil on some platforms. A zero (or negative) size must be
      # replaced too: a zero-length sysread returns "" without ever raising
      # EOFError, so the loop below would never terminate.
      chunk_size = datum.stat.blksize
      chunk_size = 4096 if chunk_size.nil? || chunk_size <= 0
      buf = String.new  # reused for every read to avoid allocating per chunk
      loop do
        begin
          bytes += writer.write( reader.sysread( chunk_size, buf ) )
        rescue EOFError
          # normal termination: the whole file has been read
          break
        rescue => e
          # keep the run going, but do not hide the fact that this file was
          # only partially read
          logger.warn "  stopped reading #{datum.filename} early: #{e.class}: #{e.message}"
          break
        end
      end
    end
  end
  return bytes
end

#first_of(data) ⇒ Object

call-seq:

test.first_of( Filelist ) -> Filelist

Use the percentage option to take the first percentage of the input Filelist and return a new Filelist object containing that subset.



35
36
37
38
39
40
41
42
43
# File 'lib/readorder/commands/test.rb', line 35

# Build a new Filelist holding the leading options['percentage'] percent of
# the lines in +data+.
#
# data - an object responding to #each_line
#
# The number of lines kept is the percentage of the total, rounded up, and
# never less than zero. Each kept line is stripped of surrounding whitespace.
def first_of( data )
  percentage = options['percentage']
  logger.info "gathering the first #{percentage}% of the data"
  lines = []
  data.each_line { |l| lines << l.strip }
  # Multiply before dividing: 100 * ( 7 / 100.0 ) is 7.000000000000001 in
  # floating point and would be rounded up to 8 by ceil.
  count = ( lines.size * percentage.to_f / 100.0 ).ceil
  count = 0 if count < 0
  # first( count ) takes exactly count lines; the inclusive range 0..count
  # used previously returned one line too many.
  subset = lines.first( count )
  return Filelist.new( StringIO.new( subset.join("\n") ) )
end

#report_results(timings) ⇒ Object

call-seq:

test.report_results( results ) -> nil

Write the report of the timings to output



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/readorder/commands/test.rb', line 140

# Print the timing report to +output+.
#
# timings - collection of timing objects, each responding to #name,
#           #value_stats, #timed_stats and #rate
#
# The file-size summary is taken from the value_stats of the first timing
# only; one table row is then printed per timing.
def report_results( timings )
  reference = timings.first
  output.puts
  [
    [ "Total files read", "%12d",   :count  ],
    [ "Total bytes read", "%12d",   :sum    ],
    [ "Minimum filesize", "%12d",   :min    ],
    [ "Average filesize", "%16.3f", :mean   ],
    [ "Maximum filesize", "%12d",   :max    ],
    [ "Stddev of sizes",  "%16.3f", :stddev ],
  ].each do |label, template, stat|
    # labels are padded to a common width so the colons line up
    output.puts "  #{label.ljust( 16 )} : #{template % reference.value_stats.send( stat )}"
  end
  output.puts

  output.puts format( "%28s %20s %22s", "read order", "Elapsed time (sec)", "Read rate (bytes/sec)" )
  output.puts "-" * 72
  timings.each do |timing|
    output.puts format( "%28s %20.3f %22.3f", timing.name, timing.timed_stats.sum, timing.rate )
  end
  output.puts
end

#runObject

call-seq:

test.run -> nil

Part of the Command lifecycle.



74
75
76
77
# File 'lib/readorder/commands/test.rb', line 74

# Command lifecycle entry point: run the random-sample test, then the
# first-of test, and return whatever the last one returns.
def run
  phases = [ :test_using_random_sample, :test_using_first_of ]
  phases.map { |phase| send( phase ) }.last
end

#run_test(test_name, data) ⇒ Object

call-seq:

test.run_test( 'original', [ Datum, Datum, ... ]) -> Hitimes::TimedValueMetric

Loop over all the Datum instances in the array and read the contents of each file, dumping them to /dev/null. Timings of this process are recorded and a Hitimes::TimedValueMetric is returned which holds the results.



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/readorder/commands/test.rb', line 171

# Time how long it takes to read every file in +data+, in the order given.
#
# test_name - label for this run; becomes the name of the returned metric
# data      - collection of Datum-like objects, each passed to dump_to_dev_null
#
# The caches are dropped once before the first read. Each file is timed
# individually with the number of bytes read as the sampled value, and a
# progress line is logged every 10,000 files.
#
# Returns the ::Hitimes::TimedValueMetric holding the timings.
def run_test( test_name, data )
  logger.info "running #{test_name} test on #{data.size} files"
  drop_caches
  timer = ::Hitimes::TimedValueMetric.new( test_name )
  logger.info "  begin test"
  data.each do |d|
    timer.start
    bytes = dump_to_dev_null( d )
    timer.stop( bytes )

    # Read the sample count from timed_stats, exactly as the final summary
    # line does; the progress line previously asked the metric itself for it.
    processed = timer.timed_stats.count
    if processed % 10_000 == 0 then
      logger.info "  processed #{processed} at #{"%0.3f" % timer.rate} bytes/sec"
    end
  end
  logger.info "  end test"
  logger.info "  processed #{timer.timed_stats.count} at #{"%0.3f" % timer.rate} bytes/sec"
  return timer
end

#sample_from(data) ⇒ Object

call-seq:

test.sample_from( Filelist ) -> Filelist

Use the percentage option to take a random subsampling of data from the input Filelist and return a new Filelist object containing that subset.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/readorder/commands/test.rb', line 53

# Build a new Filelist from a random subsample of the lines in +data+.
#
# data - an object responding to #each_line
#
# Every line is kept independently with probability
# options['percentage'] / 100.0; kept lines are stripped of surrounding
# whitespace. The number of lines sampled and seen is logged.
def sample_from( data )
  percentage = options['percentage']
  logger.info "sampling a random #{percentage}% of the data"
  threshold = percentage / 100.0
  seen = 0
  picked = []
  data.each_line do |line|
    seen += 1
    # one random draw per line decides whether it is kept
    next unless rand < threshold
    picked << line.strip
  end
  logger.info "sampled #{picked.size} of #{seen}"
  return Filelist.new( StringIO.new( picked.join("\n") ) )
end

#test_using_first_ofObject

call-seq:

test.test_using_first_of

Run the full test using the first percentage of the original Filelist



102
103
104
105
106
107
108
109
# File 'lib/readorder/commands/test.rb', line 102

# Run every read-order test against the leading portion of the file list and
# print the report under a "Test Using First Of" banner.
def test_using_first_of
  # clear the cached list before asking for it again
  @filelist = nil
  timings = test_using_sublist( first_of( filelist ) )
  [ "Test Using First Of".center(72), "=" * 72 ].each { |banner| output.puts banner }
  report_results( timings )
end

#test_using_random_sampleObject

call-seq:

test.test_using_random_sample

Run the full test using a random subsample of the original Filelist



85
86
87
88
89
90
91
92
93
# File 'lib/readorder/commands/test.rb', line 85

# Run every read-order test against a random subsample of the file list and
# print the report under a "Test Using Random Sample" banner.
def test_using_random_sample
  # clear the cached list before asking for it again
  @filelist = nil
  timings = test_using_sublist( sample_from( filelist ) )
  [ "Test Using Random Sample".center(72), "=" * 72 ].each { |banner| output.puts banner }
  report_results( timings )
end

#test_using_sublist(sublist) ⇒ Object

call-seq:

test.test_using_sublist( Filelist ) -> Array of TimedValueMetric

Given a Filelist of messages, run the whole test on them all.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/readorder/commands/test.rb', line 117

# Analyze +sublist+ and time a full read of its good data once per ordering:
# original_order, inode_number and first_physical_block_number.
#
# sublist - the Filelist handed to Analyzer.new
#
# For each ordering the samples are keyed into a ::MultiRBTree by the value of
# the attribute of that name, and the tree's values are handed to run_test.
#
# Returns an Array with one run_test result per ordering, in the order above.
def test_using_sublist( sublist )
  sub_analyzer = Analyzer.new( sublist )
  sub_analyzer.collect_data

  orderings = %w[ original_order inode_number first_physical_block_number ]
  orderings.map do |ordering|
    logger.info "ordering #{sub_analyzer.good_data.size} samples by #{ordering}"
    by_rank = ::MultiRBTree.new
    sub_analyzer.good_data.each do |sample|
      # the ordering name doubles as the attribute to read the rank from
      by_rank[ sample.send( ordering ) ] = sample
    end
    run_test( ordering, by_rank.values )
  end
end