Class: Readorder::Analyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/readorder/analyzer.rb

Overview

Use the given Filelist and traverse all the files, collecting the appropriate Datum instances.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filelist, results, get_physical = true) ⇒ Analyzer

Initialize the Analyzer with the Filelist object and whether or not to gather the physical block size.



23
24
25
26
27
28
29
30
31
# File 'lib/readorder/analyzer.rb', line 23

# Create a new Analyzer.
#
# filelist     - the Filelist whose files will be traversed
# results      - the Results handler that stores each collected Datum
# get_physical - whether to gather physical block information (default true)
def initialize( filelist, results, get_physical = true )
  @filelist     = filelist
  @results      = results
  @get_physical = get_physical

  # metrics gathered during collect_data
  @size_metric = ::Hitimes::ValueMetric.new( 'size' )
  @time_metric = ::Hitimes::TimedMetric.new( 'time' )

  # running tallies of valid / invalid data seen so far
  @good_data_count = 0
  @bad_data_count  = 0
end

Instance Attribute Details

#bad_data_countObject

number of bad_data items encountered



11
12
13
# File 'lib/readorder/analyzer.rb', line 11

# Number of invalid data items encountered so far; incremented in
# #collect_data whenever a Datum is not valid?.
def bad_data_count
  @bad_data_count
end

#good_data_countObject

number of good_data items encountered



14
15
16
# File 'lib/readorder/analyzer.rb', line 14

# Number of valid data items encountered so far; incremented in
# #collect_data whenever a Datum is valid?.
def good_data_count
  @good_data_count
end

#resultsObject

The Results handler



17
18
19
# File 'lib/readorder/analyzer.rb', line 17

# The Results handler given to #initialize; receives every collected
# Datum and answers queries about previously seen files and errors.
def results
  @results
end

Instance Method Details

#collect_dataObject

call-seq:

analyzer.collect_data -> nil

Run data collection over the Filelist and store the results into good_data or bad_data as appropriate. A status message is written to the log every 10,000 files processed.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/readorder/analyzer.rb', line 51

# call-seq:
#    analyzer.collect_data -> nil
#
# Iterate over every filename in the Filelist, collect a Datum for each
# and hand it to the Results handler. Filenames the Results handler has
# already seen are skipped (allows a run to be resumed). Each
# collection is timed via @time_metric; valid data also feed their file
# size into @size_metric. Progress is logged every 10,000 processed
# files. Returns nil.
def collect_data
  logger.info "Begin data collection"
  original_order = 0
  @filelist.each_line do |fname|
    # already-processed files are skipped entirely — note they also do
    # not advance original_order
    next if @results.has_datum_for_filename?( fname )
    logger.debug "  analyzing #{fname.strip}"
    @time_metric.measure do
      d = Datum.new( fname )
      begin
        d.collect( @get_physical )
        d.original_order = original_order

        @results.add_datum( d )

        if d.valid? then
          @size_metric.measure d.stat.size
          @good_data_count += 1
        else
          @bad_data_count += 1
        end
      rescue => e
        # log the failure and keep going — one bad file must not abort
        # the whole run
        logger.error "#{e} : #{d.to_hash.inspect}"
      end
    end

    # @time_metric.count only advances for files actually measured, so
    # this fires once per 10,000 processed (not skipped) files
    if @time_metric.count % 10_000 == 0 then
      logger.info "  processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec ( #{@good_data_count} good, #{@bad_data_count} bad )"
    end
    original_order += 1
  end
  @results.flush
  logger.info "  processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec"
  logger.info "  yielded #{@good_data_count} data points"
  logger.info "End data collection" 
  nil
end

#dump_errors_to(io) ⇒ Object

call-seq:

analyzer.dump_errors_to( IO ) -> nil

write a csv to the IO object passed in. The format is:

error_reason,filename

If there are no bad Datum instances then do not write anything.



131
132
133
134
135
136
137
138
139
# File 'lib/readorder/analyzer.rb', line 131

# call-seq:
#    analyzer.dump_errors_to( IO ) -> nil
#
# Write a csv of the collection errors to the IO object passed in: a
# header line followed by one "error_reason,filename" row per errored
# datum. Writes nothing at all when no errors were recorded.
def dump_errors_to( io )
  return nil unless results.error_count > 0
  io.puts "error_reason,filename"
  results.each_error do |err|
    io.puts "#{err['error_reason']},#{err['filename']}"
  end
  nil
end

#dump_valid_to(io) ⇒ Object

call-seq:

analyzer.dump_valid_to( IO ) -> nil

Write a csv to the IO object passed in. The format is:

filename,size,inode_number,physical_block_count,first_physical_block_number

The last two fields physical_block_count and first_physical_block_number are only written if the analyzer was able to gather physical block information



153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/readorder/analyzer.rb', line 153

# call-seq:
#    analyzer.dump_valid_to( IO ) -> nil
#
# Write a csv of the valid data to the IO object passed in. The format is:
#
#    filename,size,inode_number,physical_block_count,first_physical_block_number
#
# The last two fields are only written when the analyzer gathered
# physical block information; rows are ordered by first physical block
# number in that case, otherwise by inode number.
#
# Fixes over the previous version: the block parameter no longer
# shadows an outer local, indentation is consistent, and the method now
# returns nil as its call-seq documents (matching #dump_errors_to).
def dump_valid_to( io )
  fields = %w[ filename size inode_number ]
  by_field = 'inode_number'
  if @get_physical then
    fields << 'physical_block_count'
    fields << 'first_physical_block_number'
    by_field = 'first_physical_block_number'
  end
  io.puts fields.join(",")
  results.each_valid_by_field( by_field ) do |d|
    io.puts fields.collect { |field| d[field] }.join(",")
  end
  nil
end

#log_summary_reportObject

call-seq:

analyzer.log_summary_report -> nil

Write the summary report to the #logger



94
95
96
97
98
# File 'lib/readorder/analyzer.rb', line 94

# call-seq:
#    analyzer.log_summary_report -> nil
#
# Send the #summary_report to the #logger, one info line per report line.
def log_summary_report
  summary_report.split("\n").each { |line| logger.info line }
end

#loggerObject

call-seq:

analyzer.logger -> Logger

return the Logger instance for the Analyzer



39
40
41
# File 'lib/readorder/analyzer.rb', line 39

# call-seq:
#    analyzer.logger -> Logger
#
# Return the ::Logging::Logger instance scoped to this object's class.
def logger
  ::Logging::Logger[self]
end

#summary_reportObject

call-seq:

analyzer.summary_report -> String

Generate a summary report of how long it took to analyze the files and the filesizes found. return it as a String



107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/readorder/analyzer.rb', line 107

# call-seq:
#    analyzer.summary_report -> String
#
# Build a human readable summary of the collection run — file counts,
# elapsed time, throughput, and size statistics for the good files —
# and return it as a newline-terminated String.
def summary_report
  report = []
  report << "Files analyzed   : #{"%12d" % @time_metric.count}"
  report << "Elapsed time     : #{"%12d" % @time_metric.duration} seconds"
  report << "Collection Rate  : #{"%16.3f" % @time_metric.rate} files/sec"
  report << "Good files       : #{"%12d" % @good_data_count}"
  report << "  average size   : #{"%16.3f" % @size_metric.mean} bytes"
  report << "  minimum size   : #{"%16.3f" % @size_metric.min} bytes"
  report << "  maximum size   : #{"%16.3f" % @size_metric.max} bytes"
  report << "  sum of sizes   : #{"%12d" % @size_metric.sum} bytes"
  report << "Bad files        : #{"%12d" % @bad_data_count}"
  report.join("\n") + "\n"
end