Class: Histogram

Inherits:
Object show all
Defined in:
lib/redshift/util/histogram.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ary, opts = {}) ⇒ Histogram

Construct a Histogram based on ary with the opts:

"bincount"  :: number of bins (default is 10)

"binsize" ::   size of each bin (cannot be combined with "bincount")

"min" ::       min value (otherwise, based on data)

"max" ::       max value (otherwise, based on data)

"normalize" :: divide each bin by the total count, unless false
              if numeric, scale the result by the value
              (default is false)

"stats" ::    calculate statistics for the data set (mean/stdev)
              (default is false)

Raises:

  • (ArgumentError)


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/redshift/util/histogram.rb', line 45

# Construct a Histogram based on +ary+ with the +opts+ (string keys):
#
# "bincount"  :: number of bins (default is 10)
# "binsize"   :: size of each bin; cannot be combined with "bincount"
# "min"       :: min value (otherwise, based on data)
# "max"       :: max value (otherwise, based on data)
# "normalize" :: divide each bin by the total count, unless false;
#                if numeric, scale the result by the value; +true+ is
#                treated as a factor of 1 (default is false)
# "stats"     :: calculate the mean and the sample standard deviation
#                (n - 1 denominator) of the data (default is false)
#
# Every element of +ary+ is converted with +to_f+. Each value is assigned
# to the bin whose index is ((x - min) / binsize) rounded to the nearest
# integer. Values that land outside the bins implied by an explicit
# "min"/"max" are left out of the bins, but still contribute to the count
# and to the statistics.
#
# An empty +ary+ leaves the list of bins empty.
#
# Raises ArgumentError if both "bincount" and "binsize" are given, or if
# the bin size works out to 0 (for example when min == max and no
# "binsize" is given).
def initialize(ary, opts={})
  @opts = opts
  @bins = []

  ary = ary.map {|x| x.to_f}
  @count = ary.size

  @bincount = opts["bincount"]
  @binsize  = opts["binsize"]
  @min      = opts["min"] || ary.min
  @max      = opts["max"] || ary.max
  @norm     = opts["normalize"] || false
  @stats    = opts["stats"] || false

  if @bincount && @binsize
    raise ArgumentError, "Cannot specify both bincount and binsize"
  end

  fixed_binsize = !!@binsize
  @bincount ||= 10 unless fixed_binsize

  # Empty data without explicit bounds: there is no range to divide up.
  return if @min.nil? || @max.nil?

  # Use a Float width so that Integer "min"/"max"/"bincount" values cannot
  # trigger truncating integer division.
  span = (@max - @min).to_f
  @binsize = span / @bincount unless fixed_binsize

  raise ArgumentError, "Cannot have binsize==0" if @binsize == 0

  # Values are binned by rounding, so the highest index is the rounded
  # quotient; an unrounded bincount would leave @counts one slot short.
  @bincount = (span / @binsize).round if fixed_binsize

  @counts = Array.new(@bincount+1, 0)
  top = @counts.size - 1

  ary.each do |x|
    i = ((x - @min) / @binsize).round
    # Skip values outside the bins: a negative index would silently wrap
    # around to the high bins, and an index past the end would hit nil.
    @counts[i] += 1 if i.between?(0, top)
  end

  return if ary.empty?

  if @stats
    n = ary.size.to_f
    @mean = ary.inject {|sum, x| sum + x} / n
    # Sample variance; for a single data point this is 0.0/0.0, i.e. NaN.
    var = ary.inject(0) {|sum, x| sum + (x - @mean)**2} / (n - 1)
    @stdev = Math::sqrt(var)
  end

  # "normalize" => true means "normalize to a total of 1".
  factor = (@norm == true) ? 1 : @norm
  scale = factor ? factor / @count.to_f : 1
  @counts.each_with_index do |bin, i|
    @bins << [@min + i*@binsize, bin*scale]
  end
end

Instance Attribute Details

#bincountObject (readonly)

Number of bins.



3
4
5
# File 'lib/redshift/util/histogram.rb', line 3

# Number of bins. Read-only accessor for @bincount, which the constructor
# takes from opts["bincount"], derives from the bin size when "binsize" is
# given, or defaults to 10.
def bincount
  @bincount
end

#binsObject (readonly)

An array of pairs of the form [bin, count]. Suitable for plotting. Bins are inclusive of lower endpoint. Highest bin is inclusive of both endpoints.



25
26
27
# File 'lib/redshift/util/histogram.rb', line 25

# Array of two-element pairs built by the constructor: the bin's position
# (min + index * binsize) and that bin's count, scaled when normalization
# was requested. Empty when no data was given.
def bins
  @bins
end

#binsizeObject (readonly)

Size of each bin.



6
7
8
# File 'lib/redshift/util/histogram.rb', line 6

# Size of each bin. Read-only accessor for @binsize, which the constructor
# takes from opts["binsize"] or computes as (max - min) / bincount.
def binsize
  @binsize
end

#countObject (readonly)

Count of data points given.



9
10
11
# File 'lib/redshift/util/histogram.rb', line 9

# Count of data points given, i.e. the size of the array passed to the
# constructor.
def count
  @count
end

#maxObject (readonly)

max, as given in opts, or max of data.



12
13
14
# File 'lib/redshift/util/histogram.rb', line 12

# Upper bound of the histogram: opts["max"] if it was given, otherwise the
# largest data value (after conversion with to_f).
def max
  @max
end

#meanObject (readonly)

Mean of the data. Calculated only if the “stats” option is set; otherwise nil.



21
22
23
# File 'lib/redshift/util/histogram.rb', line 21

# Arithmetic mean of the data. Only assigned by the constructor when the
# "stats" option is set and data was given; nil otherwise.
def mean
  @mean
end

#minObject (readonly)

min, as given in opts, or min of data.



15
16
17
# File 'lib/redshift/util/histogram.rb', line 15

# Lower bound of the histogram: opts["min"] if it was given, otherwise the
# smallest data value (after conversion with to_f).
def min
  @min
end

#normObject (readonly)

Unless false, normalize by this factor (or 1).



18
19
20
# File 'lib/redshift/util/histogram.rb', line 18

# Normalization setting: the value of opts["normalize"], or false when that
# option was absent.
def norm
  @norm
end

#optsObject (readonly)

Options as originally given.



28
29
30
# File 'lib/redshift/util/histogram.rb', line 28

# The options hash exactly as it was passed to the constructor (the same
# object, not a copy).
def opts
  @opts
end

#stdevObject (readonly)

Standard deviation of the data. Calculated only if the “stats” option is set; otherwise nil.



21
22
23
# File 'lib/redshift/util/histogram.rb', line 21

# Sample standard deviation of the data (n - 1 denominator). Only assigned
# by the constructor when the "stats" option is set and data was given;
# nil otherwise.
def stdev
  @stdev
end

Instance Method Details

#inspectObject



93
94
95
96
97
98
# File 'lib/redshift/util/histogram.rb', line 93

# Short human-readable summary of the form
# "#<ClassName: name=value, ...>". It always lists bincount, binsize,
# count, min, max and norm, and appends mean and stdev when the "stats"
# option was set. Values are read through the same-named reader methods.
def inspect
  names = ["bincount", "binsize", "count", "min", "max", "norm"]
  names += ["mean", "stdev"] if @stats

  pairs = names.map do |name|
    value = send(name)
    "#{name}=#{value}"
  end

  summary = pairs.join(", ")
  "#<#{self.class}: #{summary}>"
end