Class: RailsDataExplorer::Utils::DataBinner

Inherits:
Object
  • Object
show all
Defined in:
lib/rails_data_explorer/utils/data_binner.rb

Overview

Responsibilities:

* Convert quantitative data to categorical data.

saedsayad.com/binning.htm E.g., ages in years to 5 groups:

  • <= 10

  • 11 - 20

  • 21 - 30

  • 31 - 40

  • > 40

Instance Method Summary collapse

Constructor Details

#initialize(threshold_specs) ⇒ DataBinner

@param threshold_specs a hash with a key value pair for each threshold

The key is the label to use, and the value is a Numeric threshold.
Adds one more bin for values greater than the highest threshold.
Example: { '0' => 0, '1' => 1, '2' => 2, '3..10' => 10, '11..100' => 100 }
Will generate the following output:
  -1   => '0'
   0   => '0'
   0.1 => '1'
   4   => '3..10'
  10   => '3..10'
  10.1 => '11..100'
1000   => '> 100'


30
31
32
33
34
35
36
37
38
39
40
# File 'lib/rails_data_explorer/utils/data_binner.rb', line 30

# Builds the ordered list of bin specs from the given thresholds.
#
# Each key/value pair becomes a bin of the form { label: key, lte: value }.
# Bins are sorted ascending by threshold, and one extra catch-all bin
# { label: "> <max>", gt: <max> } is appended for values above the highest
# threshold. With an empty hash, only the catch-all bin for
# -Float::INFINITY is created.
#
# @param threshold_specs [Hash{Object => Numeric}] label => upper threshold
#   (inclusive) for each bin.
# @raise [RuntimeError] if any threshold is not a Numeric.
def initialize(threshold_specs)
  specs = threshold_specs.to_a
  # Validate before sorting: comparing a non-Numeric threshold with a Numeric
  # one makes the sort itself fail with an unrelated comparison error, which
  # would hide the descriptive message below.
  specs.each { |(_label, threshold)|
    raise "Invalid threshold: #{ threshold.inspect }"  unless threshold.is_a?(Numeric)
  }
  @max = -Float::INFINITY
  @bin_specs = specs.sort { |(_label_a, v_a), (_label_b, v_b)|
    v_a <=> v_b
  }.map { |(label, threshold)|
    # Track the highest threshold seen; it bounds the catch-all bin.
    @max = [@max, threshold].max
    { label: label, lte: threshold }
  }
  @bin_specs << { label: "> #{ @max }", gt: @max }
end

Instance Method Details

#bin(value) ⇒ Object



42
43
44
45
46
47
48
49
50
# File 'lib/rails_data_explorer/utils/data_binner.rb', line 42

# Returns the label of the bin that +value+ falls into.
#
# The bin specs in @bin_specs are checked in order and the first match wins:
# a spec with :lte matches when value <= threshold, a spec with :gt matches
# when value > threshold.
#
# @param value [Numeric] the quantity to categorize.
# @return [Object] the label of the first matching bin.
# @raise [ArgumentError] if +value+ is not Numeric, or if no bin matches it
#   (e.g. Float::NAN, which compares false against every threshold).
def bin(value)
  unless value.is_a?(Numeric)
    raise ArgumentError, "Wrong type of value, numeric expected, got: #{ value.inspect }"
  end
  matching_bin = @bin_specs.detect { |bs|
    (bs[:lte] && value <= bs[:lte]) || (bs[:gt] && value > bs[:gt])
  }
  # Fail with a descriptive error instead of a NoMethodError on nil when
  # nothing matched.
  unless matching_bin
    raise ArgumentError, "No bin found for value: #{ value.inspect }"
  end
  matching_bin[:label]
end