Class: FlatKit::FieldStats
- Inherits:
-
Object
- Object
- FlatKit::FieldStats
- Defined in:
- lib/flat_kit/field_stats.rb
Overview
Internal: Collect stats on a single field.
We may not know what the field's data type is to start with, so collect a bunch of values until we reach the guess threshold, and then calculate stats based upon the data type determined by the guess.
Constant Summary collapse
- DEFAULT_GUESS_THRESHOLD =
1000
- CORE_STATS =
:core
- CARDINALITY_STATS =
:cardinality
- ALL_STATS =
[CORE_STATS, CARDINALITY_STATS].freeze
- EXPORT_FIELDS =
%w[ name type count max mean min stddev sum mode unique_count max_length mean_length min_length stddev_length mode_length unique_count_lengths null_count unknown_count out_of_type_count total_count null_percent unknown_percent ].freeze
Instance Attribute Summary collapse
-
#field_type ⇒ Object
readonly
Returns the value of attribute field_type.
-
#name ⇒ Object
readonly
Returns the value of attribute name.
-
#out_of_type_count ⇒ Object
readonly
Returns the value of attribute out_of_type_count.
-
#type_counts ⇒ Object
readonly
Returns the value of attribute type_counts.
Instance Method Summary collapse
- #collecting_frequencies? ⇒ Boolean
- #count ⇒ Object
- #field_type_determined? ⇒ Boolean
- #frequencies ⇒ Object
-
#initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD) ⇒ FieldStats
constructor
A new instance of FieldStats.
- #length_frequencies ⇒ Object
- #max ⇒ Object
- #max_length ⇒ Object
- #mean ⇒ Object
- #mean_length ⇒ Object
- #min ⇒ Object
- #min_length ⇒ Object
- #mode ⇒ Object
- #mode_length ⇒ Object
- #null_count ⇒ Object
- #null_percent ⇒ Object
- #stddev ⇒ Object
- #stddev_length ⇒ Object
- #sum ⇒ Object
- #to_hash ⇒ Object
- #total_count ⇒ Object
- #type ⇒ Object
- #unique_count ⇒ Object
- #unique_count_lengths ⇒ Object
- #unique_values ⇒ Object
- #unique_values_lengths ⇒ Object
- #unknown_count ⇒ Object
- #unknown_percent ⇒ Object
- #update(value) ⇒ Object
Constructor Details
#initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD) ⇒ FieldStats
Returns a new instance of FieldStats.
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/flat_kit/field_stats.rb', line 47
#
# Build a stats collector for a single named field.
#
# name:             - the field name, stored as given.
# stats_to_collect: - one collection set, or an Array of them; every entry
#                     must be a member of ALL_STATS.
# type:             - a class descending from ::FlatKit::FieldType.
# guess_threshold:  - stored in @guess_threshold for later use.
#
# Raises ArgumentError when a collection set is not in ALL_STATS, or when
# type: is not a class descending from ::FlatKit::FieldType.
def initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType,
               guess_threshold: DEFAULT_GUESS_THRESHOLD)
  @name = name
  @field_type = type
  @guess_threshold = guess_threshold
  @type_counts = Hash.new(0)
  @out_of_type_count = 0
  @values = []
  @stats = nil
  @length_stats = nil
  @stats_to_collect = [stats_to_collect].flatten

  @stats_to_collect.each do |collection_set|
    next if ALL_STATS.include?(collection_set)

    valid_sets = ALL_STATS.map(&:to_s).join(", ")
    raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
  end

  # Module#< is true for any descendant (not only direct children), false for
  # FieldType itself and nil for unrelated classes, so indirect subclasses
  # are accepted while everything else is still rejected.
  return if type.is_a?(Class) && type < ::FlatKit::FieldType

  raise ArgumentError, "type: must be FieldType subclasses - not #{type}"
end
Instance Attribute Details
#field_type ⇒ Object (readonly)
Returns the value of attribute field_type.
45 46 47 |
# File 'lib/flat_kit/field_stats.rb', line 45
#
# Reader for the @field_type instance variable.
def field_type
  @field_type
end
#name ⇒ Object (readonly)
Returns the value of attribute name.
45 46 47 |
# File 'lib/flat_kit/field_stats.rb', line 45
#
# Reader for the @name instance variable.
def name
  @name
end
#out_of_type_count ⇒ Object (readonly)
Returns the value of attribute out_of_type_count.
45 46 47 |
# File 'lib/flat_kit/field_stats.rb', line 45
#
# Reader for the @out_of_type_count instance variable.
def out_of_type_count
  @out_of_type_count
end
#type_counts ⇒ Object (readonly)
Returns the value of attribute type_counts.
45 46 47 |
# File 'lib/flat_kit/field_stats.rb', line 45
#
# Reader for the @type_counts instance variable.
def type_counts
  @type_counts
end
Instance Method Details
#collecting_frequencies? ⇒ Boolean
88 89 90 |
# File 'lib/flat_kit/field_stats.rb', line 88
#
# Whether the cardinality collection set is among the sets being collected.
#
# Returns true when @stats_to_collect includes CARDINALITY_STATS.
def collecting_frequencies?
  @stats_to_collect.include?(CARDINALITY_STATS)
end
#count ⇒ Object
96 97 98 |
# File 'lib/flat_kit/field_stats.rb', line 96
#
# Returns the count reported by the underlying stats object.
def count
  stats.count
end
#field_type_determined? ⇒ Boolean
72 73 74 |
# File 'lib/flat_kit/field_stats.rb', line 72
#
# Whether the field type is something other than the GuessType placeholder.
#
# Returns true when @field_type is not ::FlatKit::FieldType::GuessType.
def field_type_determined?
  @field_type != ::FlatKit::FieldType::GuessType
end
#frequencies ⇒ Object
132 133 134 |
# File 'lib/flat_kit/field_stats.rb', line 132
#
# Returns the stats object's frequencies, or nil when frequencies are not
# being collected.
def frequencies
  return unless collecting_frequencies?

  stats.frequencies
end
#length_frequencies ⇒ Object
164 165 166 |
# File 'lib/flat_kit/field_stats.rb', line 164
#
# Returns the length stats object's frequencies, or nil when there are no
# length stats or frequencies are not being collected.
def length_frequencies
  return unless @length_stats
  return unless collecting_frequencies?

  length_stats.frequencies
end
#max ⇒ Object
100 101 102 |
# File 'lib/flat_kit/field_stats.rb', line 100
#
# Returns the stats object's max, or nil when the stats object does not
# respond to :max.
def max
  return unless stats.respond_to?(:max)

  stats.max
end
#max_length ⇒ Object
140 141 142 |
# File 'lib/flat_kit/field_stats.rb', line 140
#
# Returns the length stats object's max, or nil when @length_stats is unset.
def max_length
  return unless @length_stats

  length_stats.max
end
#mean ⇒ Object
104 105 106 |
# File 'lib/flat_kit/field_stats.rb', line 104
#
# Returns the stats object's mean, or nil when the stats object does not
# respond to :mean.
def mean
  return unless stats.respond_to?(:mean)

  stats.mean
end
#mean_length ⇒ Object
144 145 146 |
# File 'lib/flat_kit/field_stats.rb', line 144
#
# Returns the length stats object's mean, or nil when @length_stats is unset.
def mean_length
  return unless @length_stats

  length_stats.mean
end
#min ⇒ Object
108 109 110 |
# File 'lib/flat_kit/field_stats.rb', line 108
#
# Returns the stats object's min, or nil when the stats object does not
# respond to :min.
def min
  return unless stats.respond_to?(:min)

  stats.min
end
#min_length ⇒ Object
136 137 138 |
# File 'lib/flat_kit/field_stats.rb', line 136
#
# Returns the length stats object's min, or nil when @length_stats is unset.
def min_length
  return unless @length_stats

  length_stats.min
end
#mode ⇒ Object
120 121 122 |
# File 'lib/flat_kit/field_stats.rb', line 120
#
# Returns the stats object's mode, or nil when frequencies are not being
# collected.
def mode
  return unless collecting_frequencies?

  stats.mode
end
#mode_length ⇒ Object
152 153 154 |
# File 'lib/flat_kit/field_stats.rb', line 152
#
# Returns the length stats object's mode, or nil when there are no length
# stats or frequencies are not being collected.
def mode_length
  return unless @length_stats
  return unless collecting_frequencies?

  length_stats.mode
end
#null_count ⇒ Object
168 169 170 |
# File 'lib/flat_kit/field_stats.rb', line 168
#
# Returns the entry of type_counts keyed by FieldType::NullType.
def null_count
  type_counts[FieldType::NullType]
end
#null_percent ⇒ Object
176 177 178 179 180 |
# File 'lib/flat_kit/field_stats.rb', line 176
#
# Percentage of the total count that is null, truncated to two decimal
# places.
#
# Returns 0 when total_count is zero so there is no division by zero.
def null_percent
  return 0 if total_count.zero?

  null_ratio = null_count.to_f / total_count
  (null_ratio * 100.0).truncate(2)
end
#stddev ⇒ Object
112 113 114 |
# File 'lib/flat_kit/field_stats.rb', line 112
#
# Returns the stats object's stddev, or nil when the stats object does not
# respond to :stddev.
def stddev
  return unless stats.respond_to?(:stddev)

  stats.stddev
end
#stddev_length ⇒ Object
148 149 150 |
# File 'lib/flat_kit/field_stats.rb', line 148
#
# Returns the length stats object's stddev, or nil when @length_stats is
# unset.
def stddev_length
  return unless @length_stats

  length_stats.stddev
end
#sum ⇒ Object
116 117 118 |
# File 'lib/flat_kit/field_stats.rb', line 116
#
# Returns the stats object's sum, or nil when the stats object does not
# respond to :sum.
def sum
  return unless stats.respond_to?(:sum)

  stats.sum
end
#to_hash ⇒ Object
192 193 194 195 196 197 198 199 200 |
# File 'lib/flat_kit/field_stats.rb', line 192
#
# Export every field named in EXPORT_FIELDS as a Hash.
#
# Calls resolve_guess first, then sends each export field name to self and
# stores the result under that name.
#
# Returns a Hash keyed by the EXPORT_FIELDS strings.
def to_hash
  resolve_guess

  EXPORT_FIELDS.each_with_object({}) do |field, exported|
    exported[field] = send(field)
  end
end
#total_count ⇒ Object
172 173 174 |
# File 'lib/flat_kit/field_stats.rb', line 172
#
# Returns the stats object's count plus @out_of_type_count.
def total_count
  counted = stats.count
  counted + @out_of_type_count
end
#type ⇒ Object
92 93 94 |
# File 'lib/flat_kit/field_stats.rb', line 92
#
# Returns the type_name reported by the current @field_type.
def type
  @field_type.type_name
end
#unique_count ⇒ Object
124 125 126 |
# File 'lib/flat_kit/field_stats.rb', line 124
#
# Returns the stats object's unique_count, or nil when frequencies are not
# being collected.
def unique_count
  return unless collecting_frequencies?

  stats.unique_count
end
#unique_count_lengths ⇒ Object
156 157 158 |
# File 'lib/flat_kit/field_stats.rb', line 156
#
# Returns the length stats object's unique_count, or nil when there are no
# length stats or frequencies are not being collected.
def unique_count_lengths
  return unless @length_stats
  return unless collecting_frequencies?

  length_stats.unique_count
end
#unique_values ⇒ Object
128 129 130 |
# File 'lib/flat_kit/field_stats.rb', line 128
#
# Returns the stats object's unique_values, or nil when frequencies are not
# being collected.
def unique_values
  return unless collecting_frequencies?

  stats.unique_values
end
#unique_values_lengths ⇒ Object
160 161 162 |
# File 'lib/flat_kit/field_stats.rb', line 160
#
# Returns the length stats object's unique_values, or nil when there are no
# length stats or frequencies are not being collected.
def unique_values_lengths
  return unless @length_stats
  return unless collecting_frequencies?

  length_stats.unique_values
end
#unknown_count ⇒ Object
182 183 184 |
# File 'lib/flat_kit/field_stats.rb', line 182
#
# Returns the entry of type_counts keyed by FieldType::UnknownType.
def unknown_count
  type_counts[FieldType::UnknownType]
end
#unknown_percent ⇒ Object
186 187 188 189 190 |
# File 'lib/flat_kit/field_stats.rb', line 186
#
# Percentage of the total count that is of unknown type, truncated to two
# decimal places.
#
# Returns 0 when total_count is zero so there is no division by zero.
def unknown_percent
  return 0 if total_count.zero?

  unknown_ratio = unknown_count.to_f / total_count
  (unknown_ratio * 100.0).truncate(2)
end
#update(value) ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/flat_kit/field_stats.rb', line 76
#
# Feed one value into the collector.
#
# The value is always passed to update_type_count. Once the field type is
# determined it goes straight to update_stats; until then it is buffered in
# @values, and resolve_guess is called as soon as the buffer size reaches
# @guess_threshold.
def update(value)
  update_type_count(value)
  return update_stats(value) if field_type_determined?

  @values << value
  resolve_guess if @values.size >= @guess_threshold
end