Class: Daru::Core::GroupBy
Constant Summary collapse
# Comparator used to order group-key tuples.
#
# Both tuples are stripped of nils and compared with Array#<=>. If only one
# side is present, a missing left side sorts first (-1) and a missing right
# side yields 1, exactly as before.
#
# Array#<=> returns nil when the remaining elements cannot be ordered
# against each other (for example an Integer next to a String). Handing nil
# back to Array#sort raises ArgumentError, so in that case the comparison
# falls back to the elements' string forms, which are always comparable.
TUPLE_SORTER = lambda do |left, right|
  if left && right
    lhs = left.compact
    rhs = right.compact
    # Natural ordering first; the string fallback only runs when it is nil.
    (lhs <=> rhs) || (lhs.map(&:to_s) <=> rhs.map(&:to_s))
  else
    left ? 1 : -1
  end
end
Instance Attribute Summary collapse
-
#groups ⇒ Object
readonly
Returns the value of attribute groups.
Instance Method Summary collapse
-
#count ⇒ Object
Count groups, excludes missing values.
-
#each_group ⇒ Object
Iterate over each group created by group_by.
-
#first ⇒ Object
Get the first group.
-
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
-
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups.
-
#initialize(context, names) ⇒ GroupBy
constructor
A new instance of GroupBy.
-
#last ⇒ Object
Get the last group.
-
#max ⇒ Object
Find the max element of each numeric vector group.
-
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
-
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
-
#min ⇒ Object
Find the min element of each numeric vector group.
-
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
-
#size ⇒ Object
Get a Daru::Vector of the size of each group.
-
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
-
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
-
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups.
Constructor Details
#initialize(context, names) ⇒ GroupBy
Returns a new instance of GroupBy.
22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/daru/core/group_by.rb', line 22
# Builds the group table for +context+, keyed on the vectors in +names+.
#
# Every row contributes one key tuple (its values in the grouping vectors).
# The distinct tuples are ordered with TUPLE_SORTER and each is mapped to
# whatever all_indices_for returns for it. The finished table is frozen.
#
# @param context the object being grouped; must respond to #vectors and #[]
# @param names [Array] names of the vectors that form the group keys
def initialize(context, names)
  @groups = {}
  @non_group_vectors = context.vectors.to_a - names
  @context = context

  # One value array per grouping vector, zipped into per-row tuples.
  columns = names.map { |name| context[name].to_a }
  leading, *remaining = columns
  tuples = leading.zip(*remaining)

  tuples.uniq.sort(&TUPLE_SORTER).each do |tuple|
    @groups[tuple] = all_indices_for(tuples, tuple)
  end

  @groups.freeze
end
Instance Attribute Details
#groups ⇒ Object (readonly)
Returns the value of attribute groups.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4
# Read-only accessor for the group table.
#
# @return [Object] the current value of the @groups instance variable
def groups
  @groups
end
Instance Method Details
#count ⇒ Object
Count groups, excludes missing values.
154 155 156 157 |
# File 'lib/daru/core/group_by.rb', line 154
# Builds a DataFrame that repeats the per-group size vector (see #size)
# once for every non-grouping vector, using those vectors as the order.
#
# @return [Daru::DataFrame]
def count
  column_total = @non_group_vectors.size
  # #size is evaluated once; the same result fills every column slot.
  repeated_sizes = Array.new(column_total, size)
  Daru::DataFrame.new(repeated_sizes, order: @non_group_vectors)
end
#each_group ⇒ Object
Iterate over each group created by group_by. Each group is yielded to the block as a DataFrame.
8 9 10 11 12 |
# File 'lib/daru/core/group_by.rb', line 8
# Iterates over every group, yielding the result of #get_group for each key
# of #groups in turn.
#
# When no block is supplied an Enumerator over the same sequence is
# returned instead of raising LocalJumpError.
#
# @yield [group] the object returned by #get_group for the current key
# @return [Enumerator] when called without a block
# @return [Array] the group keys, when called with a block
def each_group
  return to_enum(:each_group) unless block_given?

  groups.keys.each do |key|
    yield get_group(key)
  end
end
#first ⇒ Object
Get the first group.
50 51 52 |
# File 'lib/daru/core/group_by.rb', line 50
# Get the first group.
#
# Shorthand for calling #head with a quantity of 1.
#
# @return [Object] whatever #head returns for a quantity of 1
def first
  head(1)
end
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
# File 'lib/daru/core/group_by.rb', line 191
# Returns one of the selected groups as a DataFrame.
#
# @param group the key under which the group is stored in @groups
# @return [Daru::DataFrame] the rows of the context that belong to +group+
def get_group(group)
  positions = @groups[group]

  # Transposed so that element i holds the i-th value of every vector.
  row_table = @context.each_vector.map(&:to_a).transpose
  picked_rows = positions.map { |pos| row_table[pos] }

  # Prefer the context's own index entries; if that lookup raises
  # IndexError, the positions themselves are used as the index.
  group_index =
    begin
      @context.index[positions]
    rescue IndexError
      positions
    end

  Daru::DataFrame.rows(picked_rows, index: group_index, order: @context.vectors)
end
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups.
78 79 80 |
# File 'lib/daru/core/group_by.rb', line 78
# Get the top 'n' groups.
#
# Hands :first and +quantity+ to select_groups_from (defined elsewhere).
#
# @param quantity how many to take; defaults to 5
# @return [Object] the result of select_groups_from
def head(quantity = 5)
  select_groups_from(:first, quantity)
end
#last ⇒ Object
Get the last group.
55 56 57 |
# File 'lib/daru/core/group_by.rb', line 55
# Get the last group.
#
# Shorthand for calling #tail with a quantity of 1.
#
# @return [Object] whatever #tail returns for a quantity of 1
def last
  tail(1)
end
#max ⇒ Object
Find the max element of each numeric vector group.
166 167 168 |
# File 'lib/daru/core/group_by.rb', line 166
# Find the max element of each numeric vector group.
#
# Passes :numeric and :max to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def max
  apply_method(:numeric, :max)
end
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
122 123 124 |
# File 'lib/daru/core/group_by.rb', line 122
# Calculate mean of numeric groups, excluding missing values.
#
# Passes :numeric and :mean to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def mean
  apply_method(:numeric, :mean)
end
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
127 128 129 |
# File 'lib/daru/core/group_by.rb', line 127
# Calculate the median of numeric groups, excluding missing values.
#
# Passes :numeric and :median to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def median
  apply_method(:numeric, :median)
end
#min ⇒ Object
Find the min element of each numeric vector group.
171 172 173 |
# File 'lib/daru/core/group_by.rb', line 171
# Find the min element of each numeric vector group.
#
# Passes :numeric and :min to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def min
  apply_method(:numeric, :min)
end
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
# File 'lib/daru/core/group_by.rb', line 225
# Iteratively applies a function to the values in a group and accumulates
# the result.
#
# For every group the accumulator starts at +init+; the block is then
# called once per member row with the accumulator and that row, and its
# return value becomes the new accumulator.
#
# @param init the starting accumulator for every group (defaults to nil)
# @yield [accumulator, row] the running value and the row looked up via
#   @context.row
# @yieldreturn the next accumulator value
# @return [Daru::Vector] one accumulated value per group, indexed by group
def reduce(init = nil)
  # The index labels do not change while reducing, so materialise them once
  # instead of once per row of every group.
  index_labels = @context.index.to_a

  result_hash = @groups.each_with_object({}) do |(group, indices), h|
    h[group] = indices.inject(init) do |accumulated, position|
      yield(accumulated, @context.row[index_labels[position]])
    end
  end

  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples(result_hash.keys)
    else
      Daru::Index.new(result_hash.keys.flatten)
    end

  Daru::Vector.new(result_hash.values, index: index)
end
#size ⇒ Object
Get a Daru::Vector of the size of each group.
37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/daru/core/group_by.rb', line 37
# Get a Daru::Vector of the size of each group.
#
# The vector is named :size. Its index is a MultiIndex built from the group
# keys when multi_indexed_grouping? is true, otherwise a plain Index built
# from the flattened keys.
#
# @return [Daru::Vector] number of entries stored for each group
def size
  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples(@groups.keys)
    else
      Daru::Index.new(@groups.keys.flatten)
    end

  member_counts = @groups.values.map(&:size)
  Daru::Vector.new(member_counts, index: index, name: :size)
end
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
161 162 163 |
# File 'lib/daru/core/group_by.rb', line 161
# Calculate sample standard deviation of numeric vector groups, excluding
# missing values.
#
# Passes :numeric and :std to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def std
  apply_method(:numeric, :std)
end
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
132 133 134 |
# File 'lib/daru/core/group_by.rb', line 132
# Calculate sum of numeric groups, excluding missing values.
#
# Passes :numeric and :sum to apply_method (defined elsewhere).
#
# @return [Object] the result of apply_method
def sum
  apply_method(:numeric, :sum)
end
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups.
101 102 103 |
# File 'lib/daru/core/group_by.rb', line 101
# Get the bottom 'n' groups.
#
# Hands :last and +quantity+ to select_groups_from (defined elsewhere).
#
# @param quantity how many to take; defaults to 5
# @return [Object] the result of select_groups_from
def tail(quantity = 5)
  select_groups_from(:last, quantity)
end