Class: Polars::GroupBy
- Inherits:
-
Object
- Object
- Polars::GroupBy
- Defined in:
- lib/polars/group_by.rb
Overview
Starts a new GroupBy operation.
Instance Method Summary
-
#agg(*aggs, **named_aggs) ⇒ DataFrame
Compute aggregations for each group of a group by operation.
-
#count ⇒ DataFrame
Count the number of values in each group.
-
#each ⇒ Object
Allows iteration over the groups of the group by operation.
-
#first ⇒ DataFrame
Aggregate the first values in the group.
-
#head(n = 5) ⇒ DataFrame
Get the first n rows of each group.
-
#last ⇒ DataFrame
Aggregate the last values in the group.
-
#max ⇒ DataFrame
Reduce the groups to the maximal value.
-
#mean ⇒ DataFrame
Reduce the groups to the mean values.
-
#median ⇒ DataFrame
Return the median per group.
-
#min ⇒ DataFrame
Reduce the groups to the minimal value.
-
#n_unique ⇒ DataFrame
Count the unique values per group.
-
#plot(*args, **options) ⇒ Vega::LiteChart
Plot data.
-
#quantile(quantile, interpolation: "nearest") ⇒ DataFrame
Compute the quantile per group.
-
#sum ⇒ DataFrame
Reduce the groups to the sum.
-
#tail(n = 5) ⇒ DataFrame
Get the last n rows of each group.
Instance Method Details
#agg(*aggs, **named_aggs) ⇒ DataFrame
Compute aggregations for each group of a group by operation.
203 204 205 206 207 208 |
# File 'lib/polars/group_by.rb', line 203
# Compute aggregations for each group of a group by operation.
#
# Builds a lazy group-by over `@df` keyed on `@by` (passing along
# `@maintain_order`), forwards the given aggregations unchanged to its
# `agg`, and collects the result with `no_optimization: true`.
#
# @param aggs positional aggregations, forwarded as-is
# @param named_aggs keyword aggregations, forwarded as-is
# @return [DataFrame]
def agg(*aggs, **named_aggs)
  grouped = @df.lazy.group_by(@by, maintain_order: @maintain_order)
  grouped.agg(*aggs, **named_aggs).collect(no_optimization: true)
end
#count ⇒ DataFrame
Count the number of values in each group.
482 483 484 |
# File 'lib/polars/group_by.rb', line 482
# Count the number of values in each group.
#
# Delegates to #agg with `Polars.len` aliased to "count".
#
# @return [DataFrame]
def count
  group_size = Polars.len.alias("count")
  agg(group_size)
end
#each ⇒ Object
Allows iteration over the groups of the group by operation.
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/polars/group_by.rb', line 35
# Allows iteration over the groups of the group by operation.
#
# For every group, yields the group name and the result of indexing `@df`
# with the row indices collected for that group. Without a block, returns
# an enumerator over the same pairs.
#
# @yield [group_name, group_data] one pair per group
# @return [Object] an enumerator when no block is given, otherwise nil
def each
  return to_enum(:each) unless block_given?

  index_col = "__POLARS_GB_GROUP_INDICES"

  # Tag every row with its position, then gather those positions per group.
  lazy_groups = @df.lazy
    .with_row_index(name: index_col)
    .group_by(@by, maintain_order: @maintain_order)
    .agg(Polars.col(index_col))
  groups_df = lazy_groups.collect(no_optimization: true)

  key_frame = groups_df.select(Polars.all.exclude(index_col))

  # When grouping by a single column, group name is a single value
  # When grouping by multiple columns, group name is a tuple of values
  single_key = @by.is_a?(::String) || @by.is_a?(Expr)
  names = single_key ? key_frame.to_series.each : key_frame.iter_rows

  row_indices = groups_df.select(index_col).to_series

  row_indices.length.times do |position|
    group_name = names.next
    group_data = @df[row_indices[position]]
    yield group_name, group_data
  end

  # Keep the nil result that a bare loop would produce.
  nil
end
#first ⇒ DataFrame
Aggregate the first values in the group.
337 338 339 |
# File 'lib/polars/group_by.rb', line 337
# Aggregate the first values in the group.
#
# Delegates to #agg with `Polars.all.first`.
#
# @return [DataFrame]
def first
  first_of_each = Polars.all.first
  agg(first_of_each)
end
#head(n = 5) ⇒ DataFrame
Get the first n rows of each group.
254 255 256 257 258 259 |
# File 'lib/polars/group_by.rb', line 254
# Get the first n rows of each group.
#
# Builds a lazy group-by over `@df` keyed on `@by`, applies `head(n)` to it,
# and collects the result with `no_optimization: true`.
#
# @param n number of rows to take from the start of each group (default 5)
# @return [DataFrame]
def head(n = 5)
  grouped = @df.lazy.group_by(@by, maintain_order: @maintain_order)
  grouped.head(n).collect(no_optimization: true)
end
#last ⇒ DataFrame
Aggregate the last values in the group.
366 367 368 |
# File 'lib/polars/group_by.rb', line 366
# Aggregate the last values in the group.
#
# Delegates to #agg with `Polars.all.last`.
#
# @return [DataFrame]
def last
  last_of_each = Polars.all.last
  agg(last_of_each)
end
#max ⇒ DataFrame
Reduce the groups to the maximal value.
453 454 455 |
# File 'lib/polars/group_by.rb', line 453
# Reduce the groups to the maximal value.
#
# Delegates to #agg with `Polars.all.max`.
#
# @return [DataFrame]
def max
  max_of_each = Polars.all.max
  agg(max_of_each)
end
#mean ⇒ DataFrame
Reduce the groups to the mean values.
511 512 513 |
# File 'lib/polars/group_by.rb', line 511
# Reduce the groups to the mean values.
#
# Delegates to #agg with `Polars.all.mean`.
#
# @return [DataFrame]
def mean
  mean_of_each = Polars.all.mean
  agg(mean_of_each)
end
#median ⇒ DataFrame
Return the median per group.
598 599 600 |
# File 'lib/polars/group_by.rb', line 598
# Return the median per group.
#
# Delegates to #agg with `Polars.all.median`.
#
# @return [DataFrame]
def median
  median_of_each = Polars.all.median
  agg(median_of_each)
end
#min ⇒ DataFrame
Reduce the groups to the minimal value.
424 425 426 |
# File 'lib/polars/group_by.rb', line 424
# Reduce the groups to the minimal value.
#
# Delegates to #agg with `Polars.all.min`.
#
# @return [DataFrame]
def min
  min_of_each = Polars.all.min
  agg(min_of_each)
end
#n_unique ⇒ DataFrame
Count the unique values per group.
538 539 540 |
# File 'lib/polars/group_by.rb', line 538
# Count the unique values per group.
#
# Delegates to #agg with `Polars.all.n_unique`.
#
# @return [DataFrame]
def n_unique
  distinct_of_each = Polars.all.n_unique
  agg(distinct_of_each)
end
#plot(*args, **options) ⇒ Vega::LiteChart
Plot data.
605 606 607 608 609 610 611 |
# File 'lib/polars/group_by.rb', line 605
# Plot data.
#
# Forwards to `@df.plot`, adding `group: @by` to the caller's options.
#
# @param args positional arguments, forwarded to `@df.plot`
# @param options keyword arguments, forwarded to `@df.plot`; must not
#   contain `:group`, which is supplied from the group-by key
# @return [Vega::LiteChart]
# @raise [ArgumentError] if `@by` is an Array with more than one element,
#   or if `options` already contains `:group`
def plot(*args, **options)
  raise ArgumentError, "Multiple groups not supported" if @by.is_a?(::Array) && @by.size > 1
  # same message as Ruby
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)

  @df.plot(*args, **options, group: @by)
end
#quantile(quantile, interpolation: "nearest") ⇒ DataFrame
Compute the quantile per group.
571 572 573 |
# File 'lib/polars/group_by.rb', line 571
# Compute the quantile per group.
#
# Delegates to #agg with `Polars.all.quantile`, passing both arguments
# through unchanged.
#
# @param quantile the quantile to compute, forwarded as-is
# @param interpolation interpolation method, forwarded as-is
#   (default "nearest")
# @return [DataFrame]
def quantile(quantile, interpolation: "nearest")
  quantile_of_each = Polars.all.quantile(quantile, interpolation: interpolation)
  agg(quantile_of_each)
end
#sum ⇒ DataFrame
Reduce the groups to the sum.
395 396 397 |
# File 'lib/polars/group_by.rb', line 395
# Reduce the groups to the sum.
#
# Delegates to #agg with `Polars.all.sum`.
#
# @return [DataFrame]
def sum
  sum_of_each = Polars.all.sum
  agg(sum_of_each)
end
#tail(n = 5) ⇒ DataFrame
Get the last n rows of each group.
305 306 307 308 309 310 |
# File 'lib/polars/group_by.rb', line 305
# Get the last n rows of each group.
#
# Builds a lazy group-by over `@df` keyed on `@by`, applies `tail(n)` to it,
# and collects the result with `no_optimization: true`.
#
# @param n number of rows to take from the end of each group (default 5)
# @return [DataFrame]
def tail(n = 5)
  grouped = @df.lazy.group_by(@by, maintain_order: @maintain_order)
  grouped.tail(n).collect(no_optimization: true)
end