Class: Polars::GroupBy

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/group_by.rb

Overview

Starts a new GroupBy operation.

Instance Method Summary collapse

Instance Method Details

#agg(*aggs, **named_aggs) ⇒ DataFrame

Compute aggregations for each group of a group by operation.

Examples:

Compute the aggregation of the columns for each group.

df = Polars::DataFrame.new(
  {
    "a" => ["a", "b", "a", "b", "c"],
    "b" => [1, 2, 1, 3, 3],
    "c" => [5, 4, 3, 2, 1]
  }
)
df.group_by("a").agg(Polars.col("b"), Polars.col("c"))
# =>
# shape: (3, 3)
# ┌─────┬───────────┬───────────┐
# │ a   ┆ b         ┆ c         │
# │ --- ┆ ---       ┆ ---       │
# │ str ┆ list[i64] ┆ list[i64] │
# ╞═════╪═══════════╪═══════════╡
# │ a   ┆ [1, 1]    ┆ [5, 3]    │
# │ b   ┆ [2, 3]    ┆ [4, 2]    │
# │ c   ┆ [3]       ┆ [1]       │
# └─────┴───────────┴───────────┘

Compute the sum of a column for each group.

df.group_by("a").agg(Polars.col("b").sum)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ a   ┆ 2   │
# │ b   ┆ 5   │
# │ c   ┆ 3   │
# └─────┴─────┘

Compute multiple aggregates at once by passing a list of expressions.

df.group_by("a").agg([Polars.sum("b"), Polars.mean("c")])
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ a   ┆ b   ┆ c   │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ c   ┆ 3   ┆ 1.0 │
# │ a   ┆ 2   ┆ 4.0 │
# │ b   ┆ 5   ┆ 3.0 │
# └─────┴─────┴─────┘

Or use positional arguments to compute multiple aggregations in the same way.

df.group_by("a").agg(
  Polars.sum("b").name.suffix("_sum"),
  (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
)
# =>
# shape: (3, 3)
# ┌─────┬───────┬────────────────┐
# │ a   ┆ b_sum ┆ c_mean_squared │
# │ --- ┆ ---   ┆ ---            │
# │ str ┆ i64   ┆ f64            │
# ╞═════╪═══════╪════════════════╡
# │ a   ┆ 2     ┆ 17.0           │
# │ c   ┆ 3     ┆ 1.0            │
# │ b   ┆ 5     ┆ 10.0           │
# └─────┴───────┴────────────────┘

Use keyword arguments to easily name your expression inputs.

df.group_by("a").agg(
  b_sum: Polars.sum("b"),
  c_mean_squared: (Polars.col("c") ** 2).mean
)
# =>
# shape: (3, 3)
# ┌─────┬───────┬────────────────┐
# │ a   ┆ b_sum ┆ c_mean_squared │
# │ --- ┆ ---   ┆ ---            │
# │ str ┆ i64   ┆ f64            │
# ╞═════╪═══════╪════════════════╡
# │ a   ┆ 2     ┆ 17.0           │
# │ c   ┆ 3     ┆ 1.0            │
# │ b   ┆ 5     ┆ 10.0           │
# └─────┴───────┴────────────────┘

Parameters:

  • aggs (Array)

    Aggregations to compute for each group of the group by operation, specified as positional arguments. Accepts expression input. Strings are parsed as column names.

  • named_aggs (Hash)

    Additional aggregations, specified as keyword arguments. The resulting columns will be renamed to the keyword used.

Returns:

  • (DataFrame)

205
206
207
208
209
# File 'lib/polars/group_by.rb', line 205

# Compute aggregations for each group of a group by operation.
#
# @param aggs [Array]
#   Aggregations to compute for each group, given as positional arguments.
# @param named_aggs [Hash]
#   Additional aggregations given as keyword arguments.
#
# @return [DataFrame]
def agg(*aggs, **named_aggs)
  # Build the aggregation on the lazy group-by, then collect it with the
  # `QueryOptFlags.none` optimization flags.
  lazy_result = _lgb.agg(*aggs, **named_aggs)
  lazy_result.collect(optimizations: QueryOptFlags.none)
end

#all ⇒ DataFrame

Aggregate the groups into Series.

Examples:

df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
df.group_by("a", maintain_order: true).all
# =>
# shape: (2, 2)
# ┌─────┬───────────┐
# │ a   ┆ b         │
# │ --- ┆ ---       │
# │ str ┆ list[i64] │
# ╞═════╪═══════════╡
# │ one ┆ [1, 3]    │
# │ two ┆ [2, 4]    │
# └─────┴───────────┘

Returns:

  • (DataFrame)

386
387
388
# File 'lib/polars/group_by.rb', line 386

# Aggregate the groups into Series.
#
# @return [DataFrame]
def all
  every_column = F.all
  agg(every_column)
end

#count ⇒ DataFrame

Count the number of values in each group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).count
# =>
# shape: (3, 2)
# ┌────────┬───────┐
# │ d      ┆ count │
# │ ---    ┆ ---   │
# │ str    ┆ u32   │
# ╞════════╪═══════╡
# │ Apple  ┆ 3     │
# │ Orange ┆ 1     │
# │ Banana ┆ 2     │
# └────────┴───────┘

Returns:

  • (DataFrame)

611
612
613
# File 'lib/polars/group_by.rb', line 611

# Count the number of values in each group.
#
# The resulting column is named "count".
#
# @return [DataFrame]
def count
  row_count = Polars.len.alias("count")
  agg(row_count)
end

#each ⇒ Object

Allows iteration over the groups of the group by operation.

Examples:

df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
df.group_by("foo", maintain_order: true).each.to_h
# =>
# {["a"]=>shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ a   ┆ 1   │
# │ a   ┆ 2   │
# └─────┴─────┘, ["b"]=>shape: (1, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ b   ┆ 3   │
# └─────┴─────┘}

Returns:

  • (Object)

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/polars/group_by.rb', line 37

# Allows iteration over the groups of the group by operation.
#
# Without a block, returns an enumerator over the same pairs. With a block,
# yields the group name together with `@df` indexed by that group's row
# indices, once per group.
#
# @yield [group_name, group_data]
def each
  return to_enum(:each) unless block_given?

  # Tag every row with its position so each group can report which rows it holds.
  index_col = "__POLARS_GB_GROUP_INDICES"
  grouped =
    @df.lazy
      .with_row_index(name: index_col)
      .group_by(@by, **@named_by, maintain_order: @maintain_order)
      .agg(Polars.col(index_col))
      .collect(optimizations: QueryOptFlags.none)

  key_frame = grouped.select(Polars.all.exclude(index_col))

  # A String or Expr `@by` yields bare key values; anything else yields
  # one row of key values per group.
  keys =
    if @by.is_a?(::String) || @by.is_a?(Expr)
      key_frame.to_series.each
    else
      key_frame.iter_rows
    end

  row_indices = grouped.select(index_col).to_series
  position = 0

  until position >= row_indices.length
    key = keys.next
    rows = @df[row_indices[position]]
    position += 1

    yield key, rows
  end
end

#first(ignore_nulls: false) ⇒ DataFrame

Aggregate the first values in the group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).first
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────┬───────┐
# │ d      ┆ a   ┆ b    ┆ c     │
# │ ---    ┆ --- ┆ ---  ┆ ---   │
# │ str    ┆ i64 ┆ f64  ┆ bool  │
# ╞════════╪═════╪══════╪═══════╡
# │ Apple  ┆ 1   ┆ 0.5  ┆ true  │
# │ Orange ┆ 2   ┆ 0.5  ┆ true  │
# │ Banana ┆ 4   ┆ 13.0 ┆ false │
# └────────┴─────┴──────┴───────┘

Parameters:

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values (default false). If set to true, the first non-null value for each aggregation is returned, otherwise nil is returned if no non-null value exists.

Returns:

  • (DataFrame)

461
462
463
# File 'lib/polars/group_by.rb', line 461

# Aggregate the first values in the group.
#
# @param ignore_nulls [Boolean]
#   Forwarded unchanged to the `first` expression applied to all columns.
#
# @return [DataFrame]
def first(ignore_nulls: false)
  first_of_each = F.all.first(ignore_nulls: ignore_nulls)
  agg(first_of_each)
end

#having(*predicates) ⇒ GroupBy

Filter groups with a list of predicates after aggregation.

Using this method is equivalent to adding the predicates to the aggregation and filtering afterwards.

This method can be chained and all conditions will be combined using &.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => ["a", "b", "a", "b", "c"]
  }
)
df.group_by("a").having(Polars.len > 1).agg
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ str │
# ╞═════╡
# │ b   │
# │ a   │
# └─────┘

Parameters:

  • predicates (Array)

    Expressions that evaluate to a boolean value for each group. Typically, this requires the use of an aggregation function. Multiple predicates are combined using &.

Returns:

  • (GroupBy)

101
102
103
104
105
106
107
108
109
# File 'lib/polars/group_by.rb', line 101

# Filter groups with a list of predicates after aggregation.
#
# Builds a new GroupBy over the same frame, keys and ordering, whose
# predicates are the existing ones chained with the ones given here.
#
# @param predicates [Array]
#   Expressions that evaluate to a boolean value for each group.
#
# @return [GroupBy]
def having(*predicates)
  combined = Utils._chain_predicates(@predicates, predicates)

  GroupBy.new(@df, *@by, maintain_order: @maintain_order, predicates: combined, **@named_by)
end

#head(n = 5) ⇒ DataFrame

Get the first n rows of each group.

Examples:

df = Polars::DataFrame.new(
  {
    "letters" => ["c", "c", "a", "c", "a", "b"],
    "nrs" => [1, 2, 3, 4, 5, 6]
  }
)
# =>
# shape: (6, 2)
# ┌─────────┬─────┐
# │ letters ┆ nrs │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ c       ┆ 1   │
# │ c       ┆ 2   │
# │ a       ┆ 3   │
# │ c       ┆ 4   │
# │ a       ┆ 5   │
# │ b       ┆ 6   │
# └─────────┴─────┘
df.group_by("letters").head(2).sort("letters")
# =>
# shape: (5, 2)
# ┌─────────┬─────┐
# │ letters ┆ nrs │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ a       ┆ 3   │
# │ a       ┆ 5   │
# │ b       ┆ 6   │
# │ c       ┆ 1   │
# │ c       ┆ 2   │
# └─────────┴─────┘

Parameters:

  • n (Integer) (defaults to: 5)

    Number of rows to return.

Returns:

  • (DataFrame)

317
318
319
# File 'lib/polars/group_by.rb', line 317

# Get the first n rows of each group.
#
# @param n [Integer]
#   Number of rows to return.
#
# @return [DataFrame]
def head(n = 5)
  leading_rows = _lgb.head(n)
  leading_rows.collect(optimizations: QueryOptFlags._eager)
end

#last(ignore_nulls: false) ⇒ DataFrame

Aggregate the last values in the group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).last
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────┬───────┐
# │ d      ┆ a   ┆ b    ┆ c     │
# │ ---    ┆ --- ┆ ---  ┆ ---   │
# │ str    ┆ i64 ┆ f64  ┆ bool  │
# ╞════════╪═════╪══════╪═══════╡
# │ Apple  ┆ 3   ┆ 10.0 ┆ false │
# │ Orange ┆ 2   ┆ 0.5  ┆ true  │
# │ Banana ┆ 5   ┆ 14.0 ┆ true  │
# └────────┴─────┴──────┴───────┘

Parameters:

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values (default false). If set to true, the last non-null value for each aggregation is returned, otherwise nil is returned if no non-null value exists.

Returns:

  • (DataFrame)

495
496
497
# File 'lib/polars/group_by.rb', line 495

# Aggregate the last values in the group.
#
# @param ignore_nulls [Boolean]
#   Forwarded unchanged to the `last` expression applied to all columns.
#
# @return [DataFrame]
def last(ignore_nulls: false)
  last_of_each = F.all.last(ignore_nulls: ignore_nulls)
  agg(last_of_each)
end

#len(name: nil) ⇒ DataFrame

Return the number of rows in each group.

Examples:

df = Polars::DataFrame.new({"a" => ["Apple", "Apple", "Orange"], "b" => [1, nil, 2]})
df.group_by("a").len
# =>
# shape: (2, 2)
# ┌────────┬─────┐
# │ a      ┆ len │
# │ ---    ┆ --- │
# │ str    ┆ u32 │
# ╞════════╪═════╡
# │ Apple  ┆ 2   │
# │ Orange ┆ 1   │
# └────────┴─────┘
df.group_by("a").len(name: "n")
# =>
# shape: (2, 2)
# ┌────────┬─────┐
# │ a      ┆ n   │
# │ ---    ┆ --- │
# │ str    ┆ u32 │
# ╞════════╪═════╡
# │ Apple  ┆ 2   │
# │ Orange ┆ 1   │
# └────────┴─────┘

Parameters:

  • name (String) (defaults to: nil)

    Assign a name to the resulting column; if unset, defaults to "len".

Returns:

  • (DataFrame)

423
424
425
426
427
428
429
# File 'lib/polars/group_by.rb', line 423

# Return the number of rows in each group.
#
# @param name [String]
#   Name to alias the length expression to; when nil the expression is
#   used without an alias.
#
# @return [DataFrame]
def len(name: nil)
  counted = name.nil? ? F.len : F.len.alias(name)
  agg(counted)
end

#map_groups(&function) ⇒ DataFrame

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.

Implementing logic using a Ruby function is almost always significantly slower and more memory intensive than implementing the same logic using the native expression API because:

  • The native expression engine runs in Rust; UDFs run in Ruby.
  • Use of Ruby UDFs forces the DataFrame to be materialized in memory.
  • Polars-native expressions can be parallelised (UDFs cannot).
  • Polars-native expressions can be logically optimised (UDFs cannot).

Wherever possible you should strongly prefer the native expression API to achieve the best performance.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => [0, 1, 2, 3, 4],
    "color" => ["red", "green", "green", "red", "red"],
    "shape" => ["square", "triangle", "square", "triangle", "square"]
  }
)
df.group_by("color").map_groups { |group_df| group_df.sample(n: 2) }
# =>
# shape: (4, 3)
# ┌─────┬───────┬──────────┐
# │ id  ┆ color ┆ shape    │
# │ --- ┆ ---   ┆ ---      │
# │ i64 ┆ str   ┆ str      │
# ╞═════╪═══════╪══════════╡
# │ 1   ┆ green ┆ triangle │
# │ 2   ┆ green ┆ square   │
# │ 4   ┆ red   ┆ square   │
# │ 3   ┆ red   ┆ triangle │
# └─────┴───────┴──────────┘

Returns:

  • (DataFrame)

252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/polars/group_by.rb', line 252

# Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
#
# @param function [Proc]
#   Block handed to the underlying `group_by_map_groups` call.
#
# @return [DataFrame]
#
# @raise [TypeError]
#   If `having` predicates are set, if named grouping expressions are set,
#   or if any grouping key is not string-like.
def map_groups(&function)
  raise TypeError, "cannot call `map_groups` when filtering groups with `having`" if @predicates&.any?
  raise TypeError, "cannot call `map_groups` when grouping by named expressions" if @named_by&.any?
  unless @by.all? { |key| Utils.strlike?(key) }
    raise TypeError, "cannot call `map_groups` when grouping by an expression"
  end

  # The underlying call is given the grouping keys as strings.
  column_names = @by.map { |key| key.to_s }
  mapped = @df._df.group_by_map_groups(column_names, function, @maintain_order)
  @df.class._from_rbdf(mapped)
end

#max ⇒ DataFrame

Reduce the groups to the maximal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).max
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────┬──────┐
# │ d      ┆ a   ┆ b    ┆ c    │
# │ ---    ┆ --- ┆ ---  ┆ ---  │
# │ str    ┆ i64 ┆ f64  ┆ bool │
# ╞════════╪═════╪══════╪══════╡
# │ Apple  ┆ 3   ┆ 10.0 ┆ true │
# │ Orange ┆ 2   ┆ 0.5  ┆ true │
# │ Banana ┆ 5   ┆ 14.0 ┆ true │
# └────────┴─────┴──────┴──────┘

Returns:

  • (DataFrame)

582
583
584
# File 'lib/polars/group_by.rb', line 582

# Reduce the groups to the maximal value.
#
# @return [DataFrame]
def max
  maxima = Polars.all.max
  agg(maxima)
end

#mean ⇒ DataFrame

Reduce the groups to the mean values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).mean
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────────┬──────────┐
# │ d      ┆ a   ┆ b        ┆ c        │
# │ ---    ┆ --- ┆ ---      ┆ ---      │
# │ str    ┆ f64 ┆ f64      ┆ f64      │
# ╞════════╪═════╪══════════╪══════════╡
# │ Apple  ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
# │ Orange ┆ 2.0 ┆ 0.5      ┆ 1.0      │
# │ Banana ┆ 4.5 ┆ 13.5     ┆ 0.5      │
# └────────┴─────┴──────────┴──────────┘

Returns:

  • (DataFrame)

640
641
642
# File 'lib/polars/group_by.rb', line 640

# Reduce the groups to the mean values.
#
# @return [DataFrame]
def mean
  averages = Polars.all.mean
  agg(averages)
end

#median ⇒ DataFrame

Return the median per group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).median
# =>
# shape: (2, 3)
# ┌────────┬─────┬──────┐
# │ d      ┆ a   ┆ b    │
# │ ---    ┆ --- ┆ ---  │
# │ str    ┆ f64 ┆ f64  │
# ╞════════╪═════╪══════╡
# │ Apple  ┆ 2.0 ┆ 4.0  │
# │ Banana ┆ 4.0 ┆ 13.0 │
# └────────┴─────┴──────┘

Returns:

  • (DataFrame)

727
728
729
# File 'lib/polars/group_by.rb', line 727

# Return the median per group.
#
# @return [DataFrame]
def median
  medians = Polars.all.median
  agg(medians)
end

#min ⇒ DataFrame

Reduce the groups to the minimal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
  }
)
df.group_by("d", maintain_order: true).min
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────┬───────┐
# │ d      ┆ a   ┆ b    ┆ c     │
# │ ---    ┆ --- ┆ ---  ┆ ---   │
# │ str    ┆ i64 ┆ f64  ┆ bool  │
# ╞════════╪═════╪══════╪═══════╡
# │ Apple  ┆ 1   ┆ 0.5  ┆ false │
# │ Orange ┆ 2   ┆ 0.5  ┆ true  │
# │ Banana ┆ 4   ┆ 13.0 ┆ false │
# └────────┴─────┴──────┴───────┘

Returns:

  • (DataFrame)

553
554
555
# File 'lib/polars/group_by.rb', line 553

# Reduce the groups to the minimal value.
#
# @return [DataFrame]
def min
  minima = Polars.all.min
  agg(minima)
end

#n_unique ⇒ DataFrame

Count the unique values per group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 1, 3, 4, 5],
    "b" => [0.5, 0.5, 0.5, 10, 13, 14],
    "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).n_unique
# =>
# shape: (2, 3)
# ┌────────┬─────┬─────┐
# │ d      ┆ a   ┆ b   │
# │ ---    ┆ --- ┆ --- │
# │ str    ┆ u32 ┆ u32 │
# ╞════════╪═════╪═════╡
# │ Apple  ┆ 2   ┆ 2   │
# │ Banana ┆ 3   ┆ 3   │
# └────────┴─────┴─────┘

Returns:

  • (DataFrame)

667
668
669
# File 'lib/polars/group_by.rb', line 667

# Count the unique values per group.
#
# @return [DataFrame]
def n_unique
  distinct_counts = Polars.all.n_unique
  agg(distinct_counts)
end

#quantile(quantile, interpolation: "nearest") ⇒ DataFrame

Compute the quantile per group.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).quantile(1)
# =>
# shape: (3, 3)
# ┌────────┬─────┬──────┐
# │ d      ┆ a   ┆ b    │
# │ ---    ┆ --- ┆ ---  │
# │ str    ┆ f64 ┆ f64  │
# ╞════════╪═════╪══════╡
# │ Apple  ┆ 3.0 ┆ 10.0 │
# │ Orange ┆ 2.0 ┆ 0.5  │
# │ Banana ┆ 5.0 ┆ 14.0 │
# └────────┴─────┴──────┘

Parameters:

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:

  • (DataFrame)

700
701
702
# File 'lib/polars/group_by.rb', line 700

# Compute the quantile per group.
#
# @param quantile [Float]
#   Quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   Interpolation method.
#
# @return [DataFrame]
def quantile(quantile, interpolation: "nearest")
  per_group = Polars.all.quantile(quantile, interpolation: interpolation)
  agg(per_group)
end

#sum ⇒ DataFrame

Reduce the groups to the sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 2, 3, 4, 5],
    "b" => [0.5, 0.5, 4, 10, 13, 14],
    "c" => [true, true, true, false, false, true],
    "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
df.group_by("d", maintain_order: true).sum
# =>
# shape: (3, 4)
# ┌────────┬─────┬──────┬─────┐
# │ d      ┆ a   ┆ b    ┆ c   │
# │ ---    ┆ --- ┆ ---  ┆ --- │
# │ str    ┆ i64 ┆ f64  ┆ u32 │
# ╞════════╪═════╪══════╪═════╡
# │ Apple  ┆ 6   ┆ 14.5 ┆ 2   │
# │ Orange ┆ 2   ┆ 0.5  ┆ 1   │
# │ Banana ┆ 9   ┆ 27.0 ┆ 1   │
# └────────┴─────┴──────┴─────┘

Returns:

  • (DataFrame)

524
525
526
# File 'lib/polars/group_by.rb', line 524

# Reduce the groups to the sum.
#
# @return [DataFrame]
def sum
  totals = Polars.all.sum
  agg(totals)
end

#tail(n = 5) ⇒ DataFrame

Get the last n rows of each group.

Examples:

df = Polars::DataFrame.new(
  {
    "letters" => ["c", "c", "a", "c", "a", "b"],
    "nrs" => [1, 2, 3, 4, 5, 6]
  }
)
# =>
# shape: (6, 2)
# ┌─────────┬─────┐
# │ letters ┆ nrs │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ c       ┆ 1   │
# │ c       ┆ 2   │
# │ a       ┆ 3   │
# │ c       ┆ 4   │
# │ a       ┆ 5   │
# │ b       ┆ 6   │
# └─────────┴─────┘
df.group_by("letters").tail(2).sort("letters")
# =>
# shape: (5, 2)
# ┌─────────┬─────┐
# │ letters ┆ nrs │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ a       ┆ 3   │
# │ a       ┆ 5   │
# │ b       ┆ 6   │
# │ c       ┆ 2   │
# │ c       ┆ 4   │
# └─────────┴─────┘

Parameters:

  • n (Integer) (defaults to: 5)

    Number of rows to return.

Returns:

  • (DataFrame)

365
366
367
# File 'lib/polars/group_by.rb', line 365

# Get the last n rows of each group.
#
# @param n [Integer]
#   Number of rows to return.
#
# @return [DataFrame]
def tail(n = 5)
  trailing_rows = _lgb.tail(n)
  trailing_rows.collect(optimizations: QueryOptFlags._eager)
end