Class: Polars::Expr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/expr.rb

Overview

Expressions that can be used in various contexts.

Direct Known Subclasses

Selector

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.deserialize(source) ⇒ Expr

Note:

This function uses marshaling if the logical plan contains Ruby UDFs, and as such inherits the security implications. Deserializing can execute arbitrary code, so it should only be attempted on trusted data.

Note:

Serialization is not stable across Polars versions: an expression serialized in one Polars version may not be deserializable in another Polars version.

Read a serialized expression from a file.

Examples:

expr = Polars.col("foo").sum.over("bar")
bytes = expr.meta.serialize
Polars::Expr.deserialize(StringIO.new(bytes))
# => col("foo").sum().over([col("bar")])

Raises:

  • (Todo)


168
169
170
171
172
173
174
175
176
177
178
# File 'lib/polars/expr.rb', line 168

# Read a serialized expression from a file or IO-like source.
#
# @param source [Object] path-like object or IO containing serialized bytes.
# @return [Expr]
# @raise [Todo] when the native binding lacks +deserialize_binary+.
def self.deserialize(source)
  raise Todo unless RbExpr.respond_to?(:deserialize_binary)

  # Path-like inputs are normalized to a plain file path string first.
  source = Utils.normalize_filepath(source) if Utils.pathlike?(source)

  _from_rbexpr(RbExpr.deserialize_binary(source))
end

Instance Method Details

#!Expr Also known as: ~

Performs boolean not.



134
135
136
# File 'lib/polars/expr.rb', line 134

# Performs boolean not. Delegates to #is_not; also exposed as ~.
def !
  is_not
end

#!=(other) ⇒ Expr

Not equal.



113
114
115
# File 'lib/polars/expr.rb', line 113

# Element-wise inequality comparison.
def !=(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.neq(rhs))
end

#%(other) ⇒ Expr

Returns the modulo.



77
78
79
# File 'lib/polars/expr.rb', line 77

# Element-wise modulo.
def %(other)
  rhs = _to_rbexpr(other)
  wrap_expr(_rbexpr % rhs)
end

#&(other) ⇒ Expr

Bitwise AND.



33
34
35
36
# File 'lib/polars/expr.rb', line 33

# Bitwise/logical AND with another expression or literal.
def &(other)
  rhs = Utils.parse_into_expression(other)
  wrap_expr(_rbexpr.and_(rhs))
end

#*(other) ⇒ Expr

Performs multiplication.



63
64
65
# File 'lib/polars/expr.rb', line 63

# Element-wise multiplication.
def *(other)
  rhs = _to_rbexpr(other)
  wrap_expr(_rbexpr * rhs)
end

#**(power) ⇒ Expr

Raises to the power of exponent.



84
85
86
87
# File 'lib/polars/expr.rb', line 84

# Raise element-wise to the given power (expression or literal).
def **(power)
  wrap_expr(_rbexpr.pow(Utils.parse_into_expression(power)))
end

#+(other) ⇒ Expr

Performs addition.



49
50
51
# File 'lib/polars/expr.rb', line 49

# Element-wise addition.
def +(other)
  rhs = _to_rbexpr(other)
  wrap_expr(_rbexpr + rhs)
end

#-(other) ⇒ Expr

Performs subtraction.



56
57
58
# File 'lib/polars/expr.rb', line 56

# Element-wise subtraction.
def -(other)
  rhs = _to_rbexpr(other)
  wrap_expr(_rbexpr - rhs)
end

#-@Expr

Performs negation.



142
143
144
# File 'lib/polars/expr.rb', line 142

# Unary negation of the expression.
def -@
  negated = _rbexpr.neg
  wrap_expr(negated)
end

#/(other) ⇒ Expr

Performs division.



70
71
72
# File 'lib/polars/expr.rb', line 70

# Element-wise division.
def /(other)
  rhs = _to_rbexpr(other)
  wrap_expr(_rbexpr / rhs)
end

#<(other) ⇒ Expr

Less than.



120
121
122
# File 'lib/polars/expr.rb', line 120

# Element-wise less-than comparison.
def <(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.lt(rhs))
end

#<=(other) ⇒ Expr

Less than or equal.



99
100
101
# File 'lib/polars/expr.rb', line 99

# Element-wise less-than-or-equal comparison.
def <=(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.lt_eq(rhs))
end

#==(other) ⇒ Expr

Equal.



106
107
108
# File 'lib/polars/expr.rb', line 106

# Element-wise equality comparison; yields a boolean expression.
def ==(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.eq(rhs))
end

#>(other) ⇒ Expr

Greater than.



127
128
129
# File 'lib/polars/expr.rb', line 127

# Element-wise greater-than comparison.
def >(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.gt(rhs))
end

#>=(other) ⇒ Expr

Greater than or equal.



92
93
94
# File 'lib/polars/expr.rb', line 92

# Element-wise greater-than-or-equal comparison.
def >=(other)
  rhs = _to_expr(other)._rbexpr
  wrap_expr(_rbexpr.gt_eq(rhs))
end

#^(other) ⇒ Expr

Bitwise XOR.



25
26
27
28
# File 'lib/polars/expr.rb', line 25

# Bitwise/logical XOR with another expression or literal.
def ^(other)
  rhs = Utils.parse_into_expression(other)
  wrap_expr(_rbexpr.xor_(rhs))
end

#absExpr

Compute absolute values.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [-1.0, 0.0, 1.0, 2.0]
  }
)
df.select(Polars.col("A").abs)
# =>
# shape: (4, 1)
# ┌─────┐
# │ A   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 0.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘


6637
6638
6639
# File 'lib/polars/expr.rb', line 6637

# Element-wise absolute value.
def abs
  result = _rbexpr.abs
  wrap_expr(result)
end

#add(other) ⇒ Expr

Method equivalent of addition operator expr + other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").add(2).alias("x+int"),
  Polars.col("x").add(Polars.col("x").cum_prod).alias("x+expr")
)
# =>
# shape: (5, 3)
# ┌─────┬───────┬────────┐
# │ x   ┆ x+int ┆ x+expr │
# │ --- ┆ ---   ┆ ---    │
# │ i64 ┆ i64   ┆ i64    │
# ╞═════╪═══════╪════════╡
# │ 1   ┆ 3     ┆ 2      │
# │ 2   ┆ 4     ┆ 4      │
# │ 3   ┆ 5     ┆ 9      │
# │ 4   ┆ 6     ┆ 28     │
# │ 5   ┆ 7     ┆ 125    │
# └─────┴───────┴────────┘
df = Polars::DataFrame.new(
  {"x" => ["a", "d", "g"], "y": ["b", "e", "h"], "z": ["c", "f", "i"]}
)
df.with_columns(Polars.col("x").add(Polars.col("y")).add(Polars.col("z")).alias("xyz"))
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬─────┐
# │ x   ┆ y   ┆ z   ┆ xyz │
# │ --- ┆ --- ┆ --- ┆ --- │
# │ str ┆ str ┆ str ┆ str │
# ╞═════╪═════╪═════╪═════╡
# │ a   ┆ b   ┆ c   ┆ abc │
# │ d   ┆ e   ┆ f   ┆ def │
# │ g   ┆ h   ┆ i   ┆ ghi │
# └─────┴─────┴─────┴─────┘


4119
4120
4121
# File 'lib/polars/expr.rb', line 4119

# Method equivalent of the + operator.
def add(other)
  self + other
end

#agg_groupsExpr

Get the group indexes of the group by operation.

Should be used in aggregation context only.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [94, 95, 96, 97, 97, 99]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[u32] │
# ╞═══════╪═══════════╡
# │ one   ┆ [0, 1, 2] │
# │ two   ┆ [3, 4, 5] │
# └───────┴───────────┘


738
739
740
# File 'lib/polars/expr.rb', line 738

# Group indexes of the group_by operation (aggregation context only).
def agg_groups
  result = _rbexpr.agg_groups
  wrap_expr(result)
end

#alias(name) ⇒ Expr

Rename the output of an expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["a", "b", nil]
  }
)
df.select(
  [
    Polars.col("a").alias("bar"),
    Polars.col("b").alias("foo")
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ bar ┆ foo  │
# │ --- ┆ ---  │
# │ i64 ┆ str  │
# ╞═════╪══════╡
# │ 1   ┆ a    │
# │ 2   ┆ b    │
# │ 3   ┆ null │
# └─────┴──────┘


400
401
402
# File 'lib/polars/expr.rb', line 400

# Rename the output of this expression to `name`.
def alias(name)
  renamed = _rbexpr.alias(name)
  wrap_expr(renamed)
end

#all(ignore_nulls: true) ⇒ Expr

Check if all boolean values in a Boolean column are true.

This method is an expression - not to be confused with Polars.all which is a function to select all columns.

Examples:

df = Polars::DataFrame.new(
  {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
)
df.select(Polars.col("*").all)
# =>
# shape: (1, 3)
# ┌──────┬───────┬───────┐
# │ TT   ┆ TF    ┆ FF    │
# │ ---  ┆ ---   ┆ ---   │
# │ bool ┆ bool  ┆ bool  │
# ╞══════╪═══════╪═══════╡
# │ true ┆ false ┆ false │
# └──────┴───────┴───────┘


251
252
253
# File 'lib/polars/expr.rb', line 251

# Whether all boolean values in the column are true.
def all(ignore_nulls: true)
  result = _rbexpr.all(ignore_nulls)
  wrap_expr(result)
end

#and_(*others) ⇒ Expr

Method equivalent of bitwise "and" operator expr & other & ....

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5, 6, 7, 4, 8],
    "y" => [1.5, 2.5, 1.0, 4.0, -5.75],
    "z" => [-9, 2, -1, 4, 8]
  }
)
df.select(
  (Polars.col("x") >= Polars.col("z"))
  .and_(
    Polars.col("y") >= Polars.col("z"),
    Polars.col("y") == Polars.col("y"),
    Polars.col("z") <= Polars.col("x"),
    Polars.col("y") != Polars.col("x"),
  )
  .alias("all")
)
# =>
# shape: (5, 1)
# ┌───────┐
# │ all   │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ true  │
# │ true  │
# │ false │
# │ false │
# └───────┘


3756
3757
3758
# File 'lib/polars/expr.rb', line 3756

# Method form of the & operator, folded left to right over all operands.
def and_(*others)
  others.reduce(self) { |acc, expr| acc & expr }
end

#any(ignore_nulls: true) ⇒ Expr

Check if any boolean value in a Boolean column is true.

Examples:

df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
df.select(Polars.all.any)
# =>
# shape: (1, 2)
# ┌──────┬───────┐
# │ TF   ┆ FF    │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ false │
# └──────┴───────┘


226
227
228
# File 'lib/polars/expr.rb', line 226

# Whether any boolean value in the column is true.
def any(ignore_nulls: true)
  result = _rbexpr.any(ignore_nulls)
  wrap_expr(result)
end

#append(other, upcast: true) ⇒ Expr

Append expressions.

This is done by adding the chunks of other to this Series.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.all.head(1).append(Polars.all.tail(1)))
# =>
# shape: (2, 2)
# ┌─────┬──────┐
# │ a   ┆ b    │
# │ --- ┆ ---  │
# │ i64 ┆ i64  │
# ╞═════╪══════╡
# │ 8   ┆ null │
# │ 10  ┆ 4    │
# └─────┴──────┘


850
851
852
853
# File 'lib/polars/expr.rb', line 850

# Append the chunks of `other` to this expression's Series.
def append(other, upcast: true)
  other_expr = Utils.parse_into_expression(other)
  wrap_expr(_rbexpr.append(other_expr, upcast))
end

#approx_n_uniqueExpr

Approx count unique values.

This is done using the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").approx_n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘


2515
2516
2517
# File 'lib/polars/expr.rb', line 2515

# Approximate unique-value count (HyperLogLog++ estimator).
def approx_n_unique
  result = _rbexpr.approx_n_unique
  wrap_expr(result)
end

#arccosExpr

Compute the element-wise value for the inverse cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").arccos)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘


7058
7059
7060
# File 'lib/polars/expr.rb', line 7058

# Element-wise inverse cosine (result in radians).
def arccos
  result = _rbexpr.arccos
  wrap_expr(result)
end

#arccoshExpr

Compute the element-wise value for the inverse hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arccosh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘


7178
7179
7180
# File 'lib/polars/expr.rb', line 7178

# Element-wise inverse hyperbolic cosine.
def arccosh
  result = _rbexpr.arccosh
  wrap_expr(result)
end

#arcsinExpr

Compute the element-wise value for the inverse sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsin)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘


7038
7039
7040
# File 'lib/polars/expr.rb', line 7038

# Element-wise inverse sine (result in radians).
def arcsin
  result = _rbexpr.arcsin
  wrap_expr(result)
end

#arcsinhExpr

Compute the element-wise value for the inverse hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.881374 │
# └──────────┘


7158
7159
7160
# File 'lib/polars/expr.rb', line 7158

# Element-wise inverse hyperbolic sine.
def arcsinh
  result = _rbexpr.arcsinh
  wrap_expr(result)
end

#arctanExpr

Compute the element-wise value for the inverse tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.785398 │
# └──────────┘


7078
7079
7080
# File 'lib/polars/expr.rb', line 7078

# Element-wise inverse tangent (result in radians).
def arctan
  result = _rbexpr.arctan
  wrap_expr(result)
end

#arctanhExpr

Compute the element-wise value for the inverse hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctanh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ inf │
# └─────┘


7198
7199
7200
# File 'lib/polars/expr.rb', line 7198

# Element-wise inverse hyperbolic tangent.
def arctanh
  result = _rbexpr.arctanh
  wrap_expr(result)
end

#arg_maxExpr

Get the index of the maximal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘


1725
1726
1727
# File 'lib/polars/expr.rb', line 1725

# Index of the maximal value.
def arg_max
  result = _rbexpr.arg_max
  wrap_expr(result)
end

#arg_minExpr

Get the index of the minimal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# └─────┘


1749
1750
1751
# File 'lib/polars/expr.rb', line 1749

# Index of the minimal value.
def arg_min
  result = _rbexpr.arg_min
  wrap_expr(result)
end

#arg_sort(descending: false, nulls_last: false) ⇒ Expr

Get the index values that would sort this column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_sort)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# │ 2   │
# └─────┘


1701
1702
1703
# File 'lib/polars/expr.rb', line 1701

# Index values that would sort this column.
def arg_sort(descending: false, nulls_last: false)
  result = _rbexpr.arg_sort(descending, nulls_last)
  wrap_expr(result)
end

#arg_trueExpr

Note:

Modifies number of rows returned, so will fail in combination with other expressions. Use as only expression in select / with_columns.

Return indices where expression evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2, 1]})
df.select((Polars.col("a") == 1).arg_true)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# │ 3   │
# └─────┘


277
278
279
# File 'lib/polars/expr.rb', line 277

# Indices where this expression evaluates to true.
# NOTE: changes the row count, so use it as the only expression in a select.
def arg_true
  result = Plr.arg_where(_rbexpr)
  wrap_expr(result)
end

#arg_uniqueExpr

Get index of first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.col("a").arg_unique)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# │ 2   │
# └─────┘
df.select(Polars.col("b").arg_unique)
# =>
# shape: (2, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘


2606
2607
2608
# File 'lib/polars/expr.rb', line 2606

# Index of the first occurrence of each unique value.
def arg_unique
  result = _rbexpr.arg_unique
  wrap_expr(result)
end

#arrArrayExpr

Create an object namespace of all array related methods.



8412
8413
8414
# File 'lib/polars/expr.rb', line 8412

# Namespace object exposing array-related methods for this expression.
def arr
  ArrayExpr.new(self)
end

#backward_fill(limit: nil) ⇒ Expr

Fill missing values with the next to be seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.backward_fill)
# =>
# shape: (3, 2)
# ┌──────┬─────┐
# │ a    ┆ b   │
# │ ---  ┆ --- │
# │ i64  ┆ i64 │
# ╞══════╪═════╡
# │ 1    ┆ 4   │
# │ 2    ┆ 6   │
# │ null ┆ 6   │
# └──────┴─────┘


2163
2164
2165
# File 'lib/polars/expr.rb', line 2163

# Fill missing values with the next seen value; delegates to fill_null
# with the "backward" strategy.
def backward_fill(limit: nil)
  fill_null(strategy: "backward", limit: limit)
end

#binBinaryExpr

Create an object namespace of all binary related methods.



8419
8420
8421
# File 'lib/polars/expr.rb', line 8419

# Namespace object exposing binary-related methods for this expression.
def bin
  BinaryExpr.new(self)
end

#bitwise_andExpr

Perform an aggregation of bitwise ANDs.

Examples:

df = Polars::DataFrame.new({"n" => [-1, 0, 1]})
df.select(Polars.col("n").bitwise_and)
# =>
# shape: (1, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# └─────┘
df = Polars::DataFrame.new(
  {"grouper" => ["a", "a", "a", "b", "b"], "n" => [-1, 0, 1, -1, 1]}
)
df.group_by("grouper", maintain_order: true).agg(Polars.col("n").bitwise_and)
# =>
# shape: (2, 2)
# ┌─────────┬─────┐
# │ grouper ┆ n   │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ a       ┆ 0   │
# │ b       ┆ 1   │
# └─────────┴─────┘


8326
8327
8328
# File 'lib/polars/expr.rb', line 8326

# Aggregate the column with bitwise AND.
def bitwise_and
  result = _rbexpr.bitwise_and
  wrap_expr(result)
end

#bitwise_count_onesExpr

Evaluate the number of set bits.



8255
8256
8257
# File 'lib/polars/expr.rb', line 8255

# Number of set bits per value.
def bitwise_count_ones
  result = _rbexpr.bitwise_count_ones
  wrap_expr(result)
end

#bitwise_count_zerosExpr

Evaluate the number of unset bits.



8262
8263
8264
# File 'lib/polars/expr.rb', line 8262

# Number of unset bits per value.
def bitwise_count_zeros
  result = _rbexpr.bitwise_count_zeros
  wrap_expr(result)
end

#bitwise_leading_onesExpr

Evaluate the number of most-significant set bits before seeing an unset bit.



8269
8270
8271
# File 'lib/polars/expr.rb', line 8269

# Count of leading (most-significant) set bits per value.
def bitwise_leading_ones
  result = _rbexpr.bitwise_leading_ones
  wrap_expr(result)
end

#bitwise_leading_zerosExpr

Evaluate the number of most-significant unset bits before seeing a set bit.



8276
8277
8278
# File 'lib/polars/expr.rb', line 8276

# Count of leading (most-significant) unset bits per value.
def bitwise_leading_zeros
  result = _rbexpr.bitwise_leading_zeros
  wrap_expr(result)
end

#bitwise_orExpr

Perform an aggregation of bitwise ORs.

Examples:

df = Polars::DataFrame.new({"n" => [-1, 0, 1]})
df.select(Polars.col("n").bitwise_or)
# =>
# shape: (1, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ -1  │
# └─────┘
df = Polars::DataFrame.new(
  {"grouper" => ["a", "a", "a", "b", "b"], "n" => [-1, 0, 1, -1, 1]}
)
df.group_by("grouper", maintain_order: true).agg(Polars.col("n").bitwise_or)
# =>
# shape: (2, 2)
# ┌─────────┬─────┐
# │ grouper ┆ n   │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ a       ┆ -1  │
# │ b       ┆ -1  │
# └─────────┴─────┘


8362
8363
8364
# File 'lib/polars/expr.rb', line 8362

# Aggregate the column with bitwise OR.
def bitwise_or
  result = _rbexpr.bitwise_or
  wrap_expr(result)
end

#bitwise_trailing_onesExpr

Evaluate the number of least-significant set bits before seeing an unset bit.



8283
8284
8285
# File 'lib/polars/expr.rb', line 8283

# Count of trailing (least-significant) set bits per value.
def bitwise_trailing_ones
  result = _rbexpr.bitwise_trailing_ones
  wrap_expr(result)
end

#bitwise_trailing_zerosExpr

Evaluate the number of least-significant unset bits before seeing a set bit.



8290
8291
8292
# File 'lib/polars/expr.rb', line 8290

# Count of trailing (least-significant) unset bits per value.
def bitwise_trailing_zeros
  result = _rbexpr.bitwise_trailing_zeros
  wrap_expr(result)
end

#bitwise_xorExpr

Perform an aggregation of bitwise XORs.

Examples:

df = Polars::DataFrame.new({"n" => [-1, 0, 1]})
df.select(Polars.col("n").bitwise_xor)
# =>
# shape: (1, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ -2  │
# └─────┘
df = Polars::DataFrame.new(
  {"grouper" => ["a", "a", "a", "b", "b"], "n" => [-1, 0, 1, -1, 1]}
)
df.group_by("grouper", maintain_order: true).agg(Polars.col("n").bitwise_xor)
# =>
# shape: (2, 2)
# ┌─────────┬─────┐
# │ grouper ┆ n   │
# │ ---     ┆ --- │
# │ str     ┆ i64 │
# ╞═════════╪═════╡
# │ a       ┆ -2  │
# │ b       ┆ -2  │
# └─────────┴─────┘


8398
8399
8400
# File 'lib/polars/expr.rb', line 8398

# Aggregate the column with bitwise XOR.
def bitwise_xor
  result = _rbexpr.bitwise_xor
  wrap_expr(result)
end

#bottom_k(k: 5) ⇒ Expr

Return the k smallest elements.

Use `top_k` if you want the k largest elements instead.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘


1563
1564
1565
1566
# File 'lib/polars/expr.rb', line 1563

# Return the k smallest elements; `k` may itself be an expression.
def bottom_k(k: 5)
  wrap_expr(_rbexpr.bottom_k(Utils.parse_into_expression(k)))
end

#bottom_k_by(by, k: 5, reverse: false) ⇒ Expr

Return the elements corresponding to the k smallest elements of the by column(s).

Non-null elements are always preferred over null elements, regardless of the value of reverse. The output is not guaranteed to be in any particular order; call sort after this function if you wish the output to be sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 4, 5, 6],
    "b" => [6, 5, 4, 3, 2, 1],
    "c" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
  }
)
# =>
# shape: (6, 3)
# ┌─────┬─────┬────────┐
# │ a   ┆ b   ┆ c      │
# │ --- ┆ --- ┆ ---    │
# │ i64 ┆ i64 ┆ str    │
# ╞═════╪═════╪════════╡
# │ 1   ┆ 6   ┆ Apple  │
# │ 2   ┆ 5   ┆ Orange │
# │ 3   ┆ 4   ┆ Apple  │
# │ 4   ┆ 3   ┆ Apple  │
# │ 5   ┆ 2   ┆ Banana │
# │ 6   ┆ 1   ┆ Banana │
# └─────┴─────┴────────┘

Get the bottom 2 rows by column a or b.

df.select(
  Polars.all.bottom_k_by("a", k: 2).name.suffix("_btm_by_a"),
  Polars.all.bottom_k_by("b", k: 2).name.suffix("_btm_by_b")
)
# =>
# shape: (2, 6)
# ┌────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐
# │ a_btm_by_a ┆ b_btm_by_a ┆ c_btm_by_a ┆ a_btm_by_b ┆ b_btm_by_b ┆ c_btm_by_b │
# │ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        │
# │ i64        ┆ i64        ┆ str        ┆ i64        ┆ i64        ┆ str        │
# ╞════════════╪════════════╪════════════╪════════════╪════════════╪════════════╡
# │ 1          ┆ 6          ┆ Apple      ┆ 6          ┆ 1          ┆ Banana     │
# │ 2          ┆ 5          ┆ Orange     ┆ 5          ┆ 2          ┆ Banana     │
# └────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘

Get the bottom 2 rows by multiple columns with given order.

df.select(
  Polars.all
  .bottom_k_by(["c", "a"], k: 2, reverse: [false, true])
  .name.suffix("_by_ca"),
  Polars.all
  .bottom_k_by(["c", "b"], k: 2, reverse: [false, true])
  .name.suffix("_by_cb"),
)
# =>
# shape: (2, 6)
# ┌─────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
# │ a_by_ca ┆ b_by_ca ┆ c_by_ca ┆ a_by_cb ┆ b_by_cb ┆ c_by_cb │
# │ ---     ┆ ---     ┆ ---     ┆ ---     ┆ ---     ┆ ---     │
# │ i64     ┆ i64     ┆ str     ┆ i64     ┆ i64     ┆ str     │
# ╞═════════╪═════════╪═════════╪═════════╪═════════╪═════════╡
# │ 4       ┆ 3       ┆ Apple   ┆ 1       ┆ 6       ┆ Apple   │
# │ 3       ┆ 4       ┆ Apple   ┆ 3       ┆ 4       ┆ Apple   │
# └─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘

Get the bottom 2 rows by column a in each group.

df.group_by("c", maintain_order: true)
  .agg(Polars.all.bottom_k_by("a", k: 2))
  .explode(Polars.all.exclude("c"))
# =>
# shape: (5, 3)
# ┌────────┬─────┬─────┐
# │ c      ┆ a   ┆ b   │
# │ ---    ┆ --- ┆ --- │
# │ str    ┆ i64 ┆ i64 │
# ╞════════╪═════╪═════╡
# │ Apple  ┆ 1   ┆ 6   │
# │ Apple  ┆ 3   ┆ 4   │
# │ Orange ┆ 2   ┆ 5   │
# │ Banana ┆ 5   ┆ 2   │
# │ Banana ┆ 6   ┆ 1   │
# └────────┴─────┴─────┘


1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
# File 'lib/polars/expr.rb', line 1663

# Elements corresponding to the k smallest values of the `by` column(s).
#
# @param by [Object] column(s) to rank by.
# @param k [Integer, Expr] number of elements to return.
# @param reverse [Boolean, Array] per-column sort direction flags.
def bottom_k_by(by, k: 5, reverse: false)
  k_expr = Utils.parse_into_expression(k)
  by_exprs = Utils.parse_into_list_of_expressions(by)
  # `reverse` is broadcast to one flag per `by` column.
  reverse_flags = Utils.extend_bool(reverse, by_exprs.length, "reverse", "by")
  wrap_expr(_rbexpr.bottom_k_by(by_exprs, k_expr, reverse_flags))
end

#cast(dtype, strict: true, wrap_numerical: false) ⇒ Expr

Cast between data types.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["4", "5", "6"]
  }
)
df.with_columns(
  [
    Polars.col("a").cast(Polars::Float64),
    Polars.col("b").cast(Polars::Int32)
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i32 │
# ╞═════╪═════╡
# │ 1.0 ┆ 4   │
# │ 2.0 ┆ 5   │
# │ 3.0 ┆ 6   │
# └─────┴─────┘


1304
1305
1306
1307
# File 'lib/polars/expr.rb', line 1304

# Cast the expression to another data type.
def cast(dtype, strict: true, wrap_numerical: false)
  target = Utils.parse_into_datatype_expr(dtype)
  wrap_expr(_rbexpr.cast(target._rbdatatype_expr, strict, wrap_numerical))
end

#catCatExpr

Create an object namespace of all categorical related methods.



8426
8427
8428
# File 'lib/polars/expr.rb', line 8426

# Namespace object exposing categorical-related methods for this expression.
def cat
  CatExpr.new(self)
end

#cbrtExpr

Compute the cube root of the elements.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").cbrt)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.259921 │
# │ 1.587401 │
# └──────────┘


321
322
323
# File 'lib/polars/expr.rb', line 321

# Element-wise cube root.
def cbrt
  result = _rbexpr.cbrt
  wrap_expr(result)
end

#ceilExpr

Rounds up to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").ceil)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 1.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘


1145
1146
1147
# File 'lib/polars/expr.rb', line 1145

# Round up to the nearest integer value (floating point columns only).
def ceil
  result = _rbexpr.ceil
  wrap_expr(result)
end

#clip(lower_bound = nil, upper_bound = nil) ⇒ Expr

Set values outside the given boundaries to the boundary value.

Only works for numeric and temporal columns. If you want to clip other data types, consider writing a when-then-otherwise expression.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_columns(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ 1           │
# │ 5    ┆ 5           │
# │ null ┆ null        │
# │ 50   ┆ 10          │
# └──────┴─────────────┘


6862
6863
6864
6865
6866
6867
6868
6869
6870
# File 'lib/polars/expr.rb', line 6862

# Clamp values to the given boundaries; a nil bound leaves that side open.
def clip(lower_bound = nil, upper_bound = nil)
  lower_bound = Utils.parse_into_expression(lower_bound) unless lower_bound.nil?
  upper_bound = Utils.parse_into_expression(upper_bound) unless upper_bound.nil?
  wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
end

#cosExpr

Compute the element-wise value for the cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").cos)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘


6978
6979
6980
# File 'lib/polars/expr.rb', line 6978

# Element-wise cosine.
def cos
  result = _rbexpr.cos
  wrap_expr(result)
end

#coshExpr

Compute the element-wise value for the hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").cosh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.543081 │
# └──────────┘


7118
7119
7120
# File 'lib/polars/expr.rb', line 7118

# Element-wise hyperbolic cosine.
def cosh
  result = _rbexpr.cosh
  wrap_expr(result)
end

#cotExpr

Compute the element-wise value for the cotangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").cot.round(2))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 0.64 │
# └──────┘


7018
7019
7020
# File 'lib/polars/expr.rb', line 7018

# Element-wise cotangent.
def cot
  result = _rbexpr.cot
  wrap_expr(result)
end

#countExpr

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘


758
759
760
# File 'lib/polars/expr.rb', line 758

# Count of (non-null) values in this expression.
def count
  result = _rbexpr.count
  wrap_expr(result)
end

#cum_count(reverse: false) ⇒ Expr

Get an array with the cumulative count computed at every element.

Counting from 0 to len

Examples:

df = Polars::DataFrame.new({"a" => ["x", "k", nil, "d"]})
df.with_columns(
  [
    Polars.col("a").cum_count.alias("cum_count"),
    Polars.col("a").cum_count(reverse: true).alias("cum_count_reverse")
  ]
)
# =>
# shape: (4, 3)
# ┌──────┬───────────┬───────────────────┐
# │ a    ┆ cum_count ┆ cum_count_reverse │
# │ ---  ┆ ---       ┆ ---               │
# │ str  ┆ u32       ┆ u32               │
# ╞══════╪═══════════╪═══════════════════╡
# │ x    ┆ 1         ┆ 3                 │
# │ k    ┆ 2         ┆ 2                 │
# │ null ┆ 2         ┆ 1                 │
# │ d    ┆ 3         ┆ 1                 │
# └──────┴───────────┴───────────────────┘


1095
1096
1097
# File 'lib/polars/expr.rb', line 1095

# Cumulative count at every element (optionally from the end).
def cum_count(reverse: false)
  result = _rbexpr.cum_count(reverse)
  wrap_expr(result)
end

#cum_max(reverse: false) ⇒ Expr

Get an array with the cumulative max computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_max,
    Polars.col("a").cum_max(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 4         │
# │ 2   ┆ 4         │
# │ 3   ┆ 4         │
# │ 4   ┆ 4         │
# └─────┴───────────┘


1062
1063
1064
# File 'lib/polars/expr.rb', line 1062

# Cumulative maximum at every element (optionally from the end).
def cum_max(reverse: false)
  result = _rbexpr.cum_max(reverse)
  wrap_expr(result)
end

#cum_min(reverse: false) ⇒ Expr

Get an array with the cumulative min computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_min,
    Polars.col("a").cum_min(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 1         │
# │ 1   ┆ 2         │
# │ 1   ┆ 3         │
# │ 1   ┆ 4         │
# └─────┴───────────┘


1031
1032
1033
# File 'lib/polars/expr.rb', line 1031

# Cumulative minimum at every element (optionally from the end).
def cum_min(reverse: false)
  result = _rbexpr.cum_min(reverse)
  wrap_expr(result)
end

#cum_prod(reverse: false) ⇒ Expr

Note:

Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before multiplying to prevent overflow issues.

Get an array with the cumulative product computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_prod,
    Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 24        │
# │ 2   ┆ 24        │
# │ 6   ┆ 12        │
# │ 24  ┆ 4         │
# └─────┴───────────┘


1000
1001
1002
# File 'lib/polars/expr.rb', line 1000

# Cumulative product at every element (optionally from the end).
def cum_prod(reverse: false)
  result = _rbexpr.cum_prod(reverse)
  wrap_expr(result)
end

#cum_sum(reverse: false) ⇒ Expr

Note:

Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues.

Get an array with the cumulative sum computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_sum,
    Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 10        │
# │ 3   ┆ 9         │
# │ 6   ┆ 7         │
# │ 10  ┆ 4         │
# └─────┴───────────┘


965
966
967
# File 'lib/polars/expr.rb', line 965

# Cumulative sum at every element (optionally from the end).
def cum_sum(reverse: false)
  result = _rbexpr.cum_sum(reverse)
  wrap_expr(result)
end

#cumulative_eval(expr, min_samples: 1) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

This can be really slow as it can have O(n^2) complexity. Don't use this for operations that visit all elements.

Run an expression over a sliding window that increases 1 slot every iteration.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.col("values").cumulative_eval(
      Polars.element.first - Polars.element.last ** 2
    )
  ]
)
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 0      │
# │ -3     │
# │ -8     │
# │ -15    │
# │ -24    │
# └────────┘


7814
7815
7816
7817
7818
# File 'lib/polars/expr.rb', line 7814

# Run `expr` over an expanding window (one extra slot per iteration).
# Can be O(n^2); avoid for operations that visit all elements.
def cumulative_eval(expr, min_samples: 1)
  wrap_expr(_rbexpr.cumulative_eval(expr._rbexpr, min_samples))
end

#cut(breaks, labels: nil, left_closed: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories.

Examples:

Divide a column into three categories.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
)
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ foo ┆ cut │
# │ --- ┆ --- │
# │ i64 ┆ cat │
# ╞═════╪═════╡
# │ -2  ┆ a   │
# │ -1  ┆ a   │
# │ 0   ┆ b   │
# │ 1   ┆ b   │
# │ 2   ┆ c   │
# └─────┴─────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
).unnest("cut")
# =>
# shape: (5, 3)
# ┌─────┬────────────┬────────────┐
# │ foo ┆ breakpoint ┆ category   │
# │ --- ┆ ---        ┆ ---        │
# │ i64 ┆ f64        ┆ cat        │
# ╞═════╪════════════╪════════════╡
# │ -2  ┆ -1.0       ┆ (-inf, -1] │
# │ -1  ┆ -1.0       ┆ (-inf, -1] │
# │ 0   ┆ 1.0        ┆ (-1, 1]    │
# │ 1   ┆ 1.0        ┆ (-1, 1]    │
# │ 2   ┆ inf        ┆ (1, inf]   │
# └─────┴────────────┴────────────┘


3227
3228
3229
# File 'lib/polars/expr.rb', line 3227

# Bin continuous values into discrete categories.
#
# @param breaks [Array] unique cut points.
# @param labels [Array] names for the categories (one more than breaks).
# @param left_closed [Boolean] close intervals on the left instead of the right.
# @param include_breaks [Boolean] also return the right endpoint of each bin.
# @return [Expr]
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  binned = _rbexpr.cut(breaks, labels, left_closed, include_breaks)
  wrap_expr(binned)
end

#degreesExpr

Convert from radians to degrees.

Examples:

df = Polars::DataFrame.new({"a" => (-4...5).map { |x| x * Math::PI }})
df.select(Polars.col("a").degrees)
# =>
# shape: (9, 1)
# ┌────────┐
# │ a      │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ -720.0 │
# │ -540.0 │
# │ -360.0 │
# │ -180.0 │
# │ 0.0    │
# │ 180.0  │
# │ 360.0  │
# │ 540.0  │
# │ 720.0  │
# └────────┘


7226
7227
7228
# File 'lib/polars/expr.rb', line 7226

# Convert from radians to degrees.
#
# @return [Expr]
def degrees
  converted = _rbexpr.degrees
  wrap_expr(converted)
end

#diff(n: 1, null_behavior: "ignore") ⇒ Expr

Calculate the n-th discrete difference.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").diff)
# =>
# shape: (3, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ -10  │
# │ 20   │
# └──────┘


6732
6733
6734
6735
# File 'lib/polars/expr.rb', line 6732

# Calculate the n-th discrete difference.
#
# @param n [Integer] number of slots to shift when differencing.
# @param null_behavior ["ignore", "drop"] how to handle resulting nulls.
# @return [Expr]
def diff(n: 1, null_behavior: "ignore")
  shift_expr = Utils.parse_into_expression(n)
  wrap_expr(_rbexpr.diff(shift_expr, null_behavior))
end

#dot(other) ⇒ Expr

Compute the dot/inner product between two Expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 3, 5],
    "b" => [2, 4, 6]
  }
)
df.select(Polars.col("a").dot(Polars.col("b")))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 44  │
# └─────┘


1231
1232
1233
1234
# File 'lib/polars/expr.rb', line 1231

# Compute the dot/inner product between two expressions.
#
# @param other [Expr, String] expression (or column name) to compute against.
# @return [Expr]
def dot(other)
  rhs = Utils.parse_into_expression(other, str_as_lit: false)
  wrap_expr(_rbexpr.dot(rhs))
end

#drop_nansExpr

Drop floating point NaN values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nans)
# =>
# shape: (3, 1)
# ┌──────┐
# │ b    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.0  │
# │ 4.0  │
# └──────┘


930
931
932
# File 'lib/polars/expr.rb', line 930

# Drop floating point NaN values (null values are kept).
#
# @return [Expr]
def drop_nans
  filtered = _rbexpr.drop_nans
  wrap_expr(filtered)
end

#drop_nullsExpr

Drop null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nulls)
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# │ 4.0 │
# │ NaN │
# └─────┘


903
904
905
# File 'lib/polars/expr.rb', line 903

# Drop null values (NaN values are kept).
#
# @return [Expr]
def drop_nulls
  filtered = _rbexpr.drop_nulls
  wrap_expr(filtered)
end

#dtDateTimeExpr

Create an object namespace of all datetime related methods.



8433
8434
8435
# File 'lib/polars/expr.rb', line 8433

# Create an object namespace of all datetime related methods.
#
# @return [DateTimeExpr]
def dt
  DateTimeExpr.new(self)
end

#entropy(base: nil, normalize: true) ⇒ Expr

Computes the entropy.

Uses the formula -sum(pk * log(pk)), where pk are the discrete probabilities.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").entropy(base: 2))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.459148 │
# └──────────┘
df.select(Polars.col("a").entropy(base: 2, normalize: false))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -6.754888 │
# └───────────┘


7764
7765
7766
7767
7768
7769
7770
7771
7772
# File 'lib/polars/expr.rb', line 7764

# Compute the entropy: -sum(pk * log(pk)), where pk are discrete probabilities.
#
# @param base [Float] logarithm base; currently defaults to 2 (with a warning)
#   and will change to Math::E in a future version.
# @param normalize [Boolean] normalize pk if it does not sum to 1.
# @return [Expr]
def entropy(base: nil, normalize: true)
  # TODO update (including param docs)
  if base.nil?
    warn "The default `base` for `entropy` method will change from `2` to `Math::E` in a future version"
    base = 2
  end
  entropy_expr = _rbexpr.entropy(base, normalize)
  wrap_expr(entropy_expr)
end

#eq(other) ⇒ Expr

Method equivalent of equality operator expr == other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x == y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x == y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ false  │
# │ 2.0 ┆ 2.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 4.0 ┆ 4.0 ┆ true   │
# └─────┴─────┴────────┘


3829
3830
3831
# File 'lib/polars/expr.rb', line 3829

# Method equivalent of the equality operator +expr == other+.
#
# @param other [Object] value to compare with.
# @return [Expr]
def eq(other)
  self == other
end

#eq_missing(other) ⇒ Expr

Method equivalent of equality operator expr == other where nil == nil.

This differs from default eq where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  data={
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x eq y"),
  Polars.col("x").eq_missing(Polars.col("y")).alias("x eq_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x eq y ┆ x eq_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ false  ┆ false          │
# │ 2.0  ┆ 2.0  ┆ true   ┆ true           │
# │ NaN  ┆ NaN  ┆ true   ┆ true           │
# │ 4.0  ┆ 4.0  ┆ true   ┆ true           │
# │ null ┆ 5.0  ┆ null   ┆ false          │
# │ null ┆ null ┆ null   ┆ true           │
# └──────┴──────┴────────┴────────────────┘


3867
3868
3869
3870
# File 'lib/polars/expr.rb', line 3867

# Equality where +nil == nil+ evaluates to +true+ (nulls are not propagated,
# unlike the default #eq).
#
# @param other [Object] value to compare with (strings are treated as literals).
# @return [Expr]
def eq_missing(other)
  rhs = Utils.parse_into_expression(other, str_as_lit: true)
  wrap_expr(_rbexpr.eq_missing(rhs))
end

#ewm_mean(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, min_samples: 1, ignore_nulls: false) ⇒ Expr

Exponentially-weighted moving average.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_mean(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.666667 │
# │ 2.428571 │
# └──────────┘


7412
7413
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
# File 'lib/polars/expr.rb', line 7412

# Exponentially-weighted moving average.
#
# Exactly one of +com+, +span+, +half_life+, +alpha+ must be provided;
# +_prepare_alpha+ converts whichever was given to a smoothing factor.
#
# @return [Expr]
def ewm_mean(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, min_samples: 1, ignore_nulls: false)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  wrap_expr(_rbexpr.ewm_mean(smoothing, adjust, min_samples, ignore_nulls))
end

#ewm_mean_by(by, half_life:) ⇒ Expr

Compute time-based exponentially weighted moving average.

Examples:

df = Polars::DataFrame.new(
  {
    "values": [0, 1, 2, nil, 4],
    "times": [
        Date.new(2020, 1, 1),
        Date.new(2020, 1, 3),
        Date.new(2020, 1, 10),
        Date.new(2020, 1, 15),
        Date.new(2020, 1, 17)
    ]
  }
).sort("times")
df.with_columns(
  result: Polars.col("values").ewm_mean_by("times", half_life: "4d")
)
# =>
# shape: (5, 3)
# ┌────────┬────────────┬──────────┐
# │ values ┆ times      ┆ result   │
# │ ---    ┆ ---        ┆ ---      │
# │ i64    ┆ date       ┆ f64      │
# ╞════════╪════════════╪══════════╡
# │ 0      ┆ 2020-01-01 ┆ 0.0      │
# │ 1      ┆ 2020-01-03 ┆ 0.292893 │
# │ 2      ┆ 2020-01-10 ┆ 1.492474 │
# │ null   ┆ 2020-01-15 ┆ null     │
# │ 4      ┆ 2020-01-17 ┆ 3.254508 │
# └────────┴────────────┴──────────┘


7485
7486
7487
7488
7489
7490
7491
7492
# File 'lib/polars/expr.rb', line 7485

# Compute a time-based exponentially weighted moving average.
#
# @param by [Expr, String] column of times to weight by.
# @param half_life [String] duration after which an observation's weight halves.
# @return [Expr]
def ewm_mean_by(by, half_life:)
  times = Utils.parse_into_expression(by)
  duration = Utils.parse_as_duration_string(half_life)
  wrap_expr(_rbexpr.ewm_mean_by(times, duration))
end

#ewm_std(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_samples: 1, ignore_nulls: false) ⇒ Expr

Exponentially-weighted moving standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_std(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.707107 │
# │ 0.963624 │
# └──────────┘


7512
7513
7514
7515
7516
7517
7518
7519
7520
7521
7522
7523
7524
# File 'lib/polars/expr.rb', line 7512

# Exponentially-weighted moving standard deviation.
#
# Exactly one of +com+, +span+, +half_life+, +alpha+ must be provided.
#
# @return [Expr]
def ewm_std(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_samples: 1, ignore_nulls: false)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  wrap_expr(_rbexpr.ewm_std(smoothing, adjust, bias, min_samples, ignore_nulls))
end

#ewm_var(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_samples: 1, ignore_nulls: false) ⇒ Expr

Exponentially-weighted moving variance.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_var(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.5      │
# │ 0.928571 │
# └──────────┘


7544
7545
7546
7547
7548
7549
7550
7551
7552
7553
7554
7555
7556
# File 'lib/polars/expr.rb', line 7544

# Exponentially-weighted moving variance.
#
# Exactly one of +com+, +span+, +half_life+, +alpha+ must be provided.
#
# @return [Expr]
def ewm_var(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_samples: 1, ignore_nulls: false)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  wrap_expr(_rbexpr.ewm_var(smoothing, adjust, bias, min_samples, ignore_nulls))
end

#exclude(columns, *more_columns) ⇒ Expr

Exclude certain columns from a wildcard/regex selection.

You may also use regexes in the exclude list. They must start with ^ and end with $.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
df.select(Polars.all.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘


438
439
440
# File 'lib/polars/expr.rb', line 438

# Exclude certain columns from a wildcard/regex selection.
#
# Regexes in the exclude list must start with ^ and end with $.
#
# @return [Expr]
def exclude(columns, *more_columns)
  selector = meta.as_selector.exclude(columns, *more_columns)
  selector.as_expr
end

#expExpr

Compute the exponential, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").exp)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 2.718282 │
# │ 7.389056 │
# │ 54.59815 │
# └──────────┘


365
366
367
# File 'lib/polars/expr.rb', line 365

# Compute the exponential, element-wise.
#
# @return [Expr]
def exp
  result = _rbexpr.exp
  wrap_expr(result)
end

#explode(empty_as_null: true, keep_nulls: true) ⇒ Expr

Explode a list or utf8 Series.

This means that every item is expanded to a new row.

Examples:

df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
df.select(Polars.col("b").explode)
# =>
# shape: (6, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# └─────┘


3615
3616
3617
# File 'lib/polars/expr.rb', line 3615

# Explode a list or utf8 Series, expanding every item to its own row.
#
# @param empty_as_null [Boolean] emit a null row for empty lists.
# @param keep_nulls [Boolean] keep null list entries as null rows.
# @return [Expr]
def explode(empty_as_null: true, keep_nulls: true)
  exploded = _rbexpr.explode(empty_as_null, keep_nulls)
  wrap_expr(exploded)
end

#extExtensionExpr

Create an object namespace of all extension type related expressions.



8468
8469
8470
# File 'lib/polars/expr.rb', line 8468

# Create an object namespace of all extension type related expressions.
#
# @return [ExtensionExpr]
def ext
  ExtensionExpr.new(self)
end

#extend_constant(value, n) ⇒ Expr

Extend the Series with given number of values.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").extend_constant(99, 2))
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 2      │
# │ 3      │
# │ 99     │
# │ 99     │
# └────────┘


7584
7585
7586
7587
7588
# File 'lib/polars/expr.rb', line 7584

# Extend the Series with the given value repeated +n+ times.
#
# @param value [Object] value to append (strings are treated as literals).
# @param n [Integer] number of times to append the value.
# @return [Expr]
def extend_constant(value, n)
  fill_value = Utils.parse_into_expression(value, str_as_lit: true)
  count = Utils.parse_into_expression(n)
  wrap_expr(_rbexpr.extend_constant(fill_value, count))
end

#fill_nan(value) ⇒ Expr

Fill floating point NaN value with a fill value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1.0, nil, Float::NAN],
    "b" => [4.0, Float::NAN, 6]
  }
)
df.fill_nan("zero")
# =>
# shape: (3, 2)
# ┌──────┬──────┐
# │ a    ┆ b    │
# │ ---  ┆ ---  │
# │ str  ┆ str  │
# ╞══════╪══════╡
# │ 1.0  ┆ 4.0  │
# │ null ┆ zero │
# │ zero ┆ 6.0  │
# └──────┴──────┘


2102
2103
2104
2105
# File 'lib/polars/expr.rb', line 2102

# Fill floating point NaN values with a fill value.
#
# @param value [Object] fill value (strings are treated as literals).
# @return [Expr]
def fill_nan(value)
  replacement = Utils.parse_into_expression(value, str_as_lit: true)
  wrap_expr(_rbexpr.fill_nan(replacement))
end

#fill_null(value = nil, strategy: nil, limit: nil) ⇒ Expr

Fill null values using the specified value or strategy.

To interpolate over null values see interpolate.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.fill_null(strategy: "zero")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 0   │
# │ 0   ┆ 6   │
# └─────┴─────┘
df.fill_null(99)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 99  │
# │ 99  ┆ 6   │
# └─────┴─────┘
df.fill_null(strategy: "forward")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘


2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
# File 'lib/polars/expr.rb', line 2062

# Fill null values using the specified value or strategy.
#
# To interpolate over null values see #interpolate.
#
# @param value [Object] value used to fill nulls (mutually exclusive with +strategy+).
# @param strategy [String] fill strategy, e.g. "forward", "backward", "min",
#   "max", "mean", "zero", "one".
# @param limit [Integer] maximum consecutive nulls to fill; only valid together
#   with the "forward" or "backward" strategies.
#
# @return [Expr]
#
# @raise [ArgumentError] if both or neither of +value+/+strategy+ are given, or
#   if +limit+ is combined with a non-directional strategy.
def fill_null(value = nil, strategy: nil, limit: nil)
  if !value.nil? && !strategy.nil?
    raise ArgumentError, "cannot specify both 'value' and 'strategy'."
  elsif value.nil? && strategy.nil?
    raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
  elsif !["forward", "backward"].include?(strategy) && !limit.nil?
    # BUG FIX: the check was inverted — it rejected 'limit' exactly when the
    # strategy WAS "forward"/"backward", contradicting its own error message.
    # 'limit' is only meaningful for the directional strategies.
    raise ArgumentError, "can only specify 'limit' when strategy is set to 'backward' or 'forward'"
  end

  if !value.nil?
    value = Utils.parse_into_expression(value, str_as_lit: true)
    wrap_expr(_rbexpr.fill_null(value))
  else
    wrap_expr(_rbexpr.fill_null_with_strategy(strategy, limit))
  end
end

#filter(*predicates, **constraints) ⇒ Expr

Filter a single column.

Mostly useful in an aggregation context. If you want to filter on a DataFrame level, use LazyFrame#filter.

Examples:

df = Polars::DataFrame.new(
  {
    "group_col" => ["g1", "g1", "g2"],
    "b" => [1, 2, 3]
  }
)
(
  df.group_by("group_col").agg(
    [
      Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
      Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
    ]
  )
).sort("group_col")
# =>
# shape: (2, 3)
# ┌───────────┬─────┬─────┐
# │ group_col ┆ lt  ┆ gte │
# │ ---       ┆ --- ┆ --- │
# │ str       ┆ i64 ┆ i64 │
# ╞═══════════╪═════╪═════╡
# │ g1        ┆ 1   ┆ 2   │
# │ g2        ┆ 0   ┆ 3   │
# └───────────┴─────┴─────┘


3414
3415
3416
3417
3418
3419
# File 'lib/polars/expr.rb', line 3414

# Filter a single column; mostly useful in an aggregation context.
#
# @return [Expr]
def filter(*predicates, **constraints)
  condition = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
  wrap_expr(_rbexpr.filter(condition))
end

#first(ignore_nulls: false) ⇒ Expr

Get the first value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").first)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘


2659
2660
2661
# File 'lib/polars/expr.rb', line 2659

# Get the first value.
#
# @param ignore_nulls [Boolean] skip leading nulls when true.
# @return [Expr]
def first(ignore_nulls: false)
  head_value = _rbexpr.first(ignore_nulls)
  wrap_expr(head_value)
end

#flattenExpr

Deprecated.

Expr#flatten is deprecated and will be removed in a future version. Use Expr.list.explode(keep_nulls: false, empty_as_null: false) instead, which provides the behavior you likely expect.

Explode a list or utf8 Series. This means that every item is expanded to a new row.

Alias for #explode.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => ["a", "b", "b"],
    "values" => [[1, 2], [2, 3], [4]]
  }
)
df.group_by("group").agg(Polars.col("values").flatten)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ values    │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ a     ┆ [1, 2]    │
# │ b     ┆ [2, 3, 4] │
# └───────┴───────────┘


3588
3589
3590
# File 'lib/polars/expr.rb', line 3588

# Explode a list or utf8 Series; alias for #explode.
#
# @deprecated Use +list.explode(keep_nulls: false, empty_as_null: false)+
#   instead, which provides the behavior you likely expect.
# @return [Expr]
def flatten
  explode(empty_as_null: true, keep_nulls: true)
end

#floorExpr

Rounds down to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").floor)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# │ 0.0 │
# │ 1.0 │
# │ 1.0 │
# └─────┘


1120
1121
1122
# File 'lib/polars/expr.rb', line 1120

# Round down to the nearest integer value (floating point Series only).
#
# @return [Expr]
def floor
  rounded = _rbexpr.floor
  wrap_expr(rounded)
end

#floordiv(other) ⇒ Expr

Method equivalent of integer division operator expr // other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").floordiv(2).alias("x//2")
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬──────┐
# │ x   ┆ x/2 ┆ x//2 │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ f64 ┆ i64  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 0.5 ┆ 0    │
# │ 2   ┆ 1.0 ┆ 1    │
# │ 3   ┆ 1.5 ┆ 1    │
# │ 4   ┆ 2.0 ┆ 2    │
# │ 5   ┆ 2.5 ┆ 2    │
# └─────┴─────┴──────┘


4149
4150
4151
# File 'lib/polars/expr.rb', line 4149

# Method equivalent of the integer division operator +expr // other+.
#
# @return [Expr]
def floordiv(other)
  divisor = _to_rbexpr(other)
  wrap_expr(_rbexpr.floordiv(divisor))
end

#forward_fill(limit: nil) ⇒ Expr

Fill missing values with the latest seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.forward_fill)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘


2133
2134
2135
# File 'lib/polars/expr.rb', line 2133

# Fill missing values with the latest seen values.
#
# @param limit [Integer] maximum number of consecutive null values to fill.
# @return [Expr]
def forward_fill(limit: nil)
  fill_null(strategy: "forward", limit: limit)
end

#gather(indices) ⇒ Expr

Take values by index.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").gather([2, 1]))
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ one   ┆ [2, 98]   │
# │ two   ┆ [4, 99]   │
# └───────┴───────────┘


1918
1919
1920
1921
1922
1923
1924
1925
# File 'lib/polars/expr.rb', line 1918

# Take values by index.
#
# @param indices [Array, Expr] indices to gather; plain arrays become an Int64
#   literal Series, anything else is parsed as an expression.
# @return [Expr]
def gather(indices)
  indices_expr =
    if indices.is_a?(::Array)
      Polars.lit(Series.new("", indices, dtype: Int64))._rbexpr
    else
      Utils.parse_into_expression(indices)
    end
  wrap_expr(_rbexpr.gather(indices_expr))
end

#gather_every(n, offset = 0) ⇒ Expr

Take every nth value in the Series and return as a new Series.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df.select(Polars.col("foo").gather_every(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 7   │
# └─────┘


3637
3638
3639
# File 'lib/polars/expr.rb', line 3637

# Take every nth value in the Series, starting at +offset+.
#
# @return [Expr]
def gather_every(n, offset = 0)
  strided = _rbexpr.gather_every(n, offset)
  wrap_expr(strided)
end

#ge(other) ⇒ Expr

Method equivalent of "greater than or equal" operator expr >= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
  Polars.col("x").ge(Polars.col("y")).alias("x >= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x >= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 2.0 ┆ 1.0 ┆ true   │
# └─────┴─────┴────────┘


3901
3902
3903
# File 'lib/polars/expr.rb', line 3901

# Method equivalent of the "greater than or equal" operator +expr >= other+.
#
# @param other [Object] value to compare with.
# @return [Expr]
def ge(other)
  self >= other
end

#get(index, null_on_oob: false) ⇒ Expr

Return a single value by index.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
# =>
# shape: (2, 2)
# ┌───────┬───────┐
# │ group ┆ value │
# │ ---   ┆ ---   │
# │ str   ┆ i64   │
# ╞═══════╪═══════╡
# │ one   ┆ 98    │
# │ two   ┆ 99    │
# └───────┴───────┘


1964
1965
1966
1967
# File 'lib/polars/expr.rb', line 1964

# Return a single value by index.
#
# @param null_on_oob [Boolean] return null instead of raising when out of bounds.
# @return [Expr]
def get(index, null_on_oob: false)
  idx = Utils.parse_into_expression(index)
  wrap_expr(_rbexpr.get(idx, null_on_oob))
end

#gt(other) ⇒ Expr

Method equivalent of "greater than" operator expr > other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
    Polars.col("x").gt(Polars.col("y")).alias("x > y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x > y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 5.0 ┆ 5.0 ┆ false │
# │ 4.0 ┆ 3.0 ┆ true  │
# │ NaN ┆ NaN ┆ false │
# │ 2.0 ┆ 1.0 ┆ true  │
# └─────┴─────┴───────┘


3934
3935
3936
# File 'lib/polars/expr.rb', line 3934

# Method equivalent of the "greater than" operator +expr > other+.
#
# @param other [Object] value to compare with.
# @return [Expr]
def gt(other)
  self > other
end

#has_nullsExpr

Check whether the expression contains one or more null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [nil, 1, nil],
    "b" => [10, nil, 300],
    "c" => [350, 650, 850]
  }
)
df.select(Polars.all.has_nulls)
# =>
# shape: (1, 3)
# ┌──────┬──────┬───────┐
# │ a    ┆ b    ┆ c     │
# │ ---  ┆ ---  ┆ ---   │
# │ bool ┆ bool ┆ bool  │
# ╞══════╪══════╪═══════╡
# │ true ┆ true ┆ false │
# └──────┴──────┴───────┘


2566
2567
2568
# File 'lib/polars/expr.rb', line 2566

# Check whether the expression contains one or more null values.
#
# @return [Expr] boolean expression that is +true+ when the null count is positive.
def has_nulls
  null_count > 0
end

#hash_(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil) ⇒ Expr

Hash the elements in the selection.

The hash value is of type UInt64.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => ["x", nil, "z"]
  }
)
df.with_columns(Polars.all.hash_(10, 20, 30, 40))
# =>
# shape: (3, 2)
# ┌──────────────────────┬──────────────────────┐
# │ a                    ┆ b                    │
# │ ---                  ┆ ---                  │
# │ u64                  ┆ u64                  │
# ╞══════════════════════╪══════════════════════╡
# │ 4629889412789719550  ┆ 6959506404929392568  │
# │ 16386608652769605760 ┆ 11638928888656214026 │
# │ 11638928888656214026 ┆ 11040941213715918520 │
# └──────────────────────┴──────────────────────┘


4561
4562
4563
4564
4565
4566
4567
# File 'lib/polars/expr.rb', line 4561

# Hash the elements in the selection (produces UInt64 values).
#
# Any secondary seed left as +nil+ falls back to the primary +seed+.
#
# @return [Expr]
def hash_(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  k1, k2, k3 = [seed_1, seed_2, seed_3].map { |k| k.nil? ? seed : k }
  wrap_expr(_rbexpr._hash(seed, k1, k2, k3))
end

#head(n = 10) ⇒ Expr

Get the first n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.head(3)
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘


3662
3663
3664
# File 'lib/polars/expr.rb', line 3662

# Get the first n rows.
#
# @param n [Integer] number of rows to keep.
# @return [Expr]
def head(n = 10)
  prefix = _rbexpr.head(n)
  wrap_expr(prefix)
end

#hist(bins: nil, bin_count: nil, include_category: false, include_breakpoint: false) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Bin values into buckets and count their occurrences.

Examples:

df = Polars::DataFrame.new({"a" => [1, 3, 8, 8, 2, 1, 3]})
df.select(Polars.col("a").hist(bins: [1, 2, 3]))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# │ 2   │
# └─────┘
df.select(
  Polars.col("a").hist(
    bins: [1, 2, 3], include_breakpoint: true, include_category: true
  )
)
# =>
# shape: (2, 1)
# ┌──────────────────────┐
# │ a                    │
# │ ---                  │
# │ struct[3]            │
# ╞══════════════════════╡
# │ {2.0,"[1.0, 2.0]",3} │
# │ {3.0,"(2.0, 3.0]",2} │
# └──────────────────────┘


7922
7923
7924
7925
7926
7927
7928
7929
7930
7931
7932
7933
7934
7935
7936
7937
# File 'lib/polars/expr.rb', line 7922

# Bin values into buckets and count their occurrences.
#
# @note Unstable; may change without being considered a breaking change.
#
# @param bins [Array, Series, Expr] explicit bin edges; arrays are converted to
#   a Series before being parsed as an expression.
# @param bin_count [Integer] number of bins to compute when +bins+ is nil.
# @param include_category [Boolean] include the bin's category label.
# @param include_breakpoint [Boolean] include the bin's right endpoint.
# @return [Expr]
def hist(bins: nil, bin_count: nil, include_category: false, include_breakpoint: false)
  unless bins.nil?
    bins = Polars::Series.new(bins) if bins.is_a?(::Array)
    bins = Utils.parse_into_expression(bins)
  end
  wrap_expr(_rbexpr.hist(bins, bin_count, include_category, include_breakpoint))
end

#implodeExpr

Aggregate to list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.all.implode)
# =>
# shape: (1, 2)
# ┌───────────┬───────────┐
# │ a         ┆ b         │
# │ ---       ┆ ---       │
# │ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╡
# │ [1, 2, 3] ┆ [4, 5, 6] │
# └───────────┴───────────┘


7870
7871
7872
# File 'lib/polars/expr.rb', line 7870

# Aggregate values to a list.
#
# @return [Expr]
def implode
  listed = _rbexpr.implode
  wrap_expr(listed)
end

#index_of(element) ⇒ Expr

Get the index of the first occurrence of a value, or nil if it's not found.

Examples:

df = Polars::DataFrame.new({"a" => [1, nil, 17]})
df.select(
  [
    Polars.col("a").index_of(17).alias("seventeen"),
    Polars.col("a").index_of(nil).alias("null"),
    Polars.col("a").index_of(55).alias("fiftyfive")
  ]
)
# =>
# shape: (1, 3)
# ┌───────────┬──────┬───────────┐
# │ seventeen ┆ null ┆ fiftyfive │
# │ ---       ┆ ---  ┆ ---       │
# │ u32       ┆ u32  ┆ u32       │
# ╞═══════════╪══════╪═══════════╡
# │ 2         ┆ 1    ┆ null      │
# └───────────┴──────┴───────────┘


1778
1779
1780
1781
# File 'lib/polars/expr.rb', line 1778

# Get the index of the first occurrence of a value, or null if it's not found.
#
# @param element [Object] value to search for (strings are treated as literals).
# @return [Expr]
def index_of(element)
  needle = Utils.parse_into_expression(element, str_as_lit: true)
  wrap_expr(_rbexpr.index_of(needle))
end

#inspect_(fmt = "%s") ⇒ Expr

Print the value that this expression evaluates to and pass on the value.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
df.select(Polars.col("foo").cum_sum.inspect_("value is: %s").alias("bar"))
# =>
# value is: shape: (3,)
# Series: 'foo' [i64]
# [
#         1
#         2
#         4
# ]
# shape: (3, 1)
# ┌─────┐
# │ bar │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 4   │
# └─────┘


4634
4635
4636
4637
4638
4639
4640
4641
# File 'lib/polars/expr.rb', line 4634

# Print the value that this expression evaluates to and pass on the value.
#
# @param fmt [String] format string applied to the evaluated Series.
# @return [Expr]
def inspect_(fmt = "%s")
  # Print each batch, then return it unchanged so the pipeline is unaffected.
  map_batches(return_dtype: F.dtype_of(self)) do |series|
    puts(fmt % [series])
    series
  end
end

#interpolate(method: "linear") ⇒ Expr

Fill nulls with linear interpolation over missing values.

Can also be used to regrid data to a new grid - see examples below.

Examples:

Fill nulls with linear interpolation

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, 3],
    "b" => [1.0, Float::NAN, 3.0]
  }
)
df.select(Polars.all.interpolate)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 1.0 │
# │ 2.0 ┆ NaN │
# │ 3.0 ┆ 3.0 │
# └─────┴─────┘


4668
4669
4670
# File 'lib/polars/expr.rb', line 4668

# Fill nulls with interpolation over missing values.
#
# @param method [String] interpolation strategy (e.g. "linear").
# @return [Expr]
def interpolate(method: "linear")
  filled = _rbexpr.interpolate(method)
  wrap_expr(filled)
end

#interpolate_by(by) ⇒ Expr

Fill null values using interpolation based on another column.

Examples:

Fill null values using linear interpolation.

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, 3],
    "b" => [1, 2, 7, 8]
  }
)
df.with_columns(a_interpolated: Polars.col("a").interpolate_by("b"))
# =>
# shape: (4, 3)
# ┌──────┬─────┬────────────────┐
# │ a    ┆ b   ┆ a_interpolated │
# │ ---  ┆ --- ┆ ---            │
# │ i64  ┆ i64 ┆ f64            │
# ╞══════╪═════╪════════════════╡
# │ 1    ┆ 1   ┆ 1.0            │
# │ null ┆ 2   ┆ 1.285714       │
# │ null ┆ 7   ┆ 2.714286       │
# │ 3    ┆ 8   ┆ 3.0            │
# └──────┴─────┴────────────────┘


4698
4699
4700
4701
# File 'lib/polars/expr.rb', line 4698

# Fill null values using interpolation based on another column.
#
# @param by [Expr, String] column to interpolate along.
# @return [Expr]
def interpolate_by(by)
  axis = Utils.parse_into_expression(by)
  wrap_expr(_rbexpr.interpolate_by(axis))
end

#is_between(lower_bound, upper_bound, closed: "both") ⇒ Expr

Check if this expression is between start and end.

Examples:

df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
df.with_columns(Polars.col("num").is_between(2, 4).alias("is_between"))
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ true       │
# │ 5   ┆ false      │
# └─────┴────────────┘

Use the closed argument to include or exclude the values at the bounds:

df.with_columns(
  Polars.col("num").is_between(2, 4, closed: "left").alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ false      │
# │ 5   ┆ false      │
# └─────┴────────────┘

You can also use strings as well as numeric/temporal values:

df = Polars::DataFrame.new({"a" => ["a", "b", "c", "d", "e"]})
df.with_columns(
  Polars.col("a")
    .is_between(Polars.lit("a"), Polars.lit("c"), closed: "both")
    .alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ a   ┆ is_between │
# │ --- ┆ ---        │
# │ str ┆ bool       │
# ╞═════╪════════════╡
# │ a   ┆ true       │
# │ b   ┆ true       │
# │ c   ┆ true       │
# │ d   ┆ false      │
# │ e   ┆ false      │
# └─────┴────────────┘


4481
4482
4483
4484
4485
4486
4487
4488
# File 'lib/polars/expr.rb', line 4481

# Check if this expression is between +lower_bound+ and +upper_bound+.
#
# @param closed [String] which bounds are inclusive: "both", "left", "right", "none".
# @return [Expr]
def is_between(lower_bound, upper_bound, closed: "both")
  lower = Utils.parse_into_expression(lower_bound)
  upper = Utils.parse_into_expression(upper_bound)
  wrap_expr(_rbexpr.is_between(lower, upper, closed))
end

#is_close(other, abs_tol: 0.0, rel_tol: 1.0e-09, nans_equal: false) ⇒ Expr

Check if this expression is close, i.e. almost equal, to the other expression.

Examples:

df = Polars::DataFrame.new({"a" => [1.5, 2.0, 2.5], "b" => [1.55, 2.2, 3.0]})
df.with_columns(Polars.col("a").is_close("b", abs_tol: 0.1).alias("is_close"))
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────────┐
# │ a   ┆ b    ┆ is_close │
# │ --- ┆ ---  ┆ ---      │
# │ f64 ┆ f64  ┆ bool     │
# ╞═════╪══════╪══════════╡
# │ 1.5 ┆ 1.55 ┆ true     │
# │ 2.0 ┆ 2.2  ┆ false    │
# │ 2.5 ┆ 3.0  ┆ false    │
# └─────┴──────┴──────────┘


4517
4518
4519
4520
4521
4522
4523
4524
4525
# File 'lib/polars/expr.rb', line 4517

# Check if this expression is close, i.e. almost equal, to the other expression.
#
# @param abs_tol [Float] absolute tolerance.
# @param rel_tol [Float] relative tolerance.
# @param nans_equal [Boolean] treat NaN values as equal to each other.
# @return [Expr]
def is_close(other, abs_tol: 0.0, rel_tol: 1.0e-09, nans_equal: false)
  rhs = Utils.parse_into_expression(other)
  wrap_expr(_rbexpr.is_close(rhs, abs_tol, rel_tol, nans_equal))
end

#is_duplicatedExpr

Get mask of duplicated values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_duplicated)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ true  │
# │ false │
# └───────┘


3049
3050
3051
# File 'lib/polars/expr.rb', line 3049

# Get mask of duplicated values.
#
# @return [Expr]
def is_duplicated
  mask = _rbexpr.is_duplicated
  wrap_expr(mask)
end

#is_finiteExpr

Returns a boolean Series indicating which values are finite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_finite)
# =>
# shape: (2, 2)
# ┌──────┬───────┐
# │ A    ┆ B     │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ true  │
# │ true ┆ false │
# └──────┴───────┘


611
612
613
# File 'lib/polars/expr.rb', line 611

# Returns a boolean Series indicating which values are finite.
#
# @return [Expr]
def is_finite
  mask = _rbexpr.is_finite
  wrap_expr(mask)
end

#is_first_distinctExpr

Get a mask of the first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "num" => [1, 2, 3, 1, 5]
  }
)
df.with_columns(Polars.col("num").is_first_distinct.alias("is_first"))
# =>
# shape: (5, 2)
# ┌─────┬──────────┐
# │ num ┆ is_first │
# │ --- ┆ ---      │
# │ i64 ┆ bool     │
# ╞═════╪══════════╡
# │ 1   ┆ true     │
# │ 2   ┆ true     │
# │ 3   ┆ true     │
# │ 1   ┆ false    │
# │ 5   ┆ true     │
# └─────┴──────────┘


3003
3004
3005
# File 'lib/polars/expr.rb', line 3003

# Get a mask marking the first occurrence of each distinct value.
#
# @return [Expr]
def is_first_distinct
  mask = _rbexpr.is_first_distinct
  wrap_expr(mask)
end

#is_in(other, nulls_equal: false) ⇒ Expr Also known as: in?

Check if elements of this expression are present in the other Series.

Examples:

df = Polars::DataFrame.new(
  {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
)
df.with_columns(contains: Polars.col("optional_members").is_in("sets"))
# =>
# shape: (3, 3)
# ┌───────────┬──────────────────┬──────────┐
# │ sets      ┆ optional_members ┆ contains │
# │ ---       ┆ ---              ┆ ---      │
# │ list[i64] ┆ i64              ┆ bool     │
# ╞═══════════╪══════════════════╪══════════╡
# │ [1, 2, 3] ┆ 1                ┆ true     │
# │ [1, 2]    ┆ 2                ┆ true     │
# │ [9, 10]   ┆ 3                ┆ false    │
# └───────────┴──────────────────┴──────────┘


4374
4375
4376
4377
# File 'lib/polars/expr.rb', line 4374

# Check if elements of this expression are present in the other Series.
#
# @param other [Object] values to test membership against; parsed into an expression
# @param nulls_equal [Boolean] whether nulls are considered equal to each other
# @return [Expr] boolean expression
def is_in(other, nulls_equal: false)
  wrap_expr(
    _rbexpr.is_in(Utils.parse_into_expression(other), nulls_equal)
  )
end

#is_infinite ⇒ Expr

Returns a boolean Series indicating which values are infinite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_infinite)
# =>
# shape: (2, 2)
# ┌───────┬───────┐
# │ A     ┆ B     │
# │ ---   ┆ ---   │
# │ bool  ┆ bool  │
# ╞═══════╪═══════╡
# │ false ┆ false │
# │ false ┆ true  │
# └───────┴───────┘


637
638
639
# File 'lib/polars/expr.rb', line 637

# Returns a boolean mask indicating which values are infinite.
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_infinite
  wrap_expr(_rbexpr.is_infinite)
end

#is_last_distinct ⇒ Expr

Return a boolean mask indicating the last occurrence of each distinct value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2, 3, 2]})
df.with_columns(Polars.col("a").is_last_distinct.alias("last"))
# =>
# shape: (5, 2)
# ┌─────┬───────┐
# │ a   ┆ last  │
# │ --- ┆ ---   │
# │ i64 ┆ bool  │
# ╞═════╪═══════╡
# │ 1   ┆ false │
# │ 1   ┆ true  │
# │ 2   ┆ false │
# │ 3   ┆ true  │
# │ 2   ┆ true  │
# └─────┴───────┘


3027
3028
3029
# File 'lib/polars/expr.rb', line 3027

# Get a mask marking the last occurrence of each distinct value.
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_last_distinct
  wrap_expr(_rbexpr.is_last_distinct)
end

#is_nan ⇒ Expr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_columns(Polars.col(Polars::Float64).is_nan.name.suffix("_isnan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬─────────┐
# │ a    ┆ b   ┆ b_isnan │
# │ ---  ┆ --- ┆ ---     │
# │ i64  ┆ f64 ┆ bool    │
# ╞══════╪═════╪═════════╡
# │ 1    ┆ 1.0 ┆ false   │
# │ 2    ┆ 2.0 ┆ false   │
# │ null ┆ NaN ┆ true    │
# │ 1    ┆ 1.0 ┆ false   │
# │ 5    ┆ 5.0 ┆ false   │
# └──────┴─────┴─────────┘


670
671
672
# File 'lib/polars/expr.rb', line 670

# Returns a boolean mask indicating which values are NaN.
#
# Note: floating point NaN is distinct from missing data (nil/null).
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_nan
  wrap_expr(_rbexpr.is_nan)
end

#is_not ⇒ Expr Also known as: not_

Negate a boolean expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, false],
    "b" => ["a", "b", nil]
  }
)
# =>
# shape: (3, 2)
# ┌───────┬──────┐
# │ a     ┆ b    │
# │ ---   ┆ ---  │
# │ bool  ┆ str  │
# ╞═══════╪══════╡
# │ true  ┆ a    │
# │ false ┆ b    │
# │ false ┆ null │
# └───────┴──────┘
df.select(Polars.col("a").is_not)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ true  │
# └───────┘


526
527
528
# File 'lib/polars/expr.rb', line 526

# Negate a boolean expression.
#
# Aliased as `not_`; delegates to the native engine's `not_`.
#
# @return [Expr] boolean expression
def is_not
  wrap_expr(_rbexpr.not_)
end

#is_not_nan ⇒ Expr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are not NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_columns(Polars.col(Polars::Float64).is_not_nan.name.suffix("_is_not_nan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬──────────────┐
# │ a    ┆ b   ┆ b_is_not_nan │
# │ ---  ┆ --- ┆ ---          │
# │ i64  ┆ f64 ┆ bool         │
# ╞══════╪═════╪══════════════╡
# │ 1    ┆ 1.0 ┆ true         │
# │ 2    ┆ 2.0 ┆ true         │
# │ null ┆ NaN ┆ false        │
# │ 1    ┆ 1.0 ┆ true         │
# │ 5    ┆ 5.0 ┆ true         │
# └──────┴─────┴──────────────┘


703
704
705
# File 'lib/polars/expr.rb', line 703

# Returns a boolean mask indicating which values are not NaN.
#
# Note: floating point NaN is distinct from missing data (nil/null).
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_not_nan
  wrap_expr(_rbexpr.is_not_nan)
end

#is_not_null ⇒ Expr

Returns a boolean Series indicating which values are not null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_columns(Polars.all.is_not_null.name.suffix("_not_null"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬────────────┬────────────┐
# │ a    ┆ b   ┆ a_not_null ┆ b_not_null │
# │ ---  ┆ --- ┆ ---        ┆ ---        │
# │ i64  ┆ f64 ┆ bool       ┆ bool       │
# ╞══════╪═════╪════════════╪════════════╡
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 2    ┆ 2.0 ┆ true       ┆ true       │
# │ null ┆ NaN ┆ false      ┆ true       │
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 5    ┆ 5.0 ┆ true       ┆ true       │
# └──────┴─────┴────────────┴────────────┘


585
586
587
# File 'lib/polars/expr.rb', line 585

# Returns a boolean mask indicating which values are not null.
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_not_null
  wrap_expr(_rbexpr.is_not_null)
end

#is_null ⇒ Expr

Returns a boolean Series indicating which values are null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_columns(Polars.all.is_null.name.suffix("_isnull"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬──────────┬──────────┐
# │ a    ┆ b   ┆ a_isnull ┆ b_isnull │
# │ ---  ┆ --- ┆ ---      ┆ ---      │
# │ i64  ┆ f64 ┆ bool     ┆ bool     │
# ╞══════╪═════╪══════════╪══════════╡
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 2    ┆ 2.0 ┆ false    ┆ false    │
# │ null ┆ NaN ┆ true     ┆ false    │
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 5    ┆ 5.0 ┆ false    ┆ false    │
# └──────┴─────┴──────────┴──────────┘


556
557
558
# File 'lib/polars/expr.rb', line 556

# Returns a boolean mask indicating which values are null.
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_null
  wrap_expr(_rbexpr.is_null)
end

#is_unique ⇒ Expr

Get mask of unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_unique)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ true  │
# └───────┘


2975
2976
2977
# File 'lib/polars/expr.rb', line 2975

# Get mask of unique (non-repeated) values.
#
# @return [Expr] boolean expression (delegates to the native engine)
def is_unique
  wrap_expr(_rbexpr.is_unique)
end

#item(allow_empty: false) ⇒ Expr

Get the single value.

This raises an error if there is not exactly one value.

Examples:

df = Polars::DataFrame.new({"a" => [1]})
df.select(Polars.col("a").item)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘
df.head(0).select(Polars.col("a").item(allow_empty: true))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# └──────┘


2721
2722
2723
# File 'lib/polars/expr.rb', line 2721

# Get the single value; raises if there is not exactly one value.
#
# @param allow_empty [Boolean] when true, an empty input yields null instead of raising
# @return [Expr]
def item(allow_empty: false)
  # Use the bare `wrap_expr` helper for consistency with every other
  # method in this class (was `Utils.wrap_expr`).
  wrap_expr(_rbexpr.item(allow_empty))
end

#kurtosis(fisher: true, bias: true) ⇒ Expr

Compute the kurtosis (Fisher or Pearson) of a dataset.

Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. If bias is false, then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").kurtosis)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -1.153061 │
# └───────────┘


6831
6832
6833
# File 'lib/polars/expr.rb', line 6831

# Compute the kurtosis (Fisher or Pearson) of a dataset.
#
# @param fisher [Boolean] use Fisher's definition (3.0 subtracted so a normal distribution gives 0.0)
# @param bias [Boolean] when false, corrected for statistical bias via k statistics
# @return [Expr]
def kurtosis(fisher: true, bias: true)
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
end

#last(ignore_nulls: false) ⇒ Expr

Get the last value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").last)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘


2684
2685
2686
# File 'lib/polars/expr.rb', line 2684

# Get the last value.
#
# @param ignore_nulls [Boolean] forwarded to the native engine — presumably
#   skips trailing nulls when true; TODO confirm against engine docs
# @return [Expr]
def last(ignore_nulls: false)
  wrap_expr(_rbexpr.last(ignore_nulls))
end

#le(other) ⇒ Expr

Method equivalent of "less than or equal" operator expr <= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 0.5],
    "y" => [5.0, 3.5, Float::NAN, 2.0]
  }
)
df.with_columns(
  Polars.col("x").le(Polars.col("y")).alias("x <= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x <= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.5 ┆ false  │
# │ NaN ┆ NaN ┆ true   │
# │ 0.5 ┆ 2.0 ┆ true   │
# └─────┴─────┴────────┘


3967
3968
3969
# File 'lib/polars/expr.rb', line 3967

# Method equivalent of the "less than or equal" operator `expr <= other`.
#
# @param other [Object] right-hand operand
# @return [Expr] boolean expression
def le(other)
  self <= other
end

#len ⇒ Expr Also known as: length

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.len)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 3   │
# └─────┴─────┘


778
779
780
# File 'lib/polars/expr.rb', line 778

# Count the number of values in this expression (aliased as `length`).
#
# @return [Expr] (delegates to the native engine)
def len
  wrap_expr(_rbexpr.len)
end

#limit(n = 10) ⇒ Expr

Get the first n rows.

Alias for #head.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.select(Polars.col("foo").limit(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘


3714
3715
3716
# File 'lib/polars/expr.rb', line 3714

# Get the first `n` rows; alias for `#head`.
#
# @param n [Integer] number of rows to keep (default 10)
# @return [Expr]
def limit(n = 10)
  head(n)
end

#list ⇒ ListExpr

Create an object namespace of all list related methods.



8405
8406
8407
# File 'lib/polars/expr.rb', line 8405

# Create an object namespace of all list related methods.
#
# @return [ListExpr] namespace wrapping this expression
def list
  ListExpr.new(self)
end

#log(base = Math::E) ⇒ Expr

Compute the logarithm to a given base.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").log(2))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 1.0      │
# │ 1.584963 │
# └──────────┘


7700
7701
7702
7703
# File 'lib/polars/expr.rb', line 7700

# Compute the logarithm to a given base.
#
# @param base [Object] logarithm base (defaults to e); parsed into an expression
# @return [Expr]
def log(base = Math::E)
  wrap_expr(_rbexpr.log(Utils.parse_into_expression(base)))
end

#log10 ⇒ Expr

Compute the base 10 logarithm of the input array, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").log10)
# =>
# shape: (3, 1)
# ┌─────────┐
# │ values  │
# │ ---     │
# │ f64     │
# ╞═════════╡
# │ 0.0     │
# │ 0.30103 │
# │ 0.60206 │
# └─────────┘


343
344
345
# File 'lib/polars/expr.rb', line 343

# Compute the base-10 logarithm, element-wise (implemented as `log(10)`).
#
# @return [Expr]
def log10
  log(10)
end

#log1p ⇒ Expr

Compute the natural logarithm of each element plus one.

This computes log(1 + x) but is more numerically stable for x close to zero.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").log1p)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.693147 │
# │ 1.098612 │
# │ 1.386294 │
# └──────────┘


7725
7726
7727
# File 'lib/polars/expr.rb', line 7725

# Compute the natural logarithm of each element plus one, i.e. log(1 + x);
# more numerically stable than log for x close to zero.
#
# @return [Expr] (delegates to the native engine)
def log1p
  wrap_expr(_rbexpr.log1p)
end

#lower_bound ⇒ Expr

Calculate the lower bound.

Returns a unit Series with the lowest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").lower_bound)
# =>
# shape: (1, 1)
# ┌──────────────────────┐
# │ a                    │
# │ ---                  │
# │ i64                  │
# ╞══════════════════════╡
# │ -9223372036854775808 │
# └──────────────────────┘


6891
6892
6893
# File 'lib/polars/expr.rb', line 6891

# Calculate the lowest value possible for the dtype of this expression.
#
# @return [Expr] unit expression (delegates to the native engine)
def lower_bound
  wrap_expr(_rbexpr.lower_bound)
end

#lt(other) ⇒ Expr

Method equivalent of "less than" operator expr < other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 3.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").lt(Polars.col("y")).alias("x < y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x < y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 1.0 ┆ 2.0 ┆ true  │
# │ 2.0 ┆ 2.0 ┆ false │
# │ NaN ┆ NaN ┆ false │
# │ 3.0 ┆ 4.0 ┆ true  │
# └─────┴─────┴───────┘


4000
4001
4002
# File 'lib/polars/expr.rb', line 4000

# Method equivalent of the "less than" operator `expr < other`.
#
# @param other [Object] right-hand operand
# @return [Expr] boolean expression
def lt(other)
  self < other
end

#map_batches(return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr

Apply a custom Ruby function to a Series or array of Series.

Examples:

df = Polars::DataFrame.new(
  {
    "sine" => [0.0, 1.0, 0.0, -1.0],
    "cosine" => [1.0, 0.0, -1.0, 0.0]
  }
)
df.select(Polars.all.map_batches(returns_scalar: true) { |x| x.to_numo.argmax })
# =>
# shape: (1, 2)
# ┌──────┬────────┐
# │ sine ┆ cosine │
# │ ---  ┆ ---    │
# │ i64  ┆ i64    │
# ╞══════╪════════╡
# │ 1    ┆ 0      │
# └──────┴────────┘


3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
# File 'lib/polars/expr.rb', line 3453

# Apply a custom Ruby function to a Series (or array of Series).
#
# Wraps the user block so that the single Series passed by `F.map_batches`
# (which yields a list of Series) is unwrapped before the block is called.
#
# @param return_dtype [Object] dtype of the output Series
# @param is_elementwise [Boolean] hint that the function operates element-wise
# @param returns_scalar [Boolean] whether the function returns a scalar
# @return [Expr]
def map_batches(
  return_dtype: nil,
  is_elementwise: false,
  returns_scalar: false,
  &function
)
  # F.map_batches hands the block a list of Series; forward only the first.
  unwrap_single = lambda do |series_list, *args, **kwargs|
    function.(series_list[0], *args, **kwargs)
  end

  F.map_batches(
    [self],
    return_dtype: return_dtype,
    is_elementwise: is_elementwise,
    returns_scalar: returns_scalar,
    &unwrap_single
  )
end

#map_elements(return_dtype: nil, skip_nulls: true, pass_name: false, strategy: "thread_local", returns_scalar: false, &function) ⇒ Expr

Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.

Depending on the context it has the following behavior:

  • Selection Expects f to be of type Callable[[Any], Any]. Applies a Ruby function over each individual value in the column.
  • GroupBy Expects f to be of type Callable[[Series], Series]. Applies a Ruby function over each group.

Implementing logic using a Ruby function is almost always significantly slower and more memory intensive than implementing the same logic using the native expression API because:

  • The native expression engine runs in Rust; UDFs run in Ruby.
  • Use of Ruby UDFs forces the DataFrame to be materialized in memory.
  • Polars-native expressions can be parallelised (UDFs cannot).
  • Polars-native expressions can be logically optimised (UDFs cannot).

Wherever possible you should strongly prefer the native expression API to achieve the best performance.

Examples:

The function is applied to each element of column 'a':

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 1],
    "b" => ["a", "b", "c", "c"]
  }
)
df.with_columns(
  Polars.col("a")
  .map_elements(return_dtype: Polars.self_dtype) { |x| x * 2 }
  .alias("a_times_2")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────────┐
# │ a   ┆ b   ┆ a_times_2 │
# │ --- ┆ --- ┆ ---       │
# │ i64 ┆ str ┆ i64       │
# ╞═════╪═════╪═══════════╡
# │ 1   ┆ a   ┆ 2         │
# │ 2   ┆ b   ┆ 4         │
# │ 3   ┆ c   ┆ 6         │
# │ 1   ┆ c   ┆ 2         │
# └─────┴─────┴───────────┘


3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
# File 'lib/polars/expr.rb', line 3526

# Apply a custom/user-defined function (UDF) over each value (selection
# context) or each group (group-by context). Implemented on top of
# `map_batches` with `is_elementwise: true`.
#
# @param return_dtype [Object] dtype of the output; read back out of kwargs inside the wrapper
# @param skip_nulls [Boolean] skip null values when mapping elements
# @param pass_name [Boolean] not implemented yet — raises Todo when true
# @param strategy [String] only "thread_local" is supported; "threading" raises Todo
# @param returns_scalar [Boolean] accepted but not forwarded — `map_batches` is
#   called with a hard-coded `returns_scalar: false`. NOTE(review): looks like
#   this parameter is silently ignored; confirm whether it should be passed through.
# @return [Expr]
# @raise [ArgumentError] if strategy is not one of the recognized values
def map_elements(
  return_dtype: nil,
  skip_nulls: true,
  pass_name: false,
  strategy: "thread_local",
  returns_scalar: false,
  &function
)
  if pass_name
    raise Todo
  else
    wrap_f = lambda do |x, **kwargs|
      # NOTE(review): this reassigns the method-level `return_dtype` from the
      # kwargs supplied by the caller of the wrapper — intentional shadowing?
      return_dtype = kwargs[:return_dtype]
      x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &function)
    end
  end

  if strategy == "thread_local"
    map_batches(
      return_dtype: return_dtype,
      returns_scalar: false,
      is_elementwise: true,
      &wrap_f
    )
  elsif strategy == "threading"
    raise Todo
  else
    msg = "strategy #{strategy.inspect} is not supported"
    raise ArgumentError, msg
  end
end

#max ⇒ Expr

Get maximum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘


2265
2266
2267
# File 'lib/polars/expr.rb', line 2265

# Get maximum value.
#
# @return [Expr] (delegates to the native engine)
def max
  wrap_expr(_rbexpr.max)
end

#max_by(by) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get maximum value, ordered by another expression.

If the by expression has multiple values equal to the maximum it is not defined which value will be chosen.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0], "b" => ["x", "y", "z"]})
df.select(Polars.col("b").max_by("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ str │
# ╞═════╡
# │ z   │
# └─────┘


2296
2297
2298
2299
# File 'lib/polars/expr.rb', line 2296

# Get the value at the position where `by` is maximal. If multiple values of
# `by` equal the maximum, which value is chosen is not defined.
#
# @param by [Object] expression to order by; parsed into an expression
# @return [Expr]
def max_by(by)
  wrap_expr(_rbexpr.max_by(Utils.parse_into_expression(by)))
end

#mean ⇒ Expr

Get mean value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").mean)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘


2433
2434
2435
# File 'lib/polars/expr.rb', line 2433

# Get mean value.
#
# @return [Expr] (delegates to the native engine)
def mean
  wrap_expr(_rbexpr.mean)
end

#median ⇒ Expr

Get median value using linear interpolation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").median)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘


2453
2454
2455
# File 'lib/polars/expr.rb', line 2453

# Get median value using linear interpolation.
#
# @return [Expr] (delegates to the native engine)
def median
  wrap_expr(_rbexpr.median)
end

#meta ⇒ MetaExpr

Create an object namespace of all meta related expression methods.



8440
8441
8442
# File 'lib/polars/expr.rb', line 8440

# Create an object namespace of all meta related expression methods.
#
# @return [MetaExpr] namespace wrapping this expression
def meta
  MetaExpr.new(self)
end

#min ⇒ Expr

Get minimum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").min)
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ -1.0 │
# └──────┘


2317
2318
2319
# File 'lib/polars/expr.rb', line 2317

# Get minimum value.
#
# @return [Expr] (delegates to the native engine)
def min
  wrap_expr(_rbexpr.min)
end

#min_by(by) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get minimum value, ordered by another expression.

If the by expression has multiple values equal to the minimum it is not defined which value will be chosen.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0], "b" => ["x", "y", "z"]})
df.select(Polars.col("b").min_by("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# └─────┘


2348
2349
2350
2351
# File 'lib/polars/expr.rb', line 2348

# Get the value at the position where `by` is minimal. If multiple values of
# `by` equal the minimum, which value is chosen is not defined.
#
# @param by [Object] expression to order by; parsed into an expression
# @return [Expr]
def min_by(by)
  wrap_expr(_rbexpr.min_by(Utils.parse_into_expression(by)))
end

#mod(other) ⇒ Expr

Method equivalent of modulus operator expr % other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(Polars.col("x").mod(2).alias("x%2"))
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ x   ┆ x%2 │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 0   ┆ 0   │
# │ 1   ┆ 1   │
# │ 2   ┆ 0   │
# │ 3   ┆ 1   │
# │ 4   ┆ 0   │
# └─────┴─────┘


4176
4177
4178
# File 'lib/polars/expr.rb', line 4176

# Method equivalent of the modulus operator `expr % other`.
#
# @param other [Object] right-hand operand
# @return [Expr]
def mod(other)
  self % other
end

#mode(maintain_order: false) ⇒ Expr

Compute the most occurring value(s).

Can return multiple Values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 1, 2, 3],
    "b" => [1, 1, 2, 2]
  }
)
df.select(Polars.all.mode.first)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 1   │
# │ 1   ┆ 2   │
# └─────┴─────┘


1263
1264
1265
# File 'lib/polars/expr.rb', line 1263

# Compute the most occurring value(s); can return multiple values.
#
# @param maintain_order [Boolean] forwarded to the native engine — presumably
#   keeps modes in order of first appearance; TODO confirm against engine docs
# @return [Expr]
def mode(maintain_order: false)
  wrap_expr(_rbexpr.mode(maintain_order))
end

#mul(other) ⇒ Expr

Method equivalent of multiplication operator expr * other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 4, 8, 16]})
df.with_columns(
  Polars.col("x").mul(2).alias("x*2"),
  Polars.col("x").mul(Polars.col("x").log(2)).alias("x * xlog2"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬───────────┐
# │ x   ┆ x*2 ┆ x * xlog2 │
# │ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ f64       │
# ╞═════╪═════╪═══════════╡
# │ 1   ┆ 2   ┆ 0.0       │
# │ 2   ┆ 4   ┆ 2.0       │
# │ 4   ┆ 8   ┆ 8.0       │
# │ 8   ┆ 16  ┆ 24.0      │
# │ 16  ┆ 32  ┆ 64.0      │
# └─────┴─────┴───────────┘


4206
4207
4208
# File 'lib/polars/expr.rb', line 4206

# Method equivalent of the multiplication operator `expr * other`.
#
# @param other [Object] right-hand operand
# @return [Expr]
def mul(other)
  self * other
end

#n_unique ⇒ Expr

Count unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘


2493
2494
2495
# File 'lib/polars/expr.rb', line 2493

# Count unique values.
#
# @return [Expr] (delegates to the native engine)
def n_unique
  wrap_expr(_rbexpr.n_unique)
end

#name ⇒ NameExpr

Create an object namespace of all expressions that modify expression names.



8447
8448
8449
# File 'lib/polars/expr.rb', line 8447

# Create an object namespace of all expressions that modify expression names.
#
# @return [NameExpr] namespace wrapping this expression
def name
  NameExpr.new(self)
end

#nan_max ⇒ Expr

Get maximum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘


2369
2370
2371
# File 'lib/polars/expr.rb', line 2369

# Get maximum value, but propagate/poison encountered NaN values.
#
# @return [Expr] (delegates to the native engine)
def nan_max
  wrap_expr(_rbexpr.nan_max)
end

#nan_min ⇒ Expr

Get minimum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘


2389
2390
2391
# File 'lib/polars/expr.rb', line 2389

# Get minimum value, but propagate/poison encountered NaN values.
#
# @return [Expr] (delegates to the native engine)
def nan_min
  wrap_expr(_rbexpr.nan_min)
end

#ne(other) ⇒ Expr

Method equivalent of inequality operator expr != other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x != y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x != y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ true   │
# │ 2.0 ┆ 2.0 ┆ false  │
# │ NaN ┆ NaN ┆ false  │
# │ 4.0 ┆ 4.0 ┆ false  │
# └─────┴─────┴────────┘


4033
4034
4035
# File 'lib/polars/expr.rb', line 4033

# Method equivalent of the inequality operator `expr != other`.
#
# @param other [Object] right-hand operand
# @return [Expr] boolean expression
def ne(other)
  self != other
end

#ne_missing(other) ⇒ Expr

Method equivalent of inequality operator expr != other where nil == nil.

This differs from default ne where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x ne y"),
  Polars.col("x").ne_missing(Polars.col("y")).alias("x ne_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x ne y ┆ x ne_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ true   ┆ true           │
# │ 2.0  ┆ 2.0  ┆ false  ┆ false          │
# │ NaN  ┆ NaN  ┆ false  ┆ false          │
# │ 4.0  ┆ 4.0  ┆ false  ┆ false          │
# │ null ┆ 5.0  ┆ null   ┆ true           │
# │ null ┆ null ┆ null   ┆ false          │
# └──────┴──────┴────────┴────────────────┘


4071
4072
4073
4074
# File 'lib/polars/expr.rb', line 4071

# Method equivalent of `expr != other` where `nil == nil`, i.e. null values
# compare as equal instead of propagating.
#
# @param other [Object] right-hand operand; strings are treated as literals
# @return [Expr] boolean expression
def ne_missing(other)
  wrap_expr(
    _rbexpr.neq_missing(Utils.parse_into_expression(other, str_as_lit: true))
  )
end

#neg ⇒ Expr

Method equivalent of unary minus operator -expr.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 2, nil]})
df.with_columns(Polars.col("a").neg)
# =>
# shape: (4, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 1    │
# │ 0    │
# │ -2   │
# │ null │
# └──────┘


4259
4260
4261
# File 'lib/polars/expr.rb', line 4259

# Method equivalent of the unary minus operator `-expr`.
#
# @return [Expr]
def neg
  -self
end

#null_count ⇒ Expr

Count null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [nil, 1, nil],
    "b" => [1, 2, 3]
  }
)
df.select(Polars.all.null_count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘


2540
2541
2542
# File 'lib/polars/expr.rb', line 2540

# Count null values.
#
# @return [Expr] (delegates to the native engine)
def null_count
  wrap_expr(_rbexpr.null_count)
end

#or_(*others) ⇒ Expr

Method equivalent of bitwise "or" operator expr | other | ....

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5, 6, 7, 4, 8],
    "y" => [1.5, 2.5, 1.0, 4.0, -5.75],
    "z" => [-9, 2, -1, 4, 8]
  }
)
df.select(
  (Polars.col("x") == Polars.col("y"))
  .or_(
    Polars.col("x") == Polars.col("y"),
    Polars.col("y") == Polars.col("z"),
    Polars.col("y").cast(Integer) == Polars.col("z"),
  )
  .alias("any")
)
# =>
# shape: (5, 1)
# ┌───────┐
# │ any   │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ false │
# │ true  │
# │ false │
# └───────┘


3797
3798
3799
# File 'lib/polars/expr.rb', line 3797

# Method equivalent of the bitwise "or" operator `expr | other | ...`.
#
# @param others [Array<Object>] further operands to fold in with `|`
# @return [Expr] boolean expression; returns self when no operands are given
def or_(*others)
  others.reduce(self) { |acc, expr| acc | expr }
end

#over(partition_by = nil, *more_exprs, order_by: nil, descending: false, nulls_last: false, mapping_strategy: "group_to_rows") ⇒ Expr

Apply window function over a subgroup.

This is similar to a group by + aggregation + self join. Or similar to window functions in Postgres.

Examples:

df = Polars::DataFrame.new(
  {
    "groups" => ["g1", "g1", "g2"],
    "values" => [1, 2, 3]
  }
)
df.with_columns(
  Polars.col("values").max.over("groups").alias("max_by_group")
)
# =>
# shape: (3, 3)
# ┌────────┬────────┬──────────────┐
# │ groups ┆ values ┆ max_by_group │
# │ ---    ┆ ---    ┆ ---          │
# │ str    ┆ i64    ┆ i64          │
# ╞════════╪════════╪══════════════╡
# │ g1     ┆ 1      ┆ 2            │
# │ g1     ┆ 2      ┆ 2            │
# │ g2     ┆ 3      ┆ 3            │
# └────────┴────────┴──────────────┘
df = Polars::DataFrame.new(
  {
    "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
    "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
  }
)
df.lazy
  .select([Polars.col("groups").sum.over("groups")])
  .collect
# =>
# shape: (9, 1)
# ┌────────┐
# │ groups │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 4      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 6      │
# │ 4      │
# └────────┘
df = Polars::DataFrame.new(
  {
    "store_id" => ["a", "a", "b", "b"],
    "date" => [Date.new(2024, 9, 18), Date.new(2024, 9, 17), Date.new(2024, 9, 18), Date.new(2024, 9, 16)],
    "sales" => [7, 9, 8, 10]
  }
)
df.with_columns(
  cumulative_sales: Polars.col("sales").cum_sum.over("store_id", order_by: "date")
)
# =>
# shape: (4, 4)
# ┌──────────┬────────────┬───────┬──────────────────┐
# │ store_id ┆ date       ┆ sales ┆ cumulative_sales │
# │ ---      ┆ ---        ┆ ---   ┆ ---              │
# │ str      ┆ date       ┆ i64   ┆ i64              │
# ╞══════════╪════════════╪═══════╪══════════════════╡
# │ a        ┆ 2024-09-18 ┆ 7     ┆ 16               │
# │ a        ┆ 2024-09-17 ┆ 9     ┆ 9                │
# │ b        ┆ 2024-09-18 ┆ 8     ┆ 18               │
# │ b        ┆ 2024-09-16 ┆ 10    ┆ 10               │
# └──────────┴────────────┴───────┴──────────────────┘


2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
# File 'lib/polars/expr.rb', line 2840

# Apply a window function over a subgroup — similar to group by + aggregation
# + self join, or to window functions in Postgres.
#
# @param partition_by [Object, nil] column(s) defining the window partitions
# @param more_exprs [Array<Object>] additional partition columns
# @param order_by [Object, nil] column(s) to order by within each partition
# @param descending [Boolean] sort order for order_by
# @param nulls_last [Boolean] null placement for order_by
# @param mapping_strategy [String] how window results map back to rows
# @return [Expr]
def over(partition_by = nil, *more_exprs, order_by: nil, descending: false, nulls_last: false, mapping_strategy: "group_to_rows")
  partition_rbexprs =
    partition_by.nil? ? nil : Utils.parse_into_list_of_expressions(partition_by, *more_exprs)
  order_rbexprs =
    order_by.nil? ? nil : Utils.parse_into_list_of_expressions(order_by)

  wrap_expr(
    _rbexpr.over(partition_rbexprs, order_rbexprs, descending, nulls_last, mapping_strategy)
  )
end

#pct_change(n: 1) ⇒ Expr

Computes percentage change between values.

Percentage change (as fraction) between current element and most-recent non-null element at least n period(s) before the current element.

Computes the change from the previous row by default.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [10, 11, 12, nil, 12]
  }
)
df.with_columns(Polars.col("a").pct_change.alias("pct_change"))
# =>
# shape: (5, 2)
# ┌──────┬────────────┐
# │ a    ┆ pct_change │
# │ ---  ┆ ---        │
# │ i64  ┆ f64        │
# ╞══════╪════════════╡
# │ 10   ┆ null       │
# │ 11   ┆ 0.1        │
# │ 12   ┆ 0.090909   │
# │ null ┆ null       │
# │ 12   ┆ null       │
# └──────┴────────────┘


6769
6770
6771
6772
# File 'lib/polars/expr.rb', line 6769

# Computes percentage change (as a fraction) between the current element and
# the most-recent non-null element at least `n` period(s) before it.
#
# @param n [Object] number of periods to look back (default 1); parsed into an expression
# @return [Expr]
def pct_change(n: 1)
  wrap_expr(_rbexpr.pct_change(Utils.parse_into_expression(n)))
end

#peak_max ⇒ Expr

Get a boolean mask of the local maximum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(Polars.col("a").peak_max)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ false │
# │ false │
# │ true  │
# └───────┘


3073
3074
3075
# File 'lib/polars/expr.rb', line 3073

# Get a boolean mask of the local maximum peaks.
#
# @return [Expr] boolean expression (delegates to the native engine)
def peak_max
  wrap_expr(_rbexpr.peak_max)
end

#peak_min ⇒ Expr

Get a boolean mask of the local minimum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
df.select(Polars.col("a").peak_min)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ false │
# │ true  │
# │ false │
# └───────┘


3097
3098
3099
# File 'lib/polars/expr.rb', line 3097

# Get a boolean mask of the local minimum peaks.
#
# @return [Expr] boolean expression (delegates to the native engine)
def peak_min
  wrap_expr(_rbexpr.peak_min)
end

#pipe(function, *args, **kwargs) ⇒ Object

Offers a structured way to apply a sequence of user-defined functions (UDFs).

Examples:

extract_number = lambda do |expr|
  # Extract the digits from a string.
  expr.str.extract('\d+', group_index: 0).cast(Polars::Int64)
end

scale_negative_even = lambda do |expr, n: 1|
  # Set even numbers negative, and scale by a user-supplied value.
  expr = Polars.when(expr % 2 == 0).then(-expr).otherwise(expr)
  expr * n
end

df = Polars::DataFrame.new({"val" => ["a: 1", "b: 2", "c: 3", "d: 4"]})
df.with_columns(
  udfs: Polars.col("val").pipe(extract_number).pipe(scale_negative_even, n: 5)
)
# =>
# shape: (4, 2)
# ┌──────┬──────┐
# │ val  ┆ udfs │
# │ ---  ┆ ---  │
# │ str  ┆ i64  │
# ╞══════╪══════╡
# │ a: 1 ┆ 5    │
# │ b: 2 ┆ -10  │
# │ c: 3 ┆ 15   │
# │ d: 4 ┆ -20  │
# └──────┴──────┘


482
483
484
485
486
487
488
# File 'lib/polars/expr.rb', line 482

# Offers a structured way to apply a sequence of user-defined functions (UDFs):
# calls `function` with this expression as the first argument, forwarding any
# extra positional and keyword arguments.
#
# @param function [#call] callable taking this expression first
# @return [Object] whatever `function` returns
def pipe(function, *args, **kwargs)
  function.call(self, *args, **kwargs)
end

#pow(exponent) ⇒ Expr

Raise expression to the power of exponent.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
df.with_columns(
  Polars.col("x").pow(3).alias("cube"),
  Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
)
# =>
# shape: (4, 3)
# ┌─────┬──────┬────────────┐
# │ x   ┆ cube ┆ x ** xlog2 │
# │ --- ┆ ---  ┆ ---        │
# │ i64 ┆ i64  ┆ f64        │
# ╞═════╪══════╪════════════╡
# │ 1   ┆ 1    ┆ 1.0        │
# │ 2   ┆ 8    ┆ 2.0        │
# │ 4   ┆ 64   ┆ 16.0       │
# │ 8   ┆ 512  ┆ 512.0      │
# └─────┴──────┴────────────┘


4317
4318
4319
# File 'lib/polars/expr.rb', line 4317

# Raise expression to the power of `exponent` (equivalent of `expr ** exponent`).
#
# @param exponent [Object] power to raise to
# @return [Expr]
def pow(exponent)
  self**exponent
end

#product ⇒ Expr

Compute the product of an expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").product)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 6   │
# └─────┘


2473
2474
2475
# File 'lib/polars/expr.rb', line 2473

# Compute the product of an expression.
#
# @return [Expr] (delegates to the native engine)
def product
  wrap_expr(_rbexpr.product)
end

#qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories based on their quantiles.

Examples:

Divide a column into three categories according to pre-defined quantile probabilities.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ a    │
# │ -1  ┆ a    │
# │ 0   ┆ b    │
# │ 1   ┆ b    │
# │ 2   ┆ c    │
# └─────┴──────┘

Divide a column into two categories using uniform quantile probabilities.

df.with_columns(
  Polars.col("foo")
    .qcut(2, labels: ["low", "high"], left_closed: true)
    .alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ low  │
# │ -1  ┆ low  │
# │ 0   ┆ high │
# │ 1   ┆ high │
# │ 2   ┆ high │
# └─────┴──────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
).unnest("qcut")
# =>
# shape: (5, 3)
# ┌─────┬────────────┬────────────┐
# │ foo ┆ breakpoint ┆ category   │
# │ --- ┆ ---        ┆ ---        │
# │ i64 ┆ f64        ┆ cat        │
# ╞═════╪════════════╪════════════╡
# │ -2  ┆ -1.0       ┆ (-inf, -1] │
# │ -1  ┆ -1.0       ┆ (-inf, -1] │
# │ 0   ┆ 1.0        ┆ (-1, 1]    │
# │ 1   ┆ 1.0        ┆ (-1, 1]    │
# │ 2   ┆ inf        ┆ (1, inf]   │
# └─────┴────────────┴────────────┘


3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
# File 'lib/polars/expr.rb', line 3308

# Bin continuous values into discrete categories based on their quantiles.
#
# An Integer `quantiles` requests that many uniformly spaced quantile
# probabilities; an array is taken as explicit probabilities.
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  shared_args = [labels, left_closed, allow_duplicates, include_breaks]
  binned =
    if quantiles.is_a?(Integer)
      _rbexpr.qcut_uniform(quantiles, *shared_args)
    else
      _rbexpr.qcut(quantiles, *shared_args)
    end
  wrap_expr(binned)
end

#quantile(quantile, interpolation: "nearest") ⇒ Expr

Get quantile value.

Examples:

df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
df.select(Polars.col("a").quantile(0.3))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘


3170
3171
3172
3173
# File 'lib/polars/expr.rb', line 3170

# Get the quantile value of this expression.
#
# `quantile` may itself be an expression; strings are treated as column
# references rather than literals here.
def quantile(quantile, interpolation: "nearest")
  quantile_rbexpr = Utils.parse_into_expression(quantile, str_as_lit: false)
  wrap_expr(_rbexpr.quantile(quantile_rbexpr, interpolation))
end

#radiansExpr

Convert from degrees to radians.

Examples:

df = Polars::DataFrame.new({"a" => [-720, -540, -360, -180, 0, 180, 360, 540, 720]})
df.select(Polars.col("a").radians)
# =>
# shape: (9, 1)
# ┌────────────┐
# │ a          │
# │ ---        │
# │ f64        │
# ╞════════════╡
# │ -12.566371 │
# │ -9.424778  │
# │ -6.283185  │
# │ -3.141593  │
# │ 0.0        │
# │ 3.141593   │
# │ 6.283185   │
# │ 9.424778   │
# │ 12.566371  │
# └────────────┘


7254
7255
7256
# File 'lib/polars/expr.rb', line 7254

# Convert values from degrees to radians.
#
# @return [Expr]
def radians
  converted = _rbexpr.radians
  wrap_expr(converted)
end

#rank(method: "average", descending: false, seed: nil) ⇒ Expr

Assign ranks to data, dealing with ties appropriately.

Examples:

The 'average' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank)
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# │ 4.5 │
# │ 1.5 │
# │ 1.5 │
# │ 4.5 │
# └─────┘

The 'ordinal' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank(method: "ordinal"))
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# │ 4   │
# │ 1   │
# │ 2   │
# │ 5   │
# └─────┘


6701
6702
6703
# File 'lib/polars/expr.rb', line 6701

# Assign ranks to data, dealing with ties according to `method`.
#
# `seed` is only relevant for methods that break ties randomly.
def rank(method: "average", descending: false, seed: nil)
  ranked = _rbexpr.rank(method, descending, seed)
  wrap_expr(ranked)
end

#rechunkExpr

Create a single chunk of memory for this Series.

Examples:

Create a Series with 3 nulls, append column a then rechunk

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
# =>
# shape: (6, 1)
# ┌────────┐
# │ repeat │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ null   │
# │ null   │
# │ null   │
# │ 1      │
# │ 1      │
# │ 2      │
# └────────┘


876
877
878
# File 'lib/polars/expr.rb', line 876

# Consolidate the underlying data into a single chunk of memory.
#
# @return [Expr]
def rechunk
  contiguous = _rbexpr.rechunk
  wrap_expr(contiguous)
end

#reinterpret(signed: nil) ⇒ Expr

Reinterpret the underlying bits as a signed/unsigned integer.

This operation is only allowed for 64-bit integers. For integers with fewer bits, you can safely use the cast operation instead.

Examples:

s = Polars::Series.new("a", [1, 1, 2], dtype: Polars::UInt64)
df = Polars::DataFrame.new([s])
df.select(
  [
    Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
    Polars.col("a").alias("original")
  ]
)
# =>
# shape: (3, 2)
# ┌───────────────┬──────────┐
# │ reinterpreted ┆ original │
# │ ---           ┆ ---      │
# │ i64           ┆ u64      │
# ╞═══════════════╪══════════╡
# │ 1             ┆ 1        │
# │ 1             ┆ 1        │
# │ 2             ┆ 2        │
# └───────────────┴──────────┘


4599
4600
4601
4602
4603
4604
4605
4606
4607
# File 'lib/polars/expr.rb', line 4599

# Reinterpret the underlying bits as a signed or unsigned 64-bit integer.
#
# When `signed` is omitted, a deprecation warning is emitted and the current
# default (`false`) is used; the default is slated to flip to `true`.
def reinterpret(signed: nil)
  # TODO update
  effective_signed = signed
  if effective_signed.nil?
    warn "The default `signed` for `reinterpret` method will change from `false` to `true` in a future version"
    effective_signed = false
  end

  wrap_expr(_rbexpr.reinterpret(effective_signed))
end

#repeat_by(by) ⇒ Expr

Repeat the elements in this Series as specified in the given expression.

The repeated elements are expanded into a List.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => ["x", "y", "z"],
    "n" => [1, 2, 3]
  }
)
df.select(Polars.col("a").repeat_by("n"))
# =>
# shape: (3, 1)
# ┌─────────────────┐
# │ a               │
# │ ---             │
# │ list[str]       │
# ╞═════════════════╡
# │ ["x"]           │
# │ ["y", "y"]      │
# │ ["z", "z", "z"] │
# └─────────────────┘


4410
4411
4412
4413
# File 'lib/polars/expr.rb', line 4410

# Repeat each element as specified by `by`, expanding into a List column.
#
# Strings in `by` are treated as column references, not literals.
def repeat_by(by)
  counts_rbexpr = Utils.parse_into_expression(by, str_as_lit: false)
  wrap_expr(_rbexpr.repeat_by(counts_rbexpr))
end

#replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil) ⇒ Expr

Replace values by different values.

Examples:

Replace a single value by another value. Values that were not replaced remain unchanged.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
df.with_columns(replaced: Polars.col("a").replace(2, 100))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 3        │
# └─────┴──────────┘

Replace multiple values by passing arrays to the old and new parameters.

df.with_columns(replaced: Polars.col("a").replace([2, 3], [100, 200]))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.

mapping = {2 => 100, 3 => 200}
df.with_columns(replaced: Polars.col("a").replace(mapping, default: -1))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ -1       │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Replacing by values of a different data type sets the return type based on a combination of the new data type and either the original data type or the default data type if it was set.

df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
mapping = {"x" => 1, "y" => 2, "z" => 3}
df.with_columns(replaced: Polars.col("a").replace(mapping))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ str      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘
df.with_columns(replaced: Polars.col("a").replace(mapping, default: nil))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ i64      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Set the return_dtype parameter to control the resulting data type directly.

df.with_columns(
  replaced: Polars.col("a").replace(mapping, return_dtype: Polars::UInt8)
)
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ u8       │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Expression input is supported for all parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
df.with_columns(
  replaced: Polars.col("a").replace(
    Polars.col("a").max,
    Polars.col("b").sum,
    default: Polars.col("b")
  )
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬──────────┐
# │ a   ┆ b   ┆ replaced │
# │ --- ┆ --- ┆ ---      │
# │ i64 ┆ f64 ┆ f64      │
# ╞═════╪═════╪══════════╡
# │ 1   ┆ 1.5 ┆ 1.5      │
# │ 2   ┆ 2.5 ┆ 2.5      │
# │ 2   ┆ 5.0 ┆ 5.0      │
# │ 3   ┆ 1.0 ┆ 10.0     │
# └─────┴─────┴──────────┘


8075
8076
8077
8078
8079
8080
8081
8082
8083
8084
8085
8086
8087
8088
8089
8090
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
# File 'lib/polars/expr.rb', line 8075

# Replace values by different values; unmatched values are left unchanged.
#
# Supplying `default:` delegates to `replace_strict`, which instead replaces
# every value. A Hash `old` is sugar for parallel old/new arrays.
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
  # With an explicit default, the strict variant implements the semantics.
  unless default.eql?(NO_DEFAULT)
    return replace_strict(old, new, default: default, return_dtype: return_dtype)
  end

  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
    # Hash keys are the values to match; hash values are their replacements.
    new = Series.new(old.values)
    old = Series.new(old.keys)
  else
    old = Series.new(old) if old.is_a?(::Array)
    new = Series.new(new) if new.is_a?(::Array)
  end

  old_rbexpr = Utils.parse_into_expression(old, str_as_lit: true)
  new_rbexpr = Utils.parse_into_expression(new, str_as_lit: true)

  replaced = wrap_expr(_rbexpr.replace(old_rbexpr, new_rbexpr))

  return_dtype.nil? ? replaced : replaced.cast(return_dtype)
end

#replace_strict(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil) ⇒ Expr

Note:

The global string cache must be enabled when replacing categorical values.

Replace all values by different values.

Examples:

Replace values by passing arrays to the old and new parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
df.with_columns(
  replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
)
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 100      │
# │ 2   ┆ 200      │
# │ 2   ┆ 200      │
# │ 3   ┆ 300      │
# └─────┴──────────┘

By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.

mapping = {2 => 200, 3 => 300}
df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ -1       │
# │ 2   ┆ 200      │
# │ 2   ┆ 200      │
# │ 3   ┆ 300      │
# └─────┴──────────┘

Replacing by values of a different data type sets the return type based on a combination of the new data type and the default data type.

df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
mapping = {"x" => 1, "y" => 2, "z" => 3}
df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ i64      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘
df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ str      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Set the return_dtype parameter to control the resulting data type directly.

df.with_columns(
  replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
)
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ u8       │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Expression input is supported for all parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
df.with_columns(
  replaced: Polars.col("a").replace_strict(
    Polars.col("a").max,
    Polars.col("b").sum,
    default: Polars.col("b")
  )
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬──────────┐
# │ a   ┆ b   ┆ replaced │
# │ --- ┆ --- ┆ ---      │
# │ i64 ┆ f64 ┆ f64      │
# ╞═════╪═════╪══════════╡
# │ 1   ┆ 1.5 ┆ 1.5      │
# │ 2   ┆ 2.5 ┆ 2.5      │
# │ 2   ┆ 5.0 ┆ 5.0      │
# │ 3   ┆ 1.0 ┆ 10.0     │
# └─────┴─────┴──────────┘


8231
8232
8233
8234
8235
8236
8237
8238
8239
8240
8241
8242
8243
8244
8245
8246
8247
8248
8249
8250
# File 'lib/polars/expr.rb', line 8231

# Replace all values by different values.
#
# Unlike `replace`, non-null values that are not matched raise an error
# unless a `default:` is provided. A Hash `old` is sugar for parallel
# old/new arrays.
def replace_strict(
  old,
  new = NO_DEFAULT,
  default: NO_DEFAULT,
  return_dtype: nil
)
  # Hash keys are the values to match; hash values are their replacements.
  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
    new = Series.new(old.values)
    old = Series.new(old.keys)
  end

  old_rbexpr = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
  new_rbexpr = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)

  default_rbexpr =
    if default.eql?(NO_DEFAULT)
      nil
    else
      Utils.parse_into_expression(default, str_as_lit: true)
    end

  wrap_expr(
    _rbexpr.replace_strict(old_rbexpr, new_rbexpr, default_rbexpr, return_dtype)
  )
end

#reshape(dimensions) ⇒ Expr

Reshape this Expr to a flat Series or a Series of Lists.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
square = df.select(Polars.col("foo").reshape([3, 3]))
# =>
# shape: (3, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[i64, 3] │
# ╞═══════════════╡
# │ [1, 2, 3]     │
# │ [4, 5, 6]     │
# │ [7, 8, 9]     │
# └───────────────┘
square.select(Polars.col("foo").reshape([9]))
# =>
# shape: (9, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# │ 7   │
# │ 8   │
# │ 9   │
# └─────┘


7300
7301
7302
# File 'lib/polars/expr.rb', line 7300

# Reshape this expression to a flat Series or a Series of Lists/Arrays.
#
# @param dimensions [Array] target shape, e.g. `[3, 3]` or `[9]`.
def reshape(dimensions)
  reshaped = _rbexpr.reshape(dimensions)
  wrap_expr(reshaped)
end

#reverseExpr

Reverse the selection.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1, 2, 3, 4, 5],
    "fruits" => ["banana", "banana", "apple", "apple", "banana"],
    "B" => [5, 4, 3, 2, 1],
    "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
  }
)
df.select(
  [
    Polars.all,
    Polars.all.reverse.name.suffix("_reverse")
  ]
)
# =>
# shape: (5, 8)
# ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
# │ A   ┆ fruits ┆ B   ┆ cars   ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
# │ --- ┆ ---    ┆ --- ┆ ---    ┆ ---       ┆ ---            ┆ ---       ┆ ---          │
# │ i64 ┆ str    ┆ i64 ┆ str    ┆ i64       ┆ str            ┆ i64       ┆ str          │
# ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
# │ 1   ┆ banana ┆ 5   ┆ beetle ┆ 5         ┆ banana         ┆ 1         ┆ beetle       │
# │ 2   ┆ banana ┆ 4   ┆ audi   ┆ 4         ┆ apple          ┆ 2         ┆ beetle       │
# │ 3   ┆ apple  ┆ 3   ┆ beetle ┆ 3         ┆ apple          ┆ 3         ┆ beetle       │
# │ 4   ┆ apple  ┆ 2   ┆ beetle ┆ 2         ┆ banana         ┆ 4         ┆ audi         │
# │ 5   ┆ banana ┆ 1   ┆ beetle ┆ 1         ┆ banana         ┆ 5         ┆ beetle       │
# └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘


2199
2200
2201
# File 'lib/polars/expr.rb', line 2199

# Reverse the order of the values in this expression.
#
# @return [Expr]
def reverse
  reversed = _rbexpr.reverse
  wrap_expr(reversed)
end

#rleExpr

Get the lengths of runs of identical values.

Examples:

df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
df.select(Polars.col("s").rle).unnest("s")
# =>
# shape: (6, 2)
# ┌─────┬───────┐
# │ len ┆ value │
# │ --- ┆ ---   │
# │ u32 ┆ i64   │
# ╞═════╪═══════╡
# │ 2   ┆ 1     │
# │ 1   ┆ 2     │
# │ 1   ┆ 1     │
# │ 1   ┆ null  │
# │ 1   ┆ 1     │
# │ 2   ┆ 3     │
# └─────┴───────┘


3343
3344
3345
# File 'lib/polars/expr.rb', line 3343

# Get the lengths of runs of identical values (run-length encoding).
#
# @return [Expr] a struct with `len` and `value` fields.
def rle
  encoded = _rbexpr.rle
  wrap_expr(encoded)
end

#rle_idExpr

Map values to run IDs.

Similar to RLE, but it maps each value to an ID corresponding to the run into which it falls. This is especially useful when you want to define groups by runs of identical values rather than the values themselves.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
# =>
# shape: (5, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ a_r ┆ ab_r │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ str  ┆ u32 ┆ u32  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ x    ┆ 0   ┆ 0    │
# │ 2   ┆ x    ┆ 1   ┆ 1    │
# │ 1   ┆ null ┆ 2   ┆ 2    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# └─────┴──────┴─────┴──────┘


3371
3372
3373
# File 'lib/polars/expr.rb', line 3371

# Map each value to the ID of the run of identical values it falls in.
#
# @return [Expr]
def rle_id
  run_ids = _rbexpr.rle_id
  wrap_expr(run_ids)
end

#rolling(index_column:, period:, offset: nil, closed: "right") ⇒ Expr

Create rolling groups based on a temporal or integer column.

If you have a time series <t_0, t_1, ..., t_n>, then by default the windows created will be

  • (t_0 - period, t_0]
  • (t_1 - period, t_1]
  • ...
  • (t_n - period, t_n]

whereas if you pass a non-default offset, then the windows will be

  • (t_0 + offset, t_0 + offset + period]
  • (t_1 + offset, t_1 + offset + period]
  • ...
  • (t_n + offset, t_n + offset + period]

The period and offset arguments are created either from a timedelta, or by using the following string language:

  • 1ns (1 nanosecond)
  • 1us (1 microsecond)
  • 1ms (1 millisecond)
  • 1s (1 second)
  • 1m (1 minute)
  • 1h (1 hour)
  • 1d (1 calendar day)
  • 1w (1 calendar week)
  • 1mo (1 calendar month)
  • 1q (1 calendar quarter)
  • 1y (1 calendar year)
  • 1i (1 index count)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Examples:

dates = [
  "2020-01-01 13:45:48",
  "2020-01-01 16:42:13",
  "2020-01-01 16:45:09",
  "2020-01-02 18:12:48",
  "2020-01-03 19:45:32",
  "2020-01-08 23:16:43"
]
df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_columns(
  Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
)
df.with_columns(
  sum_a: Polars.sum("a").rolling(index_column: "dt", period: "2d"),
  min_a: Polars.min("a").rolling(index_column: "dt", period: "2d"),
  max_a: Polars.max("a").rolling(index_column: "dt", period: "2d")
)
# =>
# shape: (6, 5)
# ┌─────────────────────┬─────┬───────┬───────┬───────┐
# │ dt                  ┆ a   ┆ sum_a ┆ min_a ┆ max_a │
# │ ---                 ┆ --- ┆ ---   ┆ ---   ┆ ---   │
# │ datetime[μs]        ┆ i64 ┆ i64   ┆ i64   ┆ i64   │
# ╞═════════════════════╪═════╪═══════╪═══════╪═══════╡
# │ 2020-01-01 13:45:48 ┆ 3   ┆ 3     ┆ 3     ┆ 3     │
# │ 2020-01-01 16:42:13 ┆ 7   ┆ 10    ┆ 3     ┆ 7     │
# │ 2020-01-01 16:45:09 ┆ 5   ┆ 15    ┆ 3     ┆ 7     │
# │ 2020-01-02 18:12:48 ┆ 9   ┆ 24    ┆ 3     ┆ 9     │
# │ 2020-01-03 19:45:32 ┆ 2   ┆ 11    ┆ 2     ┆ 9     │
# │ 2020-01-08 23:16:43 ┆ 1   ┆ 1     ┆ 1     ┆ 1     │
# └─────────────────────┴─────┴───────┴───────┴───────┘


2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
# File 'lib/polars/expr.rb', line 2940

# Create rolling groups based on a temporal or integer `index_column`.
#
# When `offset` is omitted it defaults to `-period`, yielding windows of
# the form (t - period, t] (subject to `closed`).
def rolling(
  index_column:,
  period:,
  offset: nil,
  closed: "right"
)
  index_rbexpr = Utils.parse_into_expression(index_column)
  # Default offset is the negated period, matching (t - period, t] windows.
  if offset.nil?
    offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
  end

  wrap_expr(
    _rbexpr.rolling(
      index_rbexpr,
      Utils.parse_as_duration_string(period),
      Utils.parse_as_duration_string(offset),
      closed
    )
  )
end

#rolling_kurtosis(window_size, fisher: true, bias: true, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Compute a rolling kurtosis.

The window at a given row will include the row itself, and the window_size - 1 elements before it.

Examples:

df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
df.select(Polars.col("a").rolling_kurtosis(3))
# =>
# shape: (4, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ -1.5 │
# │ -1.5 │
# └──────┘


6535
6536
6537
6538
6539
6540
6541
6542
6543
6544
6545
6546
6547
6548
6549
6550
6551
# File 'lib/polars/expr.rb', line 6535

# Compute a rolling kurtosis over a window of `window_size` rows.
#
# The window at a given row covers the row itself and the
# `window_size - 1` rows before it.
def rolling_kurtosis(
  window_size,
  fisher: true,
  bias: true,
  min_samples: nil,
  center: false
)
  kurtosis_rbexpr = _rbexpr.rolling_kurtosis(window_size, fisher, bias, min_samples, center)
  wrap_expr(kurtosis_rbexpr)
end

#rolling_map(window_size, weights: nil, min_samples: nil, center: false, &function) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Compute a custom rolling window function.

Examples:

df = Polars::DataFrame.new({"a" => [11.0, 2.0, 9.0, Float::NAN, 8.0]})
df.select(Polars.col("a").rolling_map(3) { |v| v.drop_nans.sum })
# =>
# shape: (5, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 22.0 │
# │ 11.0 │
# │ 17.0 │
# └──────┘


6588
6589
6590
6591
6592
6593
6594
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
6605
6606
6607
6608
6609
6610
6611
6612
# File 'lib/polars/expr.rb', line 6588

# Compute a custom rolling window function.
#
# The block receives each window as a Series and may return either a
# Series or a scalar; scalars are wrapped into a one-element Series.
def rolling_map(
  window_size,
  weights: nil,
  min_samples: nil,
  center: false,
  &function
)
  # Require a full window by default.
  min_samples = window_size if min_samples.nil?

  # Bridge between the native layer and the user block: wrap the raw
  # series going in, unwrap (or box) the result coming out.
  callback = lambda do |rbs|
    result = function.(Utils.wrap_s(rbs))
    result.is_a?(Series) ? result._s : Series.new([result])._s
  end

  wrap_expr(
    _rbexpr.rolling_map(callback, window_size, weights, min_samples, center)
  )
end

#rolling_max(window_size, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling max (moving max) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_max(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# │ 6.0  │
# └──────┘


5905
5906
5907
5908
5909
5910
5911
5912
5913
5914
5915
5916
# File 'lib/polars/expr.rb', line 5905

# Apply a rolling max (moving max) over the values in this expression.
#
# Optional `weights` are multiplied into each window before aggregating.
def rolling_max(window_size, weights: nil, min_samples: nil, center: false)
  max_rbexpr = _rbexpr.rolling_max(window_size, weights, min_samples, center)
  wrap_expr(max_rbexpr)
end

#rolling_max_by(by, window_size, min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling max based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling max with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_max │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 2               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 3               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 4               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 20              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 21              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 22              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 23              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 24              │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling max with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_max: Polars.col("index").rolling_max_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_max │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 2               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 3               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 4               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 20              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 21              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 22              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 23              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 24              │
# └───────┴─────────────────────┴─────────────────┘


4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
# File 'lib/polars/expr.rb', line 4916

# Apply a rolling max with windows defined by another column `by`.
#
# `window_size` is a duration/index-count string; `closed` controls which
# window edges are inclusive.
def rolling_max_by(
  by,
  window_size,
  min_samples: 1,
  closed: "right"
)
  prepared_window = _prepare_rolling_by_window_args(window_size)
  by_rbexpr = Utils.parse_into_expression(by)
  wrap_expr(
    _rbexpr.rolling_max_by(by_rbexpr, prepared_window, min_samples, closed)
  )
end

#rolling_mean(window_size, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling mean (moving mean) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
df.select(
  [
    Polars.col("A").rolling_mean(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.5  │
# │ 7.0  │
# │ 4.0  │
# │ 9.0  │
# │ 13.0 │
# └──────┘


5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
# File 'lib/polars/expr.rb', line 5983

# Apply a rolling mean (moving mean) over the values in this expression.
#
# Optional `weights` are multiplied into each window before aggregating.
def rolling_mean(window_size, weights: nil, min_samples: nil, center: false)
  mean_rbexpr = _rbexpr.rolling_mean(window_size, weights, min_samples, center)
  wrap_expr(mean_rbexpr)
end

#rolling_mean_by(by, window_size, min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling mean based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling mean with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_mean: Polars.col("index").rolling_mean_by(
    "date", "2h"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────┐
# │ index ┆ date                ┆ rolling_row_mean │
# │ ---   ┆ ---                 ┆ ---              │
# │ u32   ┆ datetime[ns]        ┆ f64              │
# ╞═══════╪═════════════════════╪══════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0              │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5              │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.5              │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.5              │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.5              │
# │ …     ┆ …                   ┆ …                │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.5             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.5             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.5             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.5             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.5             │
# └───────┴─────────────────────┴──────────────────┘

Compute the rolling mean with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_mean: Polars.col("index").rolling_mean_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────┐
# │ index ┆ date                ┆ rolling_row_mean │
# │ ---   ┆ ---                 ┆ ---              │
# │ u32   ┆ datetime[ns]        ┆ f64              │
# ╞═══════╪═════════════════════╪══════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0              │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5              │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0              │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.0              │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.0              │
# │ …     ┆ …                   ┆ …                │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.0             │
# └───────┴─────────────────────┴──────────────────┘


5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
# File 'lib/polars/expr.rb', line 5044

# Apply a rolling mean whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions (presumably temporal — see surrounding docs).
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed ("left", "right", "both", "none").
#
# @return [Expr]
def rolling_mean_by(by, window_size, min_samples: 1, closed: "right")
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  wrap_expr(_rbexpr.rolling_mean_by(by_expr, parsed_window, min_samples, closed))
end

#rolling_median(window_size, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling median.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_median(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 6.0  │
# └──────┘


6297
6298
6299
6300
6301
6302
6303
6304
6305
6306
6307
6308
# File 'lib/polars/expr.rb', line 6297

# Compute a rolling median over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_median(window_size, weights: nil, min_samples: nil, center: false)
  wrap_expr(_rbexpr.rolling_median(window_size, weights, min_samples, center))
end

#rolling_median_by(by, window_size, min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling median based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
  {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling median with the temporal windows closed on the right:

df_temporal.with_columns(
  rolling_row_median: Polars.col("index").rolling_median_by(
    "date", "2h"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬────────────────────┐
# │ index ┆ date                ┆ rolling_row_median │
# │ ---   ┆ ---                 ┆ ---                │
# │ u32   ┆ datetime[ns]        ┆ f64                │
# ╞═══════╪═════════════════════╪════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0                │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5                │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.5                │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.5                │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.5                │
# │ …     ┆ …                   ┆ …                  │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.5               │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.5               │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.5               │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.5               │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.5               │
# └───────┴─────────────────────┴────────────────────┘


5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
5558
# File 'lib/polars/expr.rb', line 5547

# Compute a rolling median whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
#
# @return [Expr]
def rolling_median_by(by, window_size, min_samples: 1, closed: "right")
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  wrap_expr(
    _rbexpr.rolling_median_by(by_expr, parsed_window, min_samples, closed)
  )
end

#rolling_min(window_size, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling min (moving min) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_min(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 1.0  │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# └──────┘


5827
5828
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
# File 'lib/polars/expr.rb', line 5827

# Apply a rolling min (moving min) over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_min(window_size, weights: nil, min_samples: nil, center: false)
  wrap_expr(_rbexpr.rolling_min(window_size, weights, min_samples, center))
end

#rolling_min_by(by, window_size, min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling min based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
  {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling min with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_min │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23              │
# └───────┴─────────────────────┴─────────────────┘


4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
# File 'lib/polars/expr.rb', line 4790

# Apply a rolling min whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
#
# @return [Expr]
def rolling_min_by(by, window_size, min_samples: 1, closed: "right")
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  wrap_expr(
    _rbexpr.rolling_min_by(by_expr, parsed_window, min_samples, closed)
  )
end

#rolling_quantile(quantile, interpolation: "nearest", window_size: 2, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling quantile.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_quantile(0.33, window_size: 3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 6.0  │
# └──────┘


6375
6376
6377
6378
6379
6380
6381
6382
6383
6384
6385
6386
6387
6388
# File 'lib/polars/expr.rb', line 6375

# Compute a rolling quantile over a fixed-size window.
#
# @param quantile [Float] quantile between 0.0 and 1.0.
# @param interpolation [String] interpolation method (e.g. "nearest").
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_samples: nil,
  center: false
)
  result = _rbexpr.rolling_quantile(
    quantile,
    interpolation,
    window_size,
    weights,
    min_samples,
    center
  )
  wrap_expr(result)
end

#rolling_quantile_by(by, window_size, quantile:, interpolation: "nearest", min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling quantile based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling quantile with the temporal windows closed on the right:

df_temporal.with_columns(
  rolling_row_quantile: Polars.col("index").rolling_quantile_by(
    "date", "2h", quantile: 0.3
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────────┐
# │ index ┆ date                ┆ rolling_row_quantile │
# │ ---   ┆ ---                 ┆ ---                  │
# │ u32   ┆ datetime[ns]        ┆ f64                  │
# ╞═══════╪═════════════════════╪══════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0                  │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.0                  │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0                  │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.0                  │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.0                  │
# │ …     ┆ …                   ┆ …                    │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.0                 │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.0                 │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.0                 │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.0                 │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.0                 │
# └───────┴─────────────────────┴──────────────────────┘


5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
5673
# File 'lib/polars/expr.rb', line 5653

# Compute a rolling quantile whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param quantile [Float] quantile between 0.0 and 1.0 (required).
# @param interpolation [String] interpolation method (e.g. "nearest").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
#
# @return [Expr]
def rolling_quantile_by(
  by,
  window_size,
  quantile:,
  interpolation: "nearest",
  min_samples: 1,
  closed: "right"
)
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  result = _rbexpr.rolling_quantile_by(
    by_expr,
    quantile,
    interpolation,
    parsed_window,
    min_samples,
    closed
  )
  wrap_expr(result)
end

#rolling_rank(window_size, method: "average", seed: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Compute a rolling rank.

A window of length window_size will traverse the array. The values that fill this window will be ranked according to the method parameter. The resulting values will be the rank of the value that is at the end of the sliding window.

Examples:

df = Polars::DataFrame.new({"a" => [1, 4, 4, 1, 9]})
df.select(Polars.col("a").rolling_rank(3, method: "average"))
# =>
# shape: (5, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 2.5  │
# │ 1.0  │
# │ 3.0  │
# └──────┘


6445
6446
6447
6448
6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
# File 'lib/polars/expr.rb', line 6445

# Compute a rolling rank over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param method [String] ranking method (e.g. "average").
# @param seed [Integer, nil] seed, presumably for tie-breaking with random methods — confirm against backend.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_rank(
  window_size,
  method: "average",
  seed: nil,
  min_samples: nil,
  center: false
)
  # Use the instance-level `wrap_expr` for consistency with the other
  # rolling_* methods in this class (was `Utils.wrap_expr`).
  wrap_expr(
    _rbexpr.rolling_rank(
      window_size,
      method,
      seed,
      min_samples,
      center
    )
  )
end

#rolling_rank_by(by, window_size, method: "average", seed: nil, min_samples: 1, closed: "right") ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Compute a rolling rank based on another column.

Given a by column <t_0, t_1, ..., t_n>, then closed: "right" (the default) means the windows will be:

  • (t_0 - window_size, t_0]
  • (t_1 - window_size, t_1]
  • ...
  • (t_n - window_size, t_n]


5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
# File 'lib/polars/expr.rb', line 5740

# Compute a rolling rank whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param method [String] ranking method (e.g. "average").
# @param seed [Integer, nil] seed, presumably for tie-breaking with random methods — confirm against backend.
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
#
# @return [Expr]
def rolling_rank_by(
  by,
  window_size,
  method: "average",
  seed: nil,
  min_samples: 1,
  closed: "right"
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by_rbexpr = Utils.parse_into_expression(by)
  # Use the instance-level `wrap_expr` for consistency with the other
  # rolling_*_by methods in this class (was `Utils.wrap_expr`).
  wrap_expr(
    _rbexpr.rolling_rank_by(
      by_rbexpr,
      window_size,
      method,
      seed,
      min_samples,
      closed
    )
  )
end

#rolling_skew(window_size, bias: true, min_samples: nil, center: false) ⇒ Expr

Compute a rolling skew.

Examples:

df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
df.select(Polars.col("a").rolling_skew(3))
# =>
# shape: (4, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 0.381802 │
# │ 0.47033  │
# └──────────┘


6492
6493
6494
# File 'lib/polars/expr.rb', line 6492

# Compute a rolling skew over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param bias [Boolean] whether the skew estimate is biased — see backend docs for exact semantics.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
  wrap_expr(
    _rbexpr.rolling_skew(window_size, bias, min_samples, center)
  )
end

#rolling_std(window_size, weights: nil, min_samples: nil, center: false, ddof: 1) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling standard deviation.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_std(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 1.527525 │
# │ 2.0      │
# └──────────┘


6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
# File 'lib/polars/expr.rb', line 6141

# Compute a rolling standard deviation over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
# @param ddof [Integer] delta degrees of freedom for the estimator.
#
# @return [Expr]
def rolling_std(window_size, weights: nil, min_samples: nil, center: false, ddof: 1)
  wrap_expr(_rbexpr.rolling_std(window_size, weights, min_samples, center, ddof))
end

#rolling_std_by(by, window_size, min_samples: 1, closed: "right", ddof: 1) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling standard deviation based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling std with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_std │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.707107        │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 0.707107        │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 0.707107        │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 0.707107        │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 0.707107        │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 0.707107        │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 0.707107        │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 0.707107        │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 0.707107        │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling std with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_std: Polars.col("index").rolling_std_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_std │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.707107        │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 1.0             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 1.0             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 1.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 1.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 1.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 1.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 1.0             │
# └───────┴─────────────────────┴─────────────────┘


5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
# File 'lib/polars/expr.rb', line 5303

# Compute a rolling standard deviation whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
# @param ddof [Integer] delta degrees of freedom for the estimator.
#
# @return [Expr]
def rolling_std_by(by, window_size, min_samples: 1, closed: "right", ddof: 1)
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  result = _rbexpr.rolling_std_by(by_expr, parsed_window, min_samples, closed, ddof)
  wrap_expr(result)
end

#rolling_sum(window_size, weights: nil, min_samples: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling sum (moving sum) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_sum(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 3.0  │
# │ 5.0  │
# │ 7.0  │
# │ 9.0  │
# │ 11.0 │
# └──────┘


6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
# File 'lib/polars/expr.rb', line 6061

# Apply a rolling sum (moving sum) over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
#
# @return [Expr]
def rolling_sum(window_size, weights: nil, min_samples: nil, center: false)
  wrap_expr(_rbexpr.rolling_sum(window_size, weights, min_samples, center))
end

#rolling_sum_by(by, window_size, min_samples: 1, closed: "right") ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling sum based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling sum with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_sum │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 3               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 5               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 7               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 39              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 41              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 43              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 45              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 47              │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling sum with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_sum: Polars.col("index").rolling_sum_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_sum │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 3               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 6               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 9               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 57              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 60              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 63              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 66              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 69              │
# └───────┴─────────────────────┴─────────────────┘


5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
# File 'lib/polars/expr.rb', line 5175

# Apply a rolling sum whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
#
# @return [Expr]
def rolling_sum_by(by, window_size, min_samples: 1, closed: "right")
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  wrap_expr(
    _rbexpr.rolling_sum_by(by_expr, parsed_window, min_samples, closed)
  )
end

#rolling_var(window_size, weights: nil, min_samples: nil, center: false, ddof: 1) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling variance.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_var(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 2.333333 │
# │ 4.0      │
# └──────────┘


6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
# File 'lib/polars/expr.rb', line 6222

# Compute a rolling variance over a fixed-size window.
#
# @param window_size [Integer] length of the sliding window.
# @param weights [Array, nil] optional per-position weights for the window.
# @param min_samples [Integer, nil] minimum number of values a window needs to produce a result.
# @param center [Boolean] whether to center the window on the current value.
# @param ddof [Integer] delta degrees of freedom for the estimator.
#
# @return [Expr]
def rolling_var(window_size, weights: nil, min_samples: nil, center: false, ddof: 1)
  wrap_expr(_rbexpr.rolling_var(window_size, weights, min_samples, center, ddof))
end

#rolling_var_by(by, window_size, min_samples: 1, closed: "right", ddof: 1) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling variance based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling var with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_var │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5             │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 0.5             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 0.5             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 0.5             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 0.5             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 0.5             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 0.5             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 0.5             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 0.5             │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling var with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_var: Polars.col("index").rolling_var_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_var │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5             │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 1.0             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 1.0             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 1.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 1.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 1.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 1.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 1.0             │
# └───────┴─────────────────────┴─────────────────┘


5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
# File 'lib/polars/expr.rb', line 5438

# Compute a rolling variance whose windows are determined by the `by` column.
#
# @param by [Object] column defining the window positions.
# @param window_size [String] size of each window (e.g. "2h").
# @param min_samples [Integer] minimum number of values a window needs to produce a result.
# @param closed [String] which side(s) of each window are closed.
# @param ddof [Integer] delta degrees of freedom for the estimator.
#
# @return [Expr]
def rolling_var_by(by, window_size, min_samples: 1, closed: "right", ddof: 1)
  by_expr = Utils.parse_into_expression(by)
  parsed_window = _prepare_rolling_by_window_args(window_size)
  result = _rbexpr.rolling_var_by(by_expr, parsed_window, min_samples, closed, ddof)
  wrap_expr(result)
end

#round(decimals = 0, mode: "half_to_even") ⇒ Expr

Round underlying floating point data by decimals digits.

Examples:

df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
df.select(Polars.col("a").round(1))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.3 │
# │ 0.5 │
# │ 1.0 │
# │ 1.2 │
# └─────┘


1178
1179
1180
# File 'lib/polars/expr.rb', line 1178

# Round underlying floating point data to `decimals` digits.
#
# @param decimals [Integer] number of decimal places to round to.
# @param mode [String] rounding mode (default "half_to_even").
#
# @return [Expr]
def round(decimals = 0, mode: "half_to_even")
  wrap_expr(
    _rbexpr.round(decimals, mode)
  )
end

#round_sig_figs(digits) ⇒ Expr

Round to a number of significant figures.

Examples:

df = Polars::DataFrame.new({"a" => [0.01234, 3.333, 1234.0]})
df.with_columns(Polars.col("a").round_sig_figs(2).alias("round_sig_figs"))
# =>
# shape: (3, 2)
# ┌─────────┬────────────────┐
# │ a       ┆ round_sig_figs │
# │ ---     ┆ ---            │
# │ f64     ┆ f64            │
# ╞═════════╪════════════════╡
# │ 0.01234 ┆ 0.012          │
# │ 3.333   ┆ 3.3            │
# │ 1234.0  ┆ 1200.0         │
# └─────────┴────────────────┘


1203
1204
1205
# File 'lib/polars/expr.rb', line 1203

# Round to a number of significant figures.
#
# @param digits [Integer] number of significant figures to keep.
#
# @return [Expr]
def round_sig_figs(digits)
  wrap_expr(
    _rbexpr.round_sig_figs(digits)
  )
end

#sample(fraction: nil, with_replacement: nil, shuffle: false, seed: nil, n: nil) ⇒ Expr

Sample from this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").sample(fraction: 1.0, with_replacement: true, seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# │ 3   │
# │ 1   │
# └─────┘


7363
7364
7365
7366
7367
7368
7369
7370
7371
7372
7373
7374
7375
7376
7377
7378
7379
7380
7381
7382
7383
7384
7385
7386
7387
7388
7389
7390
7391
7392
# File 'lib/polars/expr.rb', line 7363

# Sample from this expression, either by count (`n`) or by `fraction`.
#
# @param fraction [Float, nil] fraction of values to sample (defaults to 1.0 when neither `n` nor `fraction` given).
# @param with_replacement [Boolean, nil] sample with replacement; currently defaults to `true` with a deprecation warning.
# @param shuffle [Boolean] whether to shuffle the sampled values.
# @param seed [Integer, nil] seed for deterministic sampling.
# @param n [Integer, nil] exact number of values to sample; mutually exclusive with `fraction`.
#
# @raise [ArgumentError] if both `n` and `fraction` are given.
# @return [Expr]
def sample(
  fraction: nil,
  with_replacement: nil,
  shuffle: false,
  seed: nil,
  n: nil
)
  # TODO update
  if with_replacement.nil?
    warn "The default `with_replacement` for `sample` method will change from `true` to `false` in a future version"
    with_replacement = true
  end

  if !n.nil? && !fraction.nil?
    raise ArgumentError, "cannot specify both `n` and `fraction`"
  end

  unless n.nil?
    # `fraction` is guaranteed nil here (the conflicting case raised above).
    n_expr = Utils.parse_into_expression(n)
    return wrap_expr(_rbexpr.sample_n(n_expr, with_replacement, shuffle, seed))
  end

  frac = fraction.nil? ? 1.0 : fraction
  frac_expr = Utils.parse_into_expression(frac)
  wrap_expr(_rbexpr.sample_frac(frac_expr, with_replacement, shuffle, seed))
end

#search_sorted(element, side: "any", descending: false) ⇒ Expr

Find indices where elements should be inserted to maintain order.

Examples:

df = Polars::DataFrame.new(
  {
    "values" => [1, 2, 3, 5]
  }
)
df.select(
  [
    Polars.col("values").search_sorted(0).alias("zero"),
    Polars.col("values").search_sorted(3).alias("three"),
    Polars.col("values").search_sorted(6).alias("six")
  ]
)
# =>
# shape: (1, 3)
# ┌──────┬───────┬─────┐
# │ zero ┆ three ┆ six │
# │ ---  ┆ ---   ┆ --- │
# │ u32  ┆ u32   ┆ u32 │
# ╞══════╪═══════╪═════╡
# │ 0    ┆ 2     ┆ 4   │
# └──────┴───────┴─────┘


1819
1820
1821
1822
# File 'lib/polars/expr.rb', line 1819

# Find indices where elements should be inserted to maintain order.
#
# @param element [Object] value or expression to search for.
# @param side [String] which side to prefer on ties ("any", "left", "right").
# @param descending [Boolean] whether the column is sorted descending.
# @return [Expr]
def search_sorted(element, side: "any", descending: false)
  target = Utils.parse_into_expression(element, str_as_lit: false)
  wrap_expr(_rbexpr.search_sorted(target, side, descending))
end

#set_sorted(descending: false) ⇒ Expr

Note:

This can lead to incorrect results if this Series is not sorted!! Use with care!

Flags the expression as 'sorted'.

Enables downstream code to use fast paths for sorted arrays.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").set_sorted.max)
# =>
# shape: (1, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 3      │
# └────────┘


7845
7846
7847
# File 'lib/polars/expr.rb', line 7845

# Flags the expression as 'sorted', enabling fast paths downstream.
#
# @note This can lead to incorrect results if the data is not actually sorted.
# @param descending [Boolean] whether the data is sorted in descending order.
# @return [Expr]
def set_sorted(descending: false)
  flagged = _rbexpr.set_sorted_flag(descending)
  wrap_expr(flagged)
end

#shift(n = 1, fill_value: nil) ⇒ Expr

Shift the values by a given period.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").shift(1))
# =>
# shape: (4, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ 1    │
# │ 2    │
# │ 3    │
# └──────┘


1993
1994
1995
1996
1997
1998
1999
# File 'lib/polars/expr.rb', line 1993

# Shift the values by a given period.
#
# @param n [Integer, Expr] number of places to shift (may be negative).
# @param fill_value [Object, nil] value used to fill the resulting nulls.
# @return [Expr]
def shift(n = 1, fill_value: nil)
  unless fill_value.nil?
    fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
  end
  periods = Utils.parse_into_expression(n)
  wrap_expr(_rbexpr.shift(periods, fill_value))
end

#shuffle(seed: nil) ⇒ Expr

Shuffle the contents of this expr.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").shuffle(seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# │ 3   │
# │ 1   │
# └─────┘


7326
7327
7328
7329
7330
7331
# File 'lib/polars/expr.rb', line 7326

# Shuffle the contents of this expr.
#
# @param seed [Integer, nil] seed for the RNG; a random seed is chosen when nil.
# @return [Expr]
def shuffle(seed: nil)
  seed = rand(10000) if seed.nil?
  wrap_expr(_rbexpr.shuffle(seed))
end

#signExpr

Compute the element-wise indication of the sign.

Examples:

df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
df.select(Polars.col("a").sign)
# =>
# shape: (5, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ -1.0 │
# │ -0.0 │
# │ 0.0  │
# │ 1.0  │
# │ null │
# └──────┘


6938
6939
6940
# File 'lib/polars/expr.rb', line 6938

# Compute the element-wise indication of the sign.
#
# @return [Expr]
def sign
  signs = _rbexpr.sign
  wrap_expr(signs)
end

#sinExpr

Compute the element-wise value for the sine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").sin)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘


6958
6959
6960
# File 'lib/polars/expr.rb', line 6958

# Compute the element-wise value for the sine.
#
# @return [Expr]
def sin
  sine = _rbexpr.sin
  wrap_expr(sine)
end

#sinhExpr

Compute the element-wise value for the hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").sinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.175201 │
# └──────────┘


7098
7099
7100
# File 'lib/polars/expr.rb', line 7098

# Compute the element-wise value for the hyperbolic sine.
#
# @return [Expr]
def sinh
  hyperbolic = _rbexpr.sinh
  wrap_expr(hyperbolic)
end

#skew(bias: true) ⇒ Expr

Compute the sample skewness of a data set.

For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. The function skewtest can be used to determine if the skewness value is close enough to zero, statistically speaking.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").skew)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.343622 │
# └──────────┘


6799
6800
6801
# File 'lib/polars/expr.rb', line 6799

# Compute the sample skewness of a data set.
#
# @param bias [Boolean] when false, the calculation is corrected for statistical bias.
# @return [Expr]
def skew(bias: true)
  skewness = _rbexpr.skew(bias)
  wrap_expr(skewness)
end

#slice(offset, length = nil) ⇒ Expr

Get a slice of this expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4, 4, 4]
  }
)
df.select(Polars.all.slice(1, 2))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 9   ┆ 4   │
# │ 10  ┆ 4   │
# └─────┴─────┘


811
812
813
814
815
816
817
818
819
# File 'lib/polars/expr.rb', line 811

# Get a slice of this expression.
#
# @param offset [Integer, Expr] start index; negative counts from the end.
# @param length [Integer, Expr, nil] length of the slice; nil slices to the end.
# @return [Expr]
def slice(offset, length = nil)
  # Lift plain values into literal expressions before delegating.
  offset = Polars.lit(offset) unless offset.is_a?(Expr)
  length = Polars.lit(length) unless length.is_a?(Expr)
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
end

#sort(descending: false, nulls_last: false) ⇒ Expr

Sort this column. In projection/ selection context the whole column is sorted.

If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("value").sort)
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 1     │
# │ 2     │
# │ 3     │
# │ 4     │
# │ 98    │
# │ 99    │
# └───────┘
df.select(Polars.col("value").sort)
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 1     │
# │ 2     │
# │ 3     │
# │ 4     │
# │ 98    │
# │ 99    │
# └───────┘
df.group_by("group").agg(Polars.col("value").sort)
# =>
# shape: (2, 2)
# ┌───────┬────────────┐
# │ group ┆ value      │
# │ ---   ┆ ---        │
# │ str   ┆ list[i64]  │
# ╞═══════╪════════════╡
# │ two   ┆ [3, 4, 99] │
# │ one   ┆ [1, 2, 98] │
# └───────┴────────────┘


1380
1381
1382
# File 'lib/polars/expr.rb', line 1380

# Sort this column. In a group by context, the groups are sorted.
#
# @param descending [Boolean] sort in descending order.
# @param nulls_last [Boolean] place null values last.
# @return [Expr]
def sort(descending: false, nulls_last: false)
  ordered = _rbexpr.sort_with(descending, nulls_last)
  wrap_expr(ordered)
end

#sort_by(by, *more_by, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr

Sort this column by the ordering of another column, or multiple other columns.

In projection/ selection context the whole column is sorted. If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("group").sort_by("value"))
# =>
# shape: (6, 1)
# ┌───────┐
# │ group │
# │ ---   │
# │ str   │
# ╞═══════╡
# │ one   │
# │ one   │
# │ two   │
# │ two   │
# │ one   │
# │ two   │
# └───────┘


1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
# File 'lib/polars/expr.rb', line 1875

# Sort this column by the ordering of one or more other columns.
#
# @param by [Object] column(s) or expression(s) to sort by.
# @param more_by [Array] additional sort keys.
# @param descending [Boolean, Array<Boolean>] sort order per key.
# @param nulls_last [Boolean, Array<Boolean>] null placement per key.
# @param multithreaded [Boolean] allow multithreaded sorting.
# @param maintain_order [Boolean] keep order of equal elements stable.
# @return [Expr]
def sort_by(by, *more_by, descending: false, nulls_last: false, multithreaded: true, maintain_order: false)
  keys = Utils.parse_into_list_of_expressions(by, *more_by)
  # Broadcast per-key flags to match the number of sort keys.
  desc_flags = Utils.extend_bool(descending, keys.length, "descending", "by")
  nulls_flags = Utils.extend_bool(nulls_last, keys.length, "nulls_last", "by")
  wrap_expr(
    _rbexpr.sort_by(
      keys, desc_flags, nulls_flags, multithreaded, maintain_order
    )
  )
end

#sqrtExpr

Compute the square root of the elements.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").sqrt)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.414214 │
# │ 2.0      │
# └──────────┘


299
300
301
# File 'lib/polars/expr.rb', line 299

# Compute the square root of the elements.
#
# @return [Expr]
def sqrt
  root = _rbexpr.sqrt
  wrap_expr(root)
end

#std(ddof: 1) ⇒ Expr

Get standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").std)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘


2222
2223
2224
# File 'lib/polars/expr.rb', line 2222

# Get standard deviation.
#
# @param ddof [Integer] delta degrees of freedom used in the divisor (n - ddof).
# @return [Expr]
def std(ddof: 1)
  deviation = _rbexpr.std(ddof)
  wrap_expr(deviation)
end

#strStringExpr

Create an object namespace of all string related methods.



8454
8455
8456
# File 'lib/polars/expr.rb', line 8454

# Create an object namespace of all string related methods.
#
# @return [StringExpr]
def str
  StringExpr.new(self)
end

#structStructExpr

Create an object namespace of all struct related methods.



8461
8462
8463
# File 'lib/polars/expr.rb', line 8461

# Create an object namespace of all struct related methods.
#
# @return [StructExpr]
def struct
  StructExpr.new(self)
end

#sub(other) ⇒ Expr

Method equivalent of subtraction operator expr - other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(
  Polars.col("x").sub(2).alias("x-2"),
  Polars.col("x").sub(Polars.col("x").cum_sum).alias("x-expr"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ x-2 ┆ x-expr │
# │ --- ┆ --- ┆ ---    │
# │ i64 ┆ i64 ┆ i64    │
# ╞═════╪═════╪════════╡
# │ 0   ┆ -2  ┆ 0      │
# │ 1   ┆ -1  ┆ 0      │
# │ 2   ┆ 0   ┆ -1     │
# │ 3   ┆ 1   ┆ -3     │
# │ 4   ┆ 2   ┆ -6     │
# └─────┴─────┴────────┘


4236
4237
4238
# File 'lib/polars/expr.rb', line 4236

# Method equivalent of the subtraction operator `expr - other`.
#
# @param other [Object] numeric value or expression to subtract.
# @return [Expr]
def sub(other)
  self.-(other)
end

#sumExpr

Note:

Dtypes in \{Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues.

Get sum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").sum)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# └─────┘


2413
2414
2415
# File 'lib/polars/expr.rb', line 2413

# Get sum value.
#
# @return [Expr]
def sum
  total = _rbexpr.sum
  wrap_expr(total)
end

#tail(n = 10) ⇒ Expr

Get the last n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.tail(3)
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 5   │
# │ 6   │
# │ 7   │
# └─────┘


3687
3688
3689
# File 'lib/polars/expr.rb', line 3687

# Get the last `n` rows.
#
# @param n [Integer] number of rows to return.
# @return [Expr]
def tail(n = 10)
  last_rows = _rbexpr.tail(n)
  wrap_expr(last_rows)
end

#tanExpr

Compute the element-wise value for the tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.557408 │
# └──────────┘


6998
6999
7000
# File 'lib/polars/expr.rb', line 6998

# Compute the element-wise value for the tangent.
#
# @return [Expr]
def tan
  tangent = _rbexpr.tan
  wrap_expr(tangent)
end

#tanhExpr

Compute the element-wise value for the hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tanh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.761594 │
# └──────────┘


7138
7139
7140
# File 'lib/polars/expr.rb', line 7138

# Compute the element-wise value for the hyperbolic tangent.
#
# @return [Expr]
def tanh
  hyperbolic = _rbexpr.tanh
  wrap_expr(hyperbolic)
end

#to_physicalExpr

Cast to physical representation of the logical dtype.

Examples:

Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
  [
    Polars.col("vals").cast(Polars::Categorical),
    Polars.col("vals")
      .cast(Polars::Categorical)
      .to_physical
      .alias("vals_physical")
  ]
)
# =>
# shape: (4, 2)
# ┌──────┬───────────────┐
# │ vals ┆ vals_physical │
# │ ---  ┆ ---           │
# │ cat  ┆ u32           │
# ╞══════╪═══════════════╡
# │ a    ┆ 0             │
# │ x    ┆ 1             │
# │ null ┆ null          │
# │ a    ┆ 0             │
# └──────┴───────────────┘


206
207
208
# File 'lib/polars/expr.rb', line 206

# Cast to physical representation of the logical dtype.
#
# @return [Expr]
def to_physical
  physical = _rbexpr.to_physical
  wrap_expr(physical)
end

#to_sString Also known as: inspect

Returns a string representing the Expr.



17
18
19
# File 'lib/polars/expr.rb', line 17

# Returns a string representing the Expr.
#
# @return [String]
def to_s
  _rbexpr.to_str
end

#top_k(k: 5) ⇒ Expr

Return the k largest elements.

Use `bottom_k` to obtain the k smallest elements instead.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘


1418
1419
1420
1421
# File 'lib/polars/expr.rb', line 1418

# Return the k largest elements.
#
# @param k [Integer, Expr] number of elements to return.
# @return [Expr]
def top_k(k: 5)
  count = Utils.parse_into_expression(k)
  wrap_expr(_rbexpr.top_k(count))
end

#top_k_by(by, k: 5, reverse: false) ⇒ Expr

Return the elements corresponding to the k largest elements of the by column(s).

Non-null elements are always preferred over null elements, regardless of the value of reverse. The output is not guaranteed to be in any particular order; call `sort` after this function if you wish the output to be sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 4, 5, 6],
    "b" => [6, 5, 4, 3, 2, 1],
    "c" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
  }
)
# =>
# shape: (6, 3)
# ┌─────┬─────┬────────┐
# │ a   ┆ b   ┆ c      │
# │ --- ┆ --- ┆ ---    │
# │ i64 ┆ i64 ┆ str    │
# ╞═════╪═════╪════════╡
# │ 1   ┆ 6   ┆ Apple  │
# │ 2   ┆ 5   ┆ Orange │
# │ 3   ┆ 4   ┆ Apple  │
# │ 4   ┆ 3   ┆ Apple  │
# │ 5   ┆ 2   ┆ Banana │
# │ 6   ┆ 1   ┆ Banana │
# └─────┴─────┴────────┘

Get the top 2 rows by column a or b.

df.select(
  Polars.all.top_k_by("a", k: 2).name.suffix("_top_by_a"),
  Polars.all.top_k_by("b", k: 2).name.suffix("_top_by_b")
)
# =>
# shape: (2, 6)
# ┌────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐
# │ a_top_by_a ┆ b_top_by_a ┆ c_top_by_a ┆ a_top_by_b ┆ b_top_by_b ┆ c_top_by_b │
# │ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        │
# │ i64        ┆ i64        ┆ str        ┆ i64        ┆ i64        ┆ str        │
# ╞════════════╪════════════╪════════════╪════════════╪════════════╪════════════╡
# │ 6          ┆ 1          ┆ Banana     ┆ 1          ┆ 6          ┆ Apple      │
# │ 5          ┆ 2          ┆ Banana     ┆ 2          ┆ 5          ┆ Orange     │
# └────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘

Get the top 2 rows by multiple columns with given order.

df.select(
  Polars.all
  .top_k_by(["c", "a"], k: 2, reverse: [false, true])
  .name.suffix("_by_ca"),
  Polars.all
  .top_k_by(["c", "b"], k: 2, reverse: [false, true])
  .name.suffix("_by_cb")
)
# =>
# shape: (2, 6)
# ┌─────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
# │ a_by_ca ┆ b_by_ca ┆ c_by_ca ┆ a_by_cb ┆ b_by_cb ┆ c_by_cb │
# │ ---     ┆ ---     ┆ ---     ┆ ---     ┆ ---     ┆ ---     │
# │ i64     ┆ i64     ┆ str     ┆ i64     ┆ i64     ┆ str     │
# ╞═════════╪═════════╪═════════╪═════════╪═════════╪═════════╡
# │ 2       ┆ 5       ┆ Orange  ┆ 2       ┆ 5       ┆ Orange  │
# │ 5       ┆ 2       ┆ Banana  ┆ 6       ┆ 1       ┆ Banana  │
# └─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘

Get the top 2 rows by column a in each group.

df.group_by("c", maintain_order: true)
  .agg(Polars.all.top_k_by("a", k: 2))
  .explode(Polars.all.exclude("c"))
# =>
# shape: (5, 3)
# ┌────────┬─────┬─────┐
# │ c      ┆ a   ┆ b   │
# │ ---    ┆ --- ┆ --- │
# │ str    ┆ i64 ┆ i64 │
# ╞════════╪═════╪═════╡
# │ Apple  ┆ 4   ┆ 3   │
# │ Apple  ┆ 3   ┆ 4   │
# │ Orange ┆ 2   ┆ 5   │
# │ Banana ┆ 6   ┆ 1   │
# │ Banana ┆ 5   ┆ 2   │
# └────────┴─────┴─────┘


1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
# File 'lib/polars/expr.rb', line 1518

# Return the elements corresponding to the k largest elements of the `by` column(s).
#
# @param by [Object] column(s) or expression(s) to rank by.
# @param k [Integer, Expr] number of elements to return.
# @param reverse [Boolean, Array<Boolean>] take the smallest instead, per key.
# @return [Expr]
def top_k_by(
  by,
  k: 5,
  reverse: false
)
  count = Utils.parse_into_expression(k)
  keys = Utils.parse_into_list_of_expressions(by)
  # Broadcast the reverse flag to match the number of ranking keys.
  reverse_flags = Utils.extend_bool(reverse, keys.length, "reverse", "by")
  wrap_expr(_rbexpr.top_k_by(keys, count, reverse_flags))
end

#truediv(other) ⇒ Expr

Method equivalent of float division operator expr / other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [-2, -1, 0, 1, 2], "y" => [0.5, 0.0, 0.0, -4.0, -0.5]}
)
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").truediv(Polars.col("y")).alias("x/y")
)
# =>
# shape: (5, 4)
# ┌─────┬──────┬──────┬───────┐
# │ x   ┆ y    ┆ x/2  ┆ x/y   │
# │ --- ┆ ---  ┆ ---  ┆ ---   │
# │ i64 ┆ f64  ┆ f64  ┆ f64   │
# ╞═════╪══════╪══════╪═══════╡
# │ -2  ┆ 0.5  ┆ -1.0 ┆ -4.0  │
# │ -1  ┆ 0.0  ┆ -0.5 ┆ -inf  │
# │ 0   ┆ 0.0  ┆ 0.0  ┆ NaN   │
# │ 1   ┆ -4.0 ┆ 0.5  ┆ -0.25 │
# │ 2   ┆ -0.5 ┆ 1.0  ┆ -4.0  │
# └─────┴──────┴──────┴───────┘


4291
4292
4293
# File 'lib/polars/expr.rb', line 4291

# Method equivalent of the float division operator `expr / other`.
#
# @param other [Object] numeric value or expression to divide by.
# @return [Expr]
def truediv(other)
  self./(other)
end

#unique(maintain_order: false) ⇒ Expr

Get unique values of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").unique(maintain_order: true))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# └─────┘


2630
2631
2632
2633
2634
2635
2636
# File 'lib/polars/expr.rb', line 2630

# Get unique values of this expression.
#
# @param maintain_order [Boolean] keep unique values in order of first appearance
#   (uses the stable variant at the cost of performance).
# @return [Expr]
def unique(maintain_order: false)
  deduped = maintain_order ? _rbexpr.unique_stable : _rbexpr.unique
  wrap_expr(deduped)
end

#unique_countsExpr

Return a count of the unique values in the order of appearance.

This method differs from value_counts in that it does not return the values, only the counts, and might be faster.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").unique_counts
  ]
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ id  │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘


7675
7676
7677
# File 'lib/polars/expr.rb', line 7675

# Return a count of the unique values in the order of appearance.
#
# Unlike `value_counts`, only the counts are returned, not the values.
#
# @return [Expr]
def unique_counts
  counts = _rbexpr.unique_counts
  wrap_expr(counts)
end

#upper_boundExpr

Calculate the upper bound.

Returns a unit Series with the highest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").upper_bound)
# =>
# shape: (1, 1)
# ┌─────────────────────┐
# │ a                   │
# │ ---                 │
# │ i64                 │
# ╞═════════════════════╡
# │ 9223372036854775807 │
# └─────────────────────┘


6914
6915
6916
# File 'lib/polars/expr.rb', line 6914

# Calculate the upper bound.
#
# Returns a unit Series with the highest value possible for the dtype of this expression.
#
# @return [Expr]
def upper_bound
  bound = _rbexpr.upper_bound
  wrap_expr(bound)
end

#value_counts(sort: false, parallel: false, name: nil, normalize: false) ⇒ Expr

Count all unique values and create a struct mapping value to count.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").value_counts(sort: true),
  ]
)
# =>
# shape: (3, 1)
# ┌───────────┐
# │ id        │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {"c",3}   │
# │ {"b",2}   │
# │ {"a",1}   │
# └───────────┘


7628
7629
7630
7631
7632
7633
7634
7635
7636
7637
7638
7639
7640
7641
7642
7643
7644
# File 'lib/polars/expr.rb', line 7628

# Count all unique values and create a struct mapping value to count.
#
# @param sort [Boolean] sort the output by count, descending.
# @param parallel [Boolean] execute the computation in parallel.
# @param name [String, nil] name of the count field; defaults to "count"
#   (or "proportion" when `normalize` is true).
# @param normalize [Boolean] return relative frequencies instead of counts.
# @return [Expr]
def value_counts(
  sort: false,
  parallel: false,
  name: nil,
  normalize: false
)
  if name.nil?
    name = normalize ? "proportion" : "count"
  end
  wrap_expr(_rbexpr.value_counts(sort, parallel, name, normalize))
end

#var(ddof: 1) ⇒ Expr

Get variance.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").var)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘


2245
2246
2247
# File 'lib/polars/expr.rb', line 2245

# Get variance.
#
# @param ddof [Integer] delta degrees of freedom used in the divisor (n - ddof).
# @return [Expr]
def var(ddof: 1)
  variance = _rbexpr.var(ddof)
  wrap_expr(variance)
end

#xor(other) ⇒ Expr

Method equivalent of bitwise exclusive-or operator expr ^ other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [true, false, true, false], "y" => [true, true, false, false]}
)
df.with_columns(Polars.col("x").xor(Polars.col("y")).alias("x ^ y"))
# =>
# shape: (4, 3)
# ┌───────┬───────┬───────┐
# │ x     ┆ y     ┆ x ^ y │
# │ ---   ┆ ---   ┆ ---   │
# │ bool  ┆ bool  ┆ bool  │
# ╞═══════╪═══════╪═══════╡
# │ true  ┆ true  ┆ false │
# │ false ┆ true  ┆ true  │
# │ true  ┆ false ┆ true  │
# │ false ┆ false ┆ false │
# └───────┴───────┴───────┘


4345
4346
4347
# File 'lib/polars/expr.rb', line 4345

# Method equivalent of the bitwise exclusive-or operator `expr ^ other`.
#
# @param other [Object] value or expression to XOR with.
# @return [Expr]
def xor(other)
  self.^(other)
end

#|(other) ⇒ Expr

Bitwise OR.



41
42
43
44
# File 'lib/polars/expr.rb', line 41

# Bitwise OR.
#
# @param other [Object] value or expression to OR with.
# @return [Expr]
def |(other)
  rhs = Utils.parse_into_expression(other)
  wrap_expr(_rbexpr.or_(rhs))
end