Class: Polars::Expr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/expr.rb

Overview

Expressions that can be used in various contexts.

Instance Method Summary collapse

Instance Method Details

#!Expr

Performs boolean not.

Returns:



134
135
136
# File 'lib/polars/expr.rb', line 134

def !
  is_not
end

#!=(other) ⇒ Expr

Not equal.

Returns:



113
114
115
# File 'lib/polars/expr.rb', line 113

def !=(other)
  _from_rbexpr(_rbexpr.neq(_to_expr(other)._rbexpr))
end

#%(other) ⇒ Expr

Returns the modulo.

Returns:



77
78
79
# File 'lib/polars/expr.rb', line 77

def %(other)
  _from_rbexpr(_rbexpr % _to_rbexpr(other))
end

#&(other) ⇒ Expr

Bitwise AND.

Returns:



35
36
37
# File 'lib/polars/expr.rb', line 35

def &(other)
  _from_rbexpr(_rbexpr._and(_to_rbexpr(other)))
end

#*(other) ⇒ Expr

Performs multiplication.

Returns:



63
64
65
# File 'lib/polars/expr.rb', line 63

def *(other)
  _from_rbexpr(_rbexpr * _to_rbexpr(other))
end

#**(power) ⇒ Expr

Raises to the power of exponent.

Returns:



84
85
86
87
# File 'lib/polars/expr.rb', line 84

def **(power)
  exponent = Utils.parse_into_expression(power)
  _from_rbexpr(_rbexpr.pow(exponent))
end

#+(other) ⇒ Expr

Performs addition.

Returns:



49
50
51
# File 'lib/polars/expr.rb', line 49

def +(other)
  _from_rbexpr(_rbexpr + _to_rbexpr(other))
end

#-(other) ⇒ Expr

Performs subtraction.

Returns:



56
57
58
# File 'lib/polars/expr.rb', line 56

def -(other)
  _from_rbexpr(_rbexpr - _to_rbexpr(other))
end

#-@Expr

Performs negation.

Returns:



141
142
143
# File 'lib/polars/expr.rb', line 141

def -@
  _from_rbexpr(_rbexpr.neg)
end

#/(other) ⇒ Expr

Performs division.

Returns:



70
71
72
# File 'lib/polars/expr.rb', line 70

def /(other)
  _from_rbexpr(_rbexpr / _to_rbexpr(other))
end

#<(other) ⇒ Expr

Less than.

Returns:



120
121
122
# File 'lib/polars/expr.rb', line 120

def <(other)
  _from_rbexpr(_rbexpr.lt(_to_expr(other)._rbexpr))
end

#<=(other) ⇒ Expr

Less than or equal.

Returns:



99
100
101
# File 'lib/polars/expr.rb', line 99

def <=(other)
  _from_rbexpr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
end

#==(other) ⇒ Expr

Equal.

Returns:



106
107
108
# File 'lib/polars/expr.rb', line 106

def ==(other)
  _from_rbexpr(_rbexpr.eq(_to_expr(other)._rbexpr))
end

#>(other) ⇒ Expr

Greater than.

Returns:



127
128
129
# File 'lib/polars/expr.rb', line 127

def >(other)
  _from_rbexpr(_rbexpr.gt(_to_expr(other)._rbexpr))
end

#>=(other) ⇒ Expr

Greater than or equal.

Returns:



92
93
94
# File 'lib/polars/expr.rb', line 92

def >=(other)
  _from_rbexpr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
end

#^(other) ⇒ Expr

Bitwise XOR.

Returns:



28
29
30
# File 'lib/polars/expr.rb', line 28

def ^(other)
  _from_rbexpr(_rbexpr._xor(_to_rbexpr(other)))
end

#_hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil) ⇒ Expr

Hash the elements in the selection.

The hash value is of type :u64.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => ["x", nil, "z"]
  }
)
df.with_column(Polars.all._hash(10, 20, 30, 40))
# =>
# shape: (3, 2)
# ┌──────────────────────┬──────────────────────┐
# │ a                    ┆ b                    │
# │ ---                  ┆ ---                  │
# │ u64                  ┆ u64                  │
# ╞══════════════════════╪══════════════════════╡
# │ 4629889412789719550  ┆ 6959506404929392568  │
# │ 16386608652769605760 ┆ 11638928888656214026 │
# │ 11638928888656214026 ┆ 11040941213715918520 │
# └──────────────────────┴──────────────────────┘

Parameters:

  • seed (Integer) (defaults to: 0)

    Random seed parameter. Defaults to 0.

  • seed_1 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

  • seed_2 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

  • seed_3 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

Returns:



3892
3893
3894
3895
3896
3897
3898
# File 'lib/polars/expr.rb', line 3892

def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  k0 = seed
  k1 = seed_1.nil? ? seed : seed_1
  k2 = seed_2.nil? ? seed : seed_2
  k3 = seed_3.nil? ? seed : seed_3
  _from_rbexpr(_rbexpr._hash(k0, k1, k2, k3))
end

#absExpr

Compute absolute values.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [-1.0, 0.0, 1.0, 2.0]
  }
)
df.select(Polars.col("A").abs)
# =>
# shape: (4, 1)
# ┌─────┐
# │ A   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 0.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘

Returns:



5732
5733
5734
# File 'lib/polars/expr.rb', line 5732

def abs
  _from_rbexpr(_rbexpr.abs)
end

#add(other) ⇒ Expr

Method equivalent of addition operator expr + other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").add(2).alias("x+int"),
  Polars.col("x").add(Polars.col("x").cum_prod).alias("x+expr")
)
# =>
# shape: (5, 3)
# ┌─────┬───────┬────────┐
# │ x   ┆ x+int ┆ x+expr │
# │ --- ┆ ---   ┆ ---    │
# │ i64 ┆ i64   ┆ i64    │
# ╞═════╪═══════╪════════╡
# │ 1   ┆ 3     ┆ 2      │
# │ 2   ┆ 4     ┆ 4      │
# │ 3   ┆ 5     ┆ 9      │
# │ 4   ┆ 6     ┆ 28     │
# │ 5   ┆ 7     ┆ 125    │
# └─────┴───────┴────────┘
df = Polars::DataFrame.new(
  {"x" => ["a", "d", "g"], "y": ["b", "e", "h"], "z": ["c", "f", "i"]}
)
df.with_columns(Polars.col("x").add(Polars.col("y")).add(Polars.col("z")).alias("xyz"))
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬─────┐
# │ x   ┆ y   ┆ z   ┆ xyz │
# │ --- ┆ --- ┆ --- ┆ --- │
# │ str ┆ str ┆ str ┆ str │
# ╞═════╪═════╪═════╪═════╡
# │ a   ┆ b   ┆ c   ┆ abc │
# │ d   ┆ e   ┆ f   ┆ def │
# │ g   ┆ h   ┆ i   ┆ ghi │
# └─────┴─────┴─────┴─────┘

Parameters:

  • other (Object)

    numeric or string value; accepts expression input.

Returns:



3481
3482
3483
# File 'lib/polars/expr.rb', line 3481

def add(other)
  self + other
end

#agg_groupsExpr

Get the group indexes of the group by operation.

Should be used in aggregation context only.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [94, 95, 96, 97, 97, 99]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[u32] │
# ╞═══════╪═══════════╡
# │ one   ┆ [0, 1, 2] │
# │ two   ┆ [3, 4, 5] │
# └───────┴───────────┘

Returns:



741
742
743
# File 'lib/polars/expr.rb', line 741

def agg_groups
  _from_rbexpr(_rbexpr.agg_groups)
end

#alias(name) ⇒ Expr

Rename the output of an expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["a", "b", nil]
  }
)
df.select(
  [
    Polars.col("a").alias("bar"),
    Polars.col("b").alias("foo")
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ bar ┆ foo  │
# │ --- ┆ ---  │
# │ i64 ┆ str  │
# ╞═════╪══════╡
# │ 1   ┆ a    │
# │ 2   ┆ b    │
# │ 3   ┆ null │
# └─────┴──────┘

Parameters:

Returns:



324
325
326
# File 'lib/polars/expr.rb', line 324

def alias(name)
  _from_rbexpr(_rbexpr._alias(name))
end

#all(drop_nulls: true) ⇒ Boolean

Check if all boolean values in a Boolean column are true.

This method is an expression - not to be confused with Polars.all which is a function to select all columns.

Examples:

df = Polars::DataFrame.new(
  {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
)
df.select(Polars.col("*").all)
# =>
# shape: (1, 3)
# ┌──────┬───────┬───────┐
# │ TT   ┆ TF    ┆ FF    │
# │ ---  ┆ ---   ┆ ---   │
# │ bool ┆ bool  ┆ bool  │
# ╞══════╪═══════╪═══════╡
# │ true ┆ false ┆ false │
# └──────┴───────┴───────┘

Returns:



223
224
225
# File 'lib/polars/expr.rb', line 223

def all(drop_nulls: true)
  _from_rbexpr(_rbexpr.all(drop_nulls))
end

#any(drop_nulls: true) ⇒ Boolean

Check if any boolean value in a Boolean column is true.

Examples:

df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
df.select(Polars.all.any)
# =>
# shape: (1, 2)
# ┌──────┬───────┐
# │ TF   ┆ FF    │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ false │
# └──────┴───────┘

Returns:



198
199
200
# File 'lib/polars/expr.rb', line 198

def any(drop_nulls: true)
  _from_rbexpr(_rbexpr.any(drop_nulls))
end

#append(other, upcast: true) ⇒ Expr

Append expressions.

This is done by adding the chunks of other to this Series.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.all.head(1).append(Polars.all.tail(1)))
# =>
# shape: (2, 2)
# ┌─────┬──────┐
# │ a   ┆ b    │
# │ --- ┆ ---  │
# │ i64 ┆ i64  │
# ╞═════╪══════╡
# │ 8   ┆ null │
# │ 10  ┆ 4    │
# └─────┴──────┘

Parameters:

  • other (Expr)

    Expression to append.

  • upcast (Boolean) (defaults to: true)

    Cast both Series to the same supertype.

Returns:



853
854
855
856
# File 'lib/polars/expr.rb', line 853

def append(other, upcast: true)
  other = Utils.parse_into_expression(other)
  _from_rbexpr(_rbexpr.append(other, upcast))
end

#approx_n_uniqueExpr Also known as: approx_unique

Approx count unique values.

This is done using the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").approx_n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2187
2188
2189
# File 'lib/polars/expr.rb', line 2187

def approx_n_unique
  _from_rbexpr(_rbexpr.approx_n_unique)
end

#arccosExpr

Compute the element-wise value for the inverse cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").arccos)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘

Returns:



6227
6228
6229
# File 'lib/polars/expr.rb', line 6227

def arccos
  _from_rbexpr(_rbexpr.arccos)
end

#arccoshExpr

Compute the element-wise value for the inverse hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arccosh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



6347
6348
6349
# File 'lib/polars/expr.rb', line 6347

def arccosh
  _from_rbexpr(_rbexpr.arccosh)
end

#arcsinExpr

Compute the element-wise value for the inverse sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsin)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘

Returns:



6207
6208
6209
# File 'lib/polars/expr.rb', line 6207

def arcsin
  _from_rbexpr(_rbexpr.arcsin)
end

#arcsinhExpr

Compute the element-wise value for the inverse hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.881374 │
# └──────────┘

Returns:



6327
6328
6329
# File 'lib/polars/expr.rb', line 6327

def arcsinh
  _from_rbexpr(_rbexpr.arcsinh)
end

#arctanExpr

Compute the element-wise value for the inverse tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.785398 │
# └──────────┘

Returns:



6247
6248
6249
# File 'lib/polars/expr.rb', line 6247

def arctan
  _from_rbexpr(_rbexpr.arctan)
end

#arctanhExpr

Compute the element-wise value for the inverse hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctanh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ inf │
# └─────┘

Returns:



6367
6368
6369
# File 'lib/polars/expr.rb', line 6367

def arctanh
  _from_rbexpr(_rbexpr.arctanh)
end

#arg_maxExpr

Get the index of the maximal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



1483
1484
1485
# File 'lib/polars/expr.rb', line 1483

def arg_max
  _from_rbexpr(_rbexpr.arg_max)
end

#arg_minExpr

Get the index of the minimal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# └─────┘

Returns:



1507
1508
1509
# File 'lib/polars/expr.rb', line 1507

def arg_min
  _from_rbexpr(_rbexpr.arg_min)
end

#arg_sort(reverse: false, nulls_last: false) ⇒ Expr

Get the index values that would sort this column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_sort)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# │ 2   │
# └─────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Sort in reverse (descending) order.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last instead of first.

Returns:



1459
1460
1461
# File 'lib/polars/expr.rb', line 1459

def arg_sort(reverse: false, nulls_last: false)
  _from_rbexpr(_rbexpr.arg_sort(reverse, nulls_last))
end

#arg_uniqueExpr

Get index of first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.col("a").arg_unique)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# │ 2   │
# └─────┘
df.select(Polars.col("b").arg_unique)
# =>
# shape: (2, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Returns:



2253
2254
2255
# File 'lib/polars/expr.rb', line 2253

def arg_unique
  _from_rbexpr(_rbexpr.arg_unique)
end

#argsort(reverse: false, nulls_last: false) ⇒ expr

Get the index values that would sort this column.

Alias for #arg_sort.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").argsort)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# │ 2   │
# └─────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Sort in reverse (descending) order.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last instead of first.

Returns:

  • (expr)


5765
5766
5767
# File 'lib/polars/expr.rb', line 5765

def argsort(reverse: false, nulls_last: false)
  arg_sort(reverse: reverse, nulls_last: nulls_last)
end

#arrArrayExpr

Create an object namespace of all array related methods.

Returns:



7240
7241
7242
# File 'lib/polars/expr.rb', line 7240

def arr
  ArrayExpr.new(self)
end

#backward_fill(limit: nil) ⇒ Expr

Fill missing values with the next to be seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.backward_fill)
# =>
# shape: (3, 2)
# ┌──────┬─────┐
# │ a    ┆ b   │
# │ ---  ┆ --- │
# │ i64  ┆ i64 │
# ╞══════╪═════╡
# │ 1    ┆ 4   │
# │ 2    ┆ 6   │
# │ null ┆ 6   │
# └──────┴─────┘

Parameters:

  • limit (Integer) (defaults to: nil)

    The number of consecutive null values to backward fill.

Returns:



1899
1900
1901
# File 'lib/polars/expr.rb', line 1899

def backward_fill(limit: nil)
  _from_rbexpr(_rbexpr.backward_fill(limit))
end

#binBinaryExpr

Create an object namespace of all binary related methods.

Returns:



7247
7248
7249
# File 'lib/polars/expr.rb', line 7247

def bin
  BinaryExpr.new(self)
end

#bottom_k(k: 5) ⇒ Expr

Return the k smallest elements.

If 'reverse: true` the smallest elements will be given.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘

Parameters:

  • k (Integer) (defaults to: 5)

    Number of elements to return.

Returns:



1427
1428
1429
1430
# File 'lib/polars/expr.rb', line 1427

def bottom_k(k: 5)
  k = Utils.parse_into_expression(k)
  _from_rbexpr(_rbexpr.bottom_k(k))
end

#cast(dtype, strict: true) ⇒ Expr

Cast between data types.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["4", "5", "6"]
  }
)
df.with_columns(
  [
    Polars.col("a").cast(:f64),
    Polars.col("b").cast(:i32)
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i32 │
# ╞═════╪═════╡
# │ 1.0 ┆ 4   │
# │ 2.0 ┆ 5   │
# │ 3.0 ┆ 6   │
# └─────┴─────┘

Parameters:

  • dtype (Symbol)

    DataType to cast to.

  • strict (Boolean) (defaults to: true)

    Throw an error if a cast could not be done. For instance, due to an overflow.

Returns:



1274
1275
1276
1277
# File 'lib/polars/expr.rb', line 1274

def cast(dtype, strict: true)
  dtype = Utils.rb_type_to_dtype(dtype)
  _from_rbexpr(_rbexpr.cast(dtype, strict))
end

#catCatExpr

Create an object namespace of all categorical related methods.

Returns:



7254
7255
7256
# File 'lib/polars/expr.rb', line 7254

def cat
  CatExpr.new(self)
end

#ceilExpr

Rounds up to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").ceil)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 1.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘

Returns:



1153
1154
1155
# File 'lib/polars/expr.rb', line 1153

def ceil
  _from_rbexpr(_rbexpr.ceil)
end

#clip(lower_bound = nil, upper_bound = nil) ⇒ Expr

Set values outside the given boundaries to the boundary value.

Only works for numeric and temporal columns. If you want to clip other data types, consider writing a when-then-otherwise expression.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ 1           │
# │ 5    ┆ 5           │
# │ null ┆ null        │
# │ 50   ┆ 10          │
# └──────┴─────────────┘

Parameters:

  • lower_bound (Numeric) (defaults to: nil)

    Minimum value.

  • upper_bound (Numeric) (defaults to: nil)

    Maximum value.

Returns:



5989
5990
5991
5992
5993
5994
5995
5996
5997
# File 'lib/polars/expr.rb', line 5989

def clip(lower_bound = nil, upper_bound = nil)
  if !lower_bound.nil?
    lower_bound = Utils.parse_into_expression(lower_bound)
  end
  if !upper_bound.nil?
    upper_bound = Utils.parse_into_expression(upper_bound)
  end
  _from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
end

#clip_max(upper_bound) ⇒ Expr

Clip (limit) the values in an array to a max boundary.

Only works for numerical types.

If you want to clip other dtypes, consider writing a "when, then, otherwise" expression. See when for more information.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ -50         │
# │ 5    ┆ 0           │
# │ null ┆ null        │
# │ 50   ┆ 0           │
# └──────┴─────────────┘

Parameters:

  • upper_bound (Numeric)

    Maximum value.

Returns:



6057
6058
6059
# File 'lib/polars/expr.rb', line 6057

def clip_max(upper_bound)
  clip(nil, upper_bound)
end

#clip_min(lower_bound) ⇒ Expr

Clip (limit) the values in an array to a min boundary.

Only works for numerical types.

If you want to clip other dtypes, consider writing a "when, then, otherwise" expression. See when for more information.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ 0           │
# │ 5    ┆ 5           │
# │ null ┆ null        │
# │ 50   ┆ 50          │
# └──────┴─────────────┘

Parameters:

  • lower_bound (Numeric)

    Minimum value.

Returns:



6026
6027
6028
# File 'lib/polars/expr.rb', line 6026

def clip_min(lower_bound)
  clip(lower_bound, nil)
end

#cosExpr

Compute the element-wise value for the cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").cos)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Returns:



6167
6168
6169
# File 'lib/polars/expr.rb', line 6167

def cos
  _from_rbexpr(_rbexpr.cos)
end

#coshExpr

Compute the element-wise value for the hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").cosh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.543081 │
# └──────────┘

Returns:



6287
6288
6289
# File 'lib/polars/expr.rb', line 6287

def cosh
  _from_rbexpr(_rbexpr.cosh)
end

#countExpr

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:



761
762
763
# File 'lib/polars/expr.rb', line 761

def count
  _from_rbexpr(_rbexpr.count)
end

#cum_count(reverse: false) ⇒ Expr Also known as: cumcount

Get an array with the cumulative count computed at every element.

Counting from 0 to len

Examples:

df = Polars::DataFrame.new({"a" => ["x", "k", nil, "d"]})
df.with_columns(
  [
    Polars.col("a").cum_count.alias("cum_count"),
    Polars.col("a").cum_count(reverse: true).alias("cum_count_reverse")
  ]
)
# =>
# shape: (4, 3)
# ┌──────┬───────────┬───────────────────┐
# │ a    ┆ cum_count ┆ cum_count_reverse │
# │ ---  ┆ ---       ┆ ---               │
# │ str  ┆ u32       ┆ u32               │
# ╞══════╪═══════════╪═══════════════════╡
# │ x    ┆ 1         ┆ 3                 │
# │ k    ┆ 2         ┆ 2                 │
# │ null ┆ 2         ┆ 1                 │
# │ d    ┆ 3         ┆ 1                 │
# └──────┴───────────┴───────────────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1102
1103
1104
# File 'lib/polars/expr.rb', line 1102

def cum_count(reverse: false)
  _from_rbexpr(_rbexpr.cum_count(reverse))
end

#cum_max(reverse: false) ⇒ Expr Also known as: cummax

Get an array with the cumulative max computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_max,
    Polars.col("a").cum_max(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 4         │
# │ 2   ┆ 4         │
# │ 3   ┆ 4         │
# │ 4   ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1068
1069
1070
# File 'lib/polars/expr.rb', line 1068

def cum_max(reverse: false)
  _from_rbexpr(_rbexpr.cum_max(reverse))
end

#cum_min(reverse: false) ⇒ Expr Also known as: cummin

Get an array with the cumulative min computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_min,
    Polars.col("a").cum_min(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 1         │
# │ 1   ┆ 2         │
# │ 1   ┆ 3         │
# │ 1   ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1036
1037
1038
# File 'lib/polars/expr.rb', line 1036

def cum_min(reverse: false)
  _from_rbexpr(_rbexpr.cum_min(reverse))
end

#cum_prod(reverse: false) ⇒ Expr Also known as: cumprod

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before summing to prevent overflow issues.

Get an array with the cumulative product computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_prod,
    Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 24        │
# │ 2   ┆ 24        │
# │ 6   ┆ 12        │
# │ 24  ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1004
1005
1006
# File 'lib/polars/expr.rb', line 1004

def cum_prod(reverse: false)
  _from_rbexpr(_rbexpr.cum_prod(reverse))
end

#cum_sum(reverse: false) ⇒ Expr Also known as: cumsum

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before summing to prevent overflow issues.

Get an array with the cumulative sum computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_sum,
    Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 10        │
# │ 3   ┆ 9         │
# │ 6   ┆ 7         │
# │ 10  ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



968
969
970
# File 'lib/polars/expr.rb', line 968

def cum_sum(reverse: false)
  _from_rbexpr(_rbexpr.cum_sum(reverse))
end

#cumulative_eval(expr, min_periods: 1, parallel: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

This can be really slow as it can have O(n^2) complexity. Don't use this for operations that visit all elements.

Run an expression over a sliding window that increases 1 slot every iteration.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.col("values").cumulative_eval(
      Polars.element.first - Polars.element.last ** 2
    )
  ]
)
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 0      │
# │ -3     │
# │ -8     │
# │ -15    │
# │ -24    │
# └────────┘

Parameters:

  • expr (Expr)

    Expression to evaluate

  • min_periods (Integer) (defaults to: 1)

    Number of valid values there should be in the window before the expression is evaluated. valid values = length - null_count

  • parallel (Boolean) (defaults to: false)

    Run in parallel. Don't do this in a group by or another operation that already has much parallelization.

Returns:



6822
6823
6824
6825
6826
# File 'lib/polars/expr.rb', line 6822

def cumulative_eval(expr, min_periods: 1, parallel: false)
  _from_rbexpr(
    _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
  )
end

#cut(breaks, labels: nil, left_closed: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories.

Examples:

Divide a column into three categories.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
)
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ foo ┆ cut │
# │ --- ┆ --- │
# │ i64 ┆ cat │
# ╞═════╪═════╡
# │ -2  ┆ a   │
# │ -1  ┆ a   │
# │ 0   ┆ b   │
# │ 1   ┆ b   │
# │ 2   ┆ c   │
# └─────┴─────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
).unnest("cut")
# =>
# shape: (5, 3)
# ┌─────┬────────────┬────────────┐
# │ foo ┆ breakpoint ┆ category   │
# │ --- ┆ ---        ┆ ---        │
# │ i64 ┆ f64        ┆ cat        │
# ╞═════╪════════════╪════════════╡
# │ -2  ┆ -1.0       ┆ (-inf, -1] │
# │ -1  ┆ -1.0       ┆ (-inf, -1] │
# │ 0   ┆ 1.0        ┆ (-1, 1]    │
# │ 1   ┆ 1.0        ┆ (-1, 1]    │
# │ 2   ┆ inf        ┆ (1, inf]   │
# └─────┴────────────┴────────────┘

Parameters:

  • breaks (Array)

    List of unique cut points.

  • labels (Array) (defaults to: nil)

    Names of the categories. The number of labels must be equal to the number of cut points plus one.

  • left_closed (Boolean) (defaults to: false)

    Set the intervals to be left-closed instead of right-closed.

  • include_breaks (Boolean) (defaults to: false)

    Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.

Returns:



2636
2637
2638
# File 'lib/polars/expr.rb', line 2636

def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  _from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
end

#diff(n: 1, null_behavior: "ignore") ⇒ Expr

Calculate the n-th discrete difference.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").diff)
# =>
# shape: (3, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ -10  │
# │ 20   │
# └──────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Number of slots to shift.

  • null_behavior ("ignore", "drop") (defaults to: "ignore")

    How to handle null values.

Returns:



5860
5861
5862
# File 'lib/polars/expr.rb', line 5860

def diff(n: 1, null_behavior: "ignore")
  _from_rbexpr(_rbexpr.diff(n, null_behavior))
end

#dot(other) ⇒ Expr

Compute the dot/inner product between two Expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 3, 5],
    "b" => [2, 4, 6]
  }
)
df.select(Polars.col("a").dot(Polars.col("b")))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 44  │
# └─────┘

Parameters:

  • other (Expr)

    Expression to compute dot product with.

Returns:



1207
1208
1209
1210
# File 'lib/polars/expr.rb', line 1207

def dot(other)
  other = Utils.parse_into_expression(other, str_as_lit: false)
  _from_rbexpr(_rbexpr.dot(other))
end

#drop_nansExpr

Drop floating point NaN values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nans)
# =>
# shape: (3, 1)
# ┌──────┐
# │ b    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.0  │
# │ 4.0  │
# └──────┘

Returns:



933
934
935
# File 'lib/polars/expr.rb', line 933

def drop_nans
  _from_rbexpr(_rbexpr.drop_nans)
end

#drop_nullsExpr

Drop null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nulls)
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# │ 4.0 │
# │ NaN │
# └─────┘

Returns:



906
907
908
# File 'lib/polars/expr.rb', line 906

def drop_nulls
  _from_rbexpr(_rbexpr.drop_nulls)
end

#dtDateTimeExpr

Create an object namespace of all datetime related methods.

Returns:



7261
7262
7263
# File 'lib/polars/expr.rb', line 7261

def dt
  DateTimeExpr.new(self)
end

#entropy(base: 2, normalize: true) ⇒ Expr

Computes the entropy.

Uses the formula -sum(pk * log(pk) where pk are discrete probabilities.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").entropy(base: 2))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.459148 │
# └──────────┘
df.select(Polars.col("a").entropy(base: 2, normalize: false))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -6.754888 │
# └───────────┘

Parameters:

  • base (Float) (defaults to: 2)

    Given base, defaults to e.

  • normalize (Boolean) (defaults to: true)

    Normalize pk if it doesn't sum to 1.

Returns:



6775
6776
6777
# File 'lib/polars/expr.rb', line 6775

def entropy(base: 2, normalize: true)
  _from_rbexpr(_rbexpr.entropy(base, normalize))
end

#eq(other) ⇒ Expr

Method equivalent of equality operator expr == other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x == y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x == y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ false  │
# │ 2.0 ┆ 2.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 4.0 ┆ 4.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3191
3192
3193
# File 'lib/polars/expr.rb', line 3191

def eq(other)
  self == other
end

#eq_missing(other) ⇒ Expr

Method equivalent of equality operator expr == other where None == None.

This differs from default eq where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  data={
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x eq y"),
  Polars.col("x").eq_missing(Polars.col("y")).alias("x eq_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x eq y ┆ x eq_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ false  ┆ false          │
# │ 2.0  ┆ 2.0  ┆ true   ┆ true           │
# │ NaN  ┆ NaN  ┆ true   ┆ true           │
# │ 4.0  ┆ 4.0  ┆ true   ┆ true           │
# │ null ┆ 5.0  ┆ null   ┆ false          │
# │ null ┆ null ┆ null   ┆ true           │
# └──────┴──────┴────────┴────────────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3229
3230
3231
3232
# File 'lib/polars/expr.rb', line 3229

def eq_missing(other)
  other = Utils.parse_into_expression(other, str_as_lit: true)
  _from_rbexpr(_rbexpr.eq_missing(other))
end

#ewm_mean(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving average.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_mean(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.666667 │
# │ 2.428571 │
# └──────────┘

Returns:



6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
# File 'lib/polars/expr.rb', line 6519

def ewm_mean(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  min_periods: 1,
  ignore_nulls: true
)
  alpha = _prepare_alpha(com, span, half_life, alpha)
  _from_rbexpr(_rbexpr.ewm_mean(alpha, adjust, min_periods, ignore_nulls))
end

#ewm_std(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_std(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.707107 │
# │ 0.963624 │
# └──────────┘

Returns:



6550
6551
6552
6553
6554
6555
6556
6557
6558
6559
6560
6561
6562
# File 'lib/polars/expr.rb', line 6550

def ewm_std(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  alpha = _prepare_alpha(com, span, half_life, alpha)
  _from_rbexpr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods, ignore_nulls))
end

#ewm_var(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving variance.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_var(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.5      │
# │ 0.928571 │
# └──────────┘

Returns:



6582
6583
6584
6585
6586
6587
6588
6589
6590
6591
6592
6593
6594
# File 'lib/polars/expr.rb', line 6582

def ewm_var(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  alpha = _prepare_alpha(com, span, half_life, alpha)
  _from_rbexpr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods, ignore_nulls))
end

#exclude(columns) ⇒ Expr

Exclude certain columns from a wildcard/regex selection.

You may also use regexes in the exclude list. They must start with ^ and end with $.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
df.select(Polars.all.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Parameters:

  • columns (Object)

    Column(s) to exclude from selection. This can be:

    • a column name, or multiple column names
    • a regular expression starting with ^ and ending with $
    • a dtype or multiple dtypes

Returns:



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/polars/expr.rb', line 365

def exclude(columns)
  if columns.is_a?(::String)
    columns = [columns]
    return _from_rbexpr(_rbexpr.exclude(columns))
  elsif !columns.is_a?(::Array)
    columns = [columns]
    return _from_rbexpr(_rbexpr.exclude_dtype(columns))
  end

  if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
    raise ArgumentError, "input should be all string or all DataType"
  end

  if columns[0].is_a?(::String)
    _from_rbexpr(_rbexpr.exclude(columns))
  else
    _from_rbexpr(_rbexpr.exclude_dtype(columns))
  end
end

#expExpr

Compute the exponential, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").exp)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 2.718282 │
# │ 7.389056 │
# │ 54.59815 │
# └──────────┘

Returns:



289
290
291
# File 'lib/polars/expr.rb', line 289

def exp
  _from_rbexpr(_rbexpr.exp)
end

#explodeExpr

Explode a list or utf8 Series.

This means that every item is expanded to a new row.

Examples:

df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
df.select(Polars.col("b").explode)
# =>
# shape: (6, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# └─────┘

Returns:



3059
3060
3061
# File 'lib/polars/expr.rb', line 3059

def explode
  _from_rbexpr(_rbexpr.explode)
end

#extend_constant(value, n) ⇒ Expr

Extend the Series with given number of values.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").extend_constant(99, 2))
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 2      │
# │ 3      │
# │ 99     │
# │ 99     │
# └────────┘

Parameters:

  • value (Object)

    The value to extend the Series with. This value may be nil to fill with nulls.

  • n (Integer)

    The number of values to extend.

Returns:



6622
6623
6624
# File 'lib/polars/expr.rb', line 6622

def extend_constant(value, n)
  _from_rbexpr(_rbexpr.extend_constant(value, n))
end

#fill_nan(fill_value) ⇒ Expr

Fill floating point NaN value with a fill value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1.0, nil, Float::NAN],
    "b" => [4.0, Float::NAN, 6]
  }
)
df.fill_nan("zero")
# =>
# shape: (3, 2)
# ┌──────┬──────┐
# │ a    ┆ b    │
# │ ---  ┆ ---  │
# │ str  ┆ str  │
# ╞══════╪══════╡
# │ 1.0  ┆ 4.0  │
# │ null ┆ zero │
# │ zero ┆ 6.0  │
# └──────┴──────┘

Returns:



1838
1839
1840
1841
# File 'lib/polars/expr.rb', line 1838

def fill_nan(fill_value)
  fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
  _from_rbexpr(_rbexpr.fill_nan(fill_value))
end

#fill_null(value = nil, strategy: nil, limit: nil) ⇒ Expr

Fill null values using the specified value or strategy.

To interpolate over null values see interpolate.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.fill_null(strategy: "zero")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 0   │
# │ 0   ┆ 6   │
# └─────┴─────┘
df.fill_null(99)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 99  │
# │ 99  ┆ 6   │
# └─────┴─────┘
df.fill_null(strategy: "forward")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘

Parameters:

  • value (Object) (defaults to: nil)

    Value used to fill null values.

  • strategy (nil, "forward", "backward", "min", "max", "mean", "zero", "one") (defaults to: nil)

    Strategy used to fill null values.

  • limit (Integer) (defaults to: nil)

    Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.

Returns:



1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
# File 'lib/polars/expr.rb', line 1798

def fill_null(value = nil, strategy: nil, limit: nil)
  if !value.nil? && !strategy.nil?
    raise ArgumentError, "cannot specify both 'value' and 'strategy'."
  elsif value.nil? && strategy.nil?
    raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
  elsif ["forward", "backward"].include?(strategy) && !limit.nil?
    raise ArgumentError, "can only specify 'limit' when strategy is set to 'backward' or 'forward'"
  end

  if !value.nil?
    value = Utils.parse_into_expression(value, str_as_lit: true)
    _from_rbexpr(_rbexpr.fill_null(value))
  else
    _from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
  end
end

#filter(predicate) ⇒ Expr

Filter a single column.

Mostly useful in an aggregation context. If you want to filter on a DataFrame level, use LazyFrame#filter.

Examples:

df = Polars::DataFrame.new(
  {
    "group_col" => ["g1", "g1", "g2"],
    "b" => [1, 2, 3]
  }
)
(
  df.group_by("group_col").agg(
    [
      Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
      Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
    ]
  )
).sort("group_col")
# =>
# shape: (2, 3)
# ┌───────────┬─────┬─────┐
# │ group_col ┆ lt  ┆ gte │
# │ ---       ┆ --- ┆ --- │
# │ str       ┆ i64 ┆ i64 │
# ╞═══════════╪═════╪═════╡
# │ g1        ┆ 1   ┆ 2   │
# │ g2        ┆ 0   ┆ 3   │
# └───────────┴─────┴─────┘

Parameters:

  • predicate (Expr)

    Boolean expression.

Returns:



2819
2820
2821
# File 'lib/polars/expr.rb', line 2819

def filter(predicate)
  _from_rbexpr(_rbexpr.filter(predicate._rbexpr))
end

#firstExpr

Get the first value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").first)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Returns:



2301
2302
2303
# File 'lib/polars/expr.rb', line 2301

def first
  _from_rbexpr(_rbexpr.first)
end

#flattenExpr

Explode a list or utf8 Series. This means that every item is expanded to a new row.

Alias for #explode.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => ["a", "b", "b"],
    "values" => [[1, 2], [2, 3], [4]]
  }
)
df.group_by("group").agg(Polars.col("values").flatten)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ values    │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ a     ┆ [1, 2]    │
# │ b     ┆ [2, 3, 4] │
# └───────┴───────────┘

Returns:



3032
3033
3034
# File 'lib/polars/expr.rb', line 3032

def flatten
  _from_rbexpr(_rbexpr.explode)
end

#floorExpr

Rounds down to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").floor)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# │ 0.0 │
# │ 1.0 │
# │ 1.0 │
# └─────┘

Returns:



1128
1129
1130
# File 'lib/polars/expr.rb', line 1128

def floor
  _from_rbexpr(_rbexpr.floor)
end

#floordiv(other) ⇒ Expr

Method equivalent of integer division operator expr // other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").floordiv(2).alias("x//2")
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬──────┐
# │ x   ┆ x/2 ┆ x//2 │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ f64 ┆ i64  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 0.5 ┆ 0    │
# │ 2   ┆ 1.0 ┆ 1    │
# │ 3   ┆ 1.5 ┆ 1    │
# │ 4   ┆ 2.0 ┆ 2    │
# │ 5   ┆ 2.5 ┆ 2    │
# └─────┴─────┴──────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3511
3512
3513
# File 'lib/polars/expr.rb', line 3511

def floordiv(other)
  _from_rbexpr(_rbexpr.floordiv(_to_rbexpr(other)))
end

#forward_fill(limit: nil) ⇒ Expr

Fill missing values with the latest seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.forward_fill)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘

Parameters:

  • limit (Integer) (defaults to: nil)

    The number of consecutive null values to forward fill.

Returns:



1869
1870
1871
# File 'lib/polars/expr.rb', line 1869

def forward_fill(limit: nil)
  _from_rbexpr(_rbexpr.forward_fill(limit))
end

#gather(indices) ⇒ Expr Also known as: take

Take values by index.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ one   ┆ [2, 98]   │
# │ two   ┆ [4, 99]   │
# └───────┴───────────┘

Parameters:

  • indices (Expr)

    An expression that leads to a :u32 dtyped Series.

Returns:



1630
1631
1632
1633
1634
1635
1636
1637
# File 'lib/polars/expr.rb', line 1630

def gather(indices)
  if indices.is_a?(::Array)
    indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
  else
    indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
  end
  _from_rbexpr(_rbexpr.gather(indices_lit))
end

#gather_every(n, offset = 0) ⇒ Expr Also known as: take_every

Take every nth value in the Series and return as a new Series.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df.select(Polars.col("foo").gather_every(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 7   │
# └─────┘

Returns:



3081
3082
3083
# File 'lib/polars/expr.rb', line 3081

def gather_every(n, offset = 0)
  _from_rbexpr(_rbexpr.gather_every(n, offset))
end

#ge(other) ⇒ Expr

Method equivalent of "greater than or equal" operator expr >= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
  Polars.col("x").ge(Polars.col("y")).alias("x >= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x >= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 2.0 ┆ 1.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3263
3264
3265
# File 'lib/polars/expr.rb', line 3263

def ge(other)
  self >= other
end

#get(index) ⇒ Expr

Return a single value by index.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
# =>
# shape: (2, 2)
# ┌───────┬───────┐
# │ group ┆ value │
# │ ---   ┆ ---   │
# │ str   ┆ i64   │
# ╞═══════╪═══════╡
# │ one   ┆ 98    │
# │ two   ┆ 99    │
# └───────┴───────┘

Parameters:

  • index (Object)

    An expression that leads to a UInt32 index.

Returns:



1672
1673
1674
1675
# File 'lib/polars/expr.rb', line 1672

def get(index)
  index_lit = Utils.parse_into_expression(index)
  _from_rbexpr(_rbexpr.get(index_lit))
end

#gt(other) ⇒ Expr

Method equivalent of "greater than" operator expr > other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
    Polars.col("x").gt(Polars.col("y")).alias("x > y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x > y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 5.0 ┆ 5.0 ┆ false │
# │ 4.0 ┆ 3.0 ┆ true  │
# │ NaN ┆ NaN ┆ false │
# │ 2.0 ┆ 1.0 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3296
3297
3298
# File 'lib/polars/expr.rb', line 3296

def gt(other)
  self > other
end

#head(n = 10) ⇒ Expr

Get the first n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.head(3)
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



3107
3108
3109
# File 'lib/polars/expr.rb', line 3107

def head(n = 10)
  _from_rbexpr(_rbexpr.head(n))
end

#implodeExpr

Aggregate to list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.all.implode)
# =>
# shape: (1, 2)
# ┌───────────┬───────────┐
# │ a         ┆ b         │
# │ ---       ┆ ---       │
# │ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╡
# │ [1, 2, 3] ┆ [4, 5, 6] │
# └───────────┴───────────┘

Returns:



6878
6879
6880
# File 'lib/polars/expr.rb', line 6878

def implode
  _from_rbexpr(_rbexpr.implode)
end

#interpolate(method: "linear") ⇒ Expr

Fill nulls with linear interpolation over missing values.

Can also be used to regrid data to a new grid - see examples below.

Examples:

Fill nulls with linear interpolation

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, 3],
    "b" => [1.0, Float::NAN, 3.0]
  }
)
df.select(Polars.all.interpolate)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 1.0 │
# │ 2.0 ┆ NaN │
# │ 3.0 ┆ 3.0 │
# └─────┴─────┘

Returns:



3993
3994
3995
# File 'lib/polars/expr.rb', line 3993

def interpolate(method: "linear")
  _from_rbexpr(_rbexpr.interpolate(method))
end

#is_between(lower_bound, upper_bound, closed: "both") ⇒ Expr

Check if this expression is between start and end.

Examples:

df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
df.with_columns(Polars.col("num").is_between(2, 4).alias("is_between"))
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ true       │
# │ 5   ┆ false      │
# └─────┴────────────┘

Use the closed argument to include or exclude the values at the bounds:

df.with_columns(
  Polars.col("num").is_between(2, 4, closed: "left").alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ false      │
# │ 5   ┆ false      │
# └─────┴────────────┘

You can also use strings as well as numeric/temporal values:

df = Polars::DataFrame.new({"a" => ["a", "b", "c", "d", "e"]})
df.with_columns(
  Polars.col("a")
    .is_between(Polars.lit("a"), Polars.lit("c"), closed: "both")
    .alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ a   ┆ is_between │
# │ --- ┆ ---        │
# │ str ┆ bool       │
# ╞═════╪════════════╡
# │ a   ┆ true       │
# │ b   ┆ true       │
# │ c   ┆ true       │
# │ d   ┆ false      │
# │ e   ┆ false      │
# └─────┴────────────┘

Parameters:

  • lower_bound (Object)

    Lower bound as primitive type or datetime.

  • upper_bound (Object)

    Upper bound as primitive type or datetime.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define which sides of the interval are closed (inclusive).

Returns:



3849
3850
3851
3852
3853
3854
3855
3856
# File 'lib/polars/expr.rb', line 3849

def is_between(lower_bound, upper_bound, closed: "both")
  lower_bound = Utils.parse_into_expression(lower_bound)
  upper_bound = Utils.parse_into_expression(upper_bound)

  _from_rbexpr(
    _rbexpr.is_between(lower_bound, upper_bound, closed)
  )
end

#is_duplicatedExpr

Get mask of duplicated values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_duplicated)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ true  │
# │ false │
# └───────┘

Returns:



2458
2459
2460
# File 'lib/polars/expr.rb', line 2458

def is_duplicated
  _from_rbexpr(_rbexpr.is_duplicated)
end

#is_finiteExpr

Returns a boolean Series indicating which values are finite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_finite)
# =>
# shape: (2, 2)
# ┌──────┬───────┐
# │ A    ┆ B     │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ true  │
# │ true ┆ false │
# └──────┴───────┘

Returns:



614
615
616
# File 'lib/polars/expr.rb', line 614

def is_finite
  _from_rbexpr(_rbexpr.is_finite)
end

#is_first_distinctExpr Also known as: is_first

Get a mask of the first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "num" => [1, 2, 3, 1, 5]
  }
)
df.with_column(Polars.col("num").is_first.alias("is_first"))
# =>
# shape: (5, 2)
# ┌─────┬──────────┐
# │ num ┆ is_first │
# │ --- ┆ ---      │
# │ i64 ┆ bool     │
# ╞═════╪══════════╡
# │ 1   ┆ true     │
# │ 2   ┆ true     │
# │ 3   ┆ true     │
# │ 1   ┆ false    │
# │ 5   ┆ true     │
# └─────┴──────────┘

Returns:



2435
2436
2437
# File 'lib/polars/expr.rb', line 2435

def is_first_distinct
  _from_rbexpr(_rbexpr.is_first_distinct)
end

#is_in(other) ⇒ Expr Also known as: in?

Check if elements of this expression are present in the other Series.

Examples:

df = Polars::DataFrame.new(
  {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
)
df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
# =>
# shape: (3, 1)
# ┌──────────┐
# │ contains │
# │ ---      │
# │ bool     │
# ╞══════════╡
# │ true     │
# │ true     │
# │ false    │
# └──────────┘

Parameters:

  • other (Object)

    Series or sequence of primitive type.

Returns:



3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
# File 'lib/polars/expr.rb', line 3734

def is_in(other)
  if other.is_a?(::Array)
    if other.length == 0
      other = Polars.lit(nil)._rbexpr
    else
      other = Polars.lit(Series.new(other))._rbexpr
    end
  else
    other = Utils.parse_into_expression(other, str_as_lit: false)
  end
  _from_rbexpr(_rbexpr.is_in(other))
end

#is_infiniteExpr

Returns a boolean Series indicating which values are infinite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_infinite)
# =>
# shape: (2, 2)
# ┌───────┬───────┐
# │ A     ┆ B     │
# │ ---   ┆ ---   │
# │ bool  ┆ bool  │
# ╞═══════╪═══════╡
# │ false ┆ false │
# │ false ┆ true  │
# └───────┴───────┘

Returns:



640
641
642
# File 'lib/polars/expr.rb', line 640

def is_infinite
  _from_rbexpr(_rbexpr.is_infinite)
end

#is_nanExpr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬─────────┐
# │ a    ┆ b   ┆ b_isnan │
# │ ---  ┆ --- ┆ ---     │
# │ i64  ┆ f64 ┆ bool    │
# ╞══════╪═════╪═════════╡
# │ 1    ┆ 1.0 ┆ false   │
# │ 2    ┆ 2.0 ┆ false   │
# │ null ┆ NaN ┆ true    │
# │ 1    ┆ 1.0 ┆ false   │
# │ 5    ┆ 5.0 ┆ false   │
# └──────┴─────┴─────────┘

Returns:



673
674
675
# File 'lib/polars/expr.rb', line 673

def is_nan
  _from_rbexpr(_rbexpr.is_nan)
end

#is_notExpr Also known as: not_

Negate a boolean expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, false],
    "b" => ["a", "b", nil]
  }
)
# =>
# shape: (3, 2)
# ┌───────┬──────┐
# │ a     ┆ b    │
# │ ---   ┆ ---  │
# │ bool  ┆ str  │
# ╞═══════╪══════╡
# │ true  ┆ a    │
# │ false ┆ b    │
# │ false ┆ null │
# └───────┴──────┘
df.select(Polars.col("a").is_not)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ true  │
# └───────┘

Returns:



529
530
531
# File 'lib/polars/expr.rb', line 529

def is_not
  _from_rbexpr(_rbexpr.not_)
end

#is_not_nanExpr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are not NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬──────────────┐
# │ a    ┆ b   ┆ b_is_not_nan │
# │ ---  ┆ --- ┆ ---          │
# │ i64  ┆ f64 ┆ bool         │
# ╞══════╪═════╪══════════════╡
# │ 1    ┆ 1.0 ┆ true         │
# │ 2    ┆ 2.0 ┆ true         │
# │ null ┆ NaN ┆ false        │
# │ 1    ┆ 1.0 ┆ true         │
# │ 5    ┆ 5.0 ┆ true         │
# └──────┴─────┴──────────────┘

Returns:



706
707
708
# File 'lib/polars/expr.rb', line 706

def is_not_nan
  _from_rbexpr(_rbexpr.is_not_nan)
end

#is_not_nullExpr

Returns a boolean Series indicating which values are not null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.all.is_not_null.suffix("_not_null"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬────────────┬────────────┐
# │ a    ┆ b   ┆ a_not_null ┆ b_not_null │
# │ ---  ┆ --- ┆ ---        ┆ ---        │
# │ i64  ┆ f64 ┆ bool       ┆ bool       │
# ╞══════╪═════╪════════════╪════════════╡
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 2    ┆ 2.0 ┆ true       ┆ true       │
# │ null ┆ NaN ┆ false      ┆ true       │
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 5    ┆ 5.0 ┆ true       ┆ true       │
# └──────┴─────┴────────────┴────────────┘

Returns:



588
589
590
# File 'lib/polars/expr.rb', line 588

def is_not_null
  _from_rbexpr(_rbexpr.is_not_null)
end

#is_nullExpr

Returns a boolean Series indicating which values are null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.all.is_null.suffix("_isnull"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬──────────┬──────────┐
# │ a    ┆ b   ┆ a_isnull ┆ b_isnull │
# │ ---  ┆ --- ┆ ---      ┆ ---      │
# │ i64  ┆ f64 ┆ bool     ┆ bool     │
# ╞══════╪═════╪══════════╪══════════╡
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 2    ┆ 2.0 ┆ false    ┆ false    │
# │ null ┆ NaN ┆ true     ┆ false    │
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 5    ┆ 5.0 ┆ false    ┆ false    │
# └──────┴─────┴──────────┴──────────┘

Returns:



559
560
561
# File 'lib/polars/expr.rb', line 559

def is_null
  _from_rbexpr(_rbexpr.is_null)
end

#is_uniqueExpr

Get mask of unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_unique)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ true  │
# └───────┘

Returns:



2407
2408
2409
# File 'lib/polars/expr.rb', line 2407

def is_unique
  _from_rbexpr(_rbexpr.is_unique)
end

#keep_nameExpr

Keep the original root name of the expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4]
  }
)
df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 9   ┆ 3   │
# │ 18  ┆ 4   │
# └─────┴─────┘

Returns:



407
408
409
# File 'lib/polars/expr.rb', line 407

def keep_name
  name.keep
end

#kurtosis(fisher: true, bias: true) ⇒ Expr

Compute the kurtosis (Fisher or Pearson) of a dataset.

Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. If bias is False then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").kurtosis)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -1.153061 │
# └───────────┘

Parameters:

  • fisher (Boolean) (defaults to: true)

    If true, Fisher's definition is used (normal ==> 0.0). If false, Pearson's definition is used (normal ==> 3.0).

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



5958
5959
5960
# File 'lib/polars/expr.rb', line 5958

def kurtosis(fisher: true, bias: true)
  _from_rbexpr(_rbexpr.kurtosis(fisher, bias))
end

#lastExpr

Get the last value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").last)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2321
2322
2323
# File 'lib/polars/expr.rb', line 2321

def last
  _from_rbexpr(_rbexpr.last)
end

#le(other) ⇒ Expr

Method equivalent of "less than or equal" operator expr <= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 0.5],
    "y" => [5.0, 3.5, Float::NAN, 2.0]
  }
)
df.with_columns(
  Polars.col("x").le(Polars.col("y")).alias("x <= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x <= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.5 ┆ false  │
# │ NaN ┆ NaN ┆ true   │
# │ 0.5 ┆ 2.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3329
3330
3331
# File 'lib/polars/expr.rb', line 3329

def le(other)
  self <= other
end

#lenExpr Also known as: length

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.len)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 3   │
# └─────┴─────┘

Returns:



781
782
783
# File 'lib/polars/expr.rb', line 781

def len
  _from_rbexpr(_rbexpr.len)
end

#limit(n = 10) ⇒ Expr

Get the first n rows.

Alias for #head.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.select(Polars.col("foo").limit(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



3159
3160
3161
# File 'lib/polars/expr.rb', line 3159

def limit(n = 10)
  head(n)
end

#listListExpr

Create an object namespace of all list related methods.

Returns:



7233
7234
7235
# File 'lib/polars/expr.rb', line 7233

def list
  ListExpr.new(self)
end

#log(base = Math::E) ⇒ Expr

Compute the logarithm to a given base.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").log(2))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 1.0      │
# │ 1.584963 │
# └──────────┘

Parameters:

  • base (Float) (defaults to: Math::E)

    Given base, defaults to e.

Returns:



6736
6737
6738
# File 'lib/polars/expr.rb', line 6736

def log(base = Math::E)
  _from_rbexpr(_rbexpr.log(base))
end

#log10Expr

Compute the base 10 logarithm of the input array, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").log10)
# =>
# shape: (3, 1)
# ┌─────────┐
# │ values  │
# │ ---     │
# │ f64     │
# ╞═════════╡
# │ 0.0     │
# │ 0.30103 │
# │ 0.60206 │
# └─────────┘

Returns:



267
268
269
# File 'lib/polars/expr.rb', line 267

def log10
  log(10)
end

#lower_boundExpr

Calculate the lower bound.

Returns a unit Series with the lowest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").lower_bound)
# =>
# shape: (1, 1)
# ┌──────────────────────┐
# │ a                    │
# │ ---                  │
# │ i64                  │
# ╞══════════════════════╡
# │ -9223372036854775808 │
# └──────────────────────┘

Returns:



6080
6081
6082
# File 'lib/polars/expr.rb', line 6080

def lower_bound
  _from_rbexpr(_rbexpr.lower_bound)
end

#lt(other) ⇒ Expr

Method equivalent of "less than" operator expr < other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 3.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").lt(Polars.col("y")).alias("x < y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x < y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 1.0 ┆ 2.0 ┆ true  │
# │ 2.0 ┆ 2.0 ┆ false │
# │ NaN ┆ NaN ┆ false │
# │ 3.0 ┆ 4.0 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3362
3363
3364
# File 'lib/polars/expr.rb', line 3362

def lt(other)
  self < other
end

#map_alias(&f) ⇒ Expr

Rename the output of an expression by mapping a function over the root name.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1, 2],
    "B" => [3, 4]
  }
)
df.select(
  Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
)
# =>
# shape: (2, 2)
# ┌───────────┬───────────┐
# │ A_reverse ┆ B_reverse │
# │ ---       ┆ ---       │
# │ i64       ┆ i64       │
# ╞═══════════╪═══════════╡
# │ 2         ┆ 4         │
# │ 1         ┆ 3         │
# └───────────┴───────────┘

Returns:



489
490
491
# File 'lib/polars/expr.rb', line 489

def map_alias(&f)
  name.map(&f)
end

#maxExpr

Get maximum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Returns:



2001
2002
2003
# File 'lib/polars/expr.rb', line 2001

def max
  _from_rbexpr(_rbexpr.max)
end

#meanExpr

Get mean value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").mean)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



2105
2106
2107
# File 'lib/polars/expr.rb', line 2105

def mean
  _from_rbexpr(_rbexpr.mean)
end

#medianExpr

Get median value using linear interpolation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").median)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



2125
2126
2127
# File 'lib/polars/expr.rb', line 2125

def median
  _from_rbexpr(_rbexpr.median)
end

#metaMetaExpr

Create an object namespace of all meta related expression methods.

Returns:



7268
7269
7270
# File 'lib/polars/expr.rb', line 7268

def meta
  MetaExpr.new(self)
end

#minExpr

Get minimum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").min)
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ -1.0 │
# └──────┘

Returns:



2021
2022
2023
# File 'lib/polars/expr.rb', line 2021

def min
  _from_rbexpr(_rbexpr.min)
end

#mod(other) ⇒ Expr

Method equivalent of modulus operator expr % other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(Polars.col("x").mod(2).alias("x%2"))
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ x   ┆ x%2 │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 0   ┆ 0   │
# │ 1   ┆ 1   │
# │ 2   ┆ 0   │
# │ 3   ┆ 1   │
# │ 4   ┆ 0   │
# └─────┴─────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3538
3539
3540
# File 'lib/polars/expr.rb', line 3538

def mod(other)
  self % other
end

#modeExpr

Compute the most occurring value(s).

Can return multiple Values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 1, 2, 3],
    "b" => [1, 1, 2, 2]
  }
)
df.select(Polars.all.mode.first)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 1   │
# │ 1   ┆ 2   │
# └─────┴─────┘

Returns:



1236
1237
1238
# File 'lib/polars/expr.rb', line 1236

def mode
  _from_rbexpr(_rbexpr.mode)
end

#mul(other) ⇒ Expr

Method equivalent of multiplication operator expr * other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 4, 8, 16]})
df.with_columns(
  Polars.col("x").mul(2).alias("x*2"),
  Polars.col("x").mul(Polars.col("x").log(2)).alias("x * xlog2"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬───────────┐
# │ x   ┆ x*2 ┆ x * xlog2 │
# │ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ f64       │
# ╞═════╪═════╪═══════════╡
# │ 1   ┆ 2   ┆ 0.0       │
# │ 2   ┆ 4   ┆ 2.0       │
# │ 4   ┆ 8   ┆ 8.0       │
# │ 8   ┆ 16  ┆ 24.0      │
# │ 16  ┆ 32  ┆ 64.0      │
# └─────┴─────┴───────────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3568
3569
3570
# File 'lib/polars/expr.rb', line 3568

def mul(other)
  self * other
end

#n_uniqueExpr

Count unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2165
2166
2167
# File 'lib/polars/expr.rb', line 2165

def n_unique
  _from_rbexpr(_rbexpr.n_unique)
end

#nameNameExpr

Create an object namespace of all expressions that modify expression names.

Returns:



7275
7276
7277
# File 'lib/polars/expr.rb', line 7275

def name
  NameExpr.new(self)
end

#nan_maxExpr

Get maximum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘

Returns:



2041
2042
2043
# File 'lib/polars/expr.rb', line 2041

def nan_max
  _from_rbexpr(_rbexpr.nan_max)
end

#nan_minExpr

Get minimum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘

Returns:



2061
2062
2063
# File 'lib/polars/expr.rb', line 2061

def nan_min
  _from_rbexpr(_rbexpr.nan_min)
end

#ne(other) ⇒ Expr

Method equivalent of inequality operator expr != other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x != y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x != y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ true   │
# │ 2.0 ┆ 2.0 ┆ false  │
# │ NaN ┆ NaN ┆ false  │
# │ 4.0 ┆ 4.0 ┆ false  │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3395
3396
3397
# File 'lib/polars/expr.rb', line 3395

def ne(other)
  self != other
end

#ne_missing(other) ⇒ Expr

Method equivalent of equality operator expr != other where None == None.

This differs from default ne where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x ne y"),
  Polars.col("x").ne_missing(Polars.col("y")).alias("x ne_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x ne y ┆ x ne_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ true   ┆ true           │
# │ 2.0  ┆ 2.0  ┆ false  ┆ false          │
# │ NaN  ┆ NaN  ┆ false  ┆ false          │
# │ 4.0  ┆ 4.0  ┆ false  ┆ false          │
# │ null ┆ 5.0  ┆ null   ┆ true           │
# │ null ┆ null ┆ null   ┆ false          │
# └──────┴──────┴────────┴────────────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3433
3434
3435
3436
# File 'lib/polars/expr.rb', line 3433

def ne_missing(other)
  other = Utils.parse_into_expression(other, str_as_lit: true)
  _from_rbexpr(_rbexpr.neq_missing(other))
end

#negExpr

Method equivalent of unary minus operator -expr.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 2, nil]})
df.with_columns(Polars.col("a").neg)
# =>
# shape: (4, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 1    │
# │ 0    │
# │ -2   │
# │ null │
# └──────┘

Returns:



3621
3622
3623
# File 'lib/polars/expr.rb', line 3621

def neg
  -self
end

#null_countExpr

Count null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [nil, 1, nil],
    "b" => [1, 2, 3]
  }
)
df.select(Polars.all.null_count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Returns:



2213
2214
2215
# File 'lib/polars/expr.rb', line 2213

def null_count
  _from_rbexpr(_rbexpr.null_count)
end

#over(expr) ⇒ Expr

Apply window function over a subgroup.

This is similar to a group by + aggregation + self join. Or similar to window functions in Postgres.

Examples:

df = Polars::DataFrame.new(
  {
    "groups" => ["g1", "g1", "g2"],
    "values" => [1, 2, 3]
  }
)
df.with_column(
  Polars.col("values").max.over("groups").alias("max_by_group")
)
# =>
# shape: (3, 3)
# ┌────────┬────────┬──────────────┐
# │ groups ┆ values ┆ max_by_group │
# │ ---    ┆ ---    ┆ ---          │
# │ str    ┆ i64    ┆ i64          │
# ╞════════╪════════╪══════════════╡
# │ g1     ┆ 1      ┆ 2            │
# │ g1     ┆ 2      ┆ 2            │
# │ g2     ┆ 3      ┆ 3            │
# └────────┴────────┴──────────────┘
df = Polars::DataFrame.new(
  {
    "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
    "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
  }
)
df.lazy
  .select([Polars.col("groups").sum.over("groups")])
  .collect
# =>
# shape: (9, 1)
# ┌────────┐
# │ groups │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 4      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 6      │
# │ 4      │
# └────────┘

Parameters:

  • expr (Object)

    Column(s) to group by.

Returns:



2384
2385
2386
2387
# File 'lib/polars/expr.rb', line 2384

def over(expr)
  rbexprs = Utils.parse_into_list_of_expressions(expr)
  _from_rbexpr(_rbexpr.over(rbexprs))
end

#pct_change(n: 1) ⇒ Expr

Computes percentage change between values.

Percentage change (as fraction) between current element and most-recent non-null element at least n period(s) before the current element.

Computes the change from the previous row by default.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [10, 11, 12, nil, 12]
  }
)
df.with_column(Polars.col("a").pct_change.alias("pct_change"))
# =>
# shape: (5, 2)
# ┌──────┬────────────┐
# │ a    ┆ pct_change │
# │ ---  ┆ ---        │
# │ i64  ┆ f64        │
# ╞══════╪════════════╡
# │ 10   ┆ null       │
# │ 11   ┆ 0.1        │
# │ 12   ┆ 0.090909   │
# │ null ┆ 0.0        │
# │ 12   ┆ 0.0        │
# └──────┴────────────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Periods to shift for forming percent change.

Returns:



5896
5897
5898
5899
# File 'lib/polars/expr.rb', line 5896

def pct_change(n: 1)
  n = Utils.parse_into_expression(n)
  _from_rbexpr(_rbexpr.pct_change(n))
end

#peak_maxExpr

Get a boolean mask of the local maximum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(Polars.col("a").peak_max)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ false │
# │ false │
# │ true  │
# └───────┘

Returns:



2482
2483
2484
# File 'lib/polars/expr.rb', line 2482

def peak_max
  _from_rbexpr(_rbexpr.peak_max)
end

#peak_minExpr

Get a boolean mask of the local minimum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
df.select(Polars.col("a").peak_min)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ false │
# │ true  │
# │ false │
# └───────┘

Returns:



2506
2507
2508
# File 'lib/polars/expr.rb', line 2506

def peak_min
  _from_rbexpr(_rbexpr.peak_min)
end

#pow(exponent) ⇒ Expr

Raise expression to the power of exponent.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
df.with_columns(
  Polars.col("x").pow(3).alias("cube"),
  Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
)
# =>
# shape: (4, 3)
# ┌─────┬──────┬────────────┐
# │ x   ┆ cube ┆ x ** xlog2 │
# │ --- ┆ ---  ┆ ---        │
# │ i64 ┆ i64  ┆ f64        │
# ╞═════╪══════╪════════════╡
# │ 1   ┆ 1    ┆ 1.0        │
# │ 2   ┆ 8    ┆ 2.0        │
# │ 4   ┆ 64   ┆ 16.0       │
# │ 8   ┆ 512  ┆ 512.0      │
# └─────┴──────┴────────────┘

Returns:



3679
3680
3681
# File 'lib/polars/expr.rb', line 3679

def pow(exponent)
  self**exponent
end

#prefix(prefix) ⇒ Expr

Add a prefix to the root column name of the expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
# =>
# shape: (3, 4)
# ┌─────┬─────┬───────────┬───────────┐
# │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
# │ --- ┆ --- ┆ ---       ┆ ---       │
# │ i64 ┆ str ┆ i64       ┆ str       │
# ╞═════╪═════╪═══════════╪═══════════╡
# │ 1   ┆ x   ┆ 3         ┆ z         │
# │ 2   ┆ y   ┆ 2         ┆ y         │
# │ 3   ┆ z   ┆ 1         ┆ x         │
# └─────┴─────┴───────────┴───────────┘

Returns:



434
435
436
# File 'lib/polars/expr.rb', line 434

def prefix(prefix)
  name.prefix(prefix)
end

#productExpr

Compute the product of an expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").product)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 6   │
# └─────┘

Returns:



2145
2146
2147
# File 'lib/polars/expr.rb', line 2145

def product
  _from_rbexpr(_rbexpr.product)
end

#qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories based on their quantiles.

Examples:

Divide a column into three categories according to pre-defined quantile probabilities.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ a    │
# │ -1  ┆ a    │
# │ 0   ┆ b    │
# │ 1   ┆ b    │
# │ 2   ┆ c    │
# └─────┴──────┘

Divide a column into two categories using uniform quantile probabilities.

df.with_columns(
  Polars.col("foo")
    .qcut(2, labels: ["low", "high"], left_closed: true)
    .alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ low  │
# │ -1  ┆ low  │
# │ 0   ┆ high │
# │ 1   ┆ high │
# │ 2   ┆ high │
# └─────┴──────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
).unnest("qcut")
# =>
# shape: (5, 3)
# ┌─────┬────────────┬────────────┐
# │ foo ┆ breakpoint ┆ category   │
# │ --- ┆ ---        ┆ ---        │
# │ i64 ┆ f64        ┆ cat        │
# ╞═════╪════════════╪════════════╡
# │ -2  ┆ -1.0       ┆ (-inf, -1] │
# │ -1  ┆ -1.0       ┆ (-inf, -1] │
# │ 0   ┆ 1.0        ┆ (-1, 1]    │
# │ 1   ┆ 1.0        ┆ (-1, 1]    │
# │ 2   ┆ inf        ┆ (1, inf]   │
# └─────┴────────────┴────────────┘

Parameters:

  • quantiles (Array)

    Either a list of quantile probabilities between 0 and 1 or a positive integer determining the number of bins with uniform probability.

  • labels (Array) (defaults to: nil)

    Names of the categories. The number of labels must be equal to the number of categories.

  • left_closed (Boolean) (defaults to: false)

    Set the intervals to be left-closed instead of right-closed.

  • allow_duplicates (Boolean) (defaults to: false)

    If set to true, duplicates in the resulting quantiles are dropped, rather than raising a DuplicateError. This can happen even with unique probabilities, depending on the data.

  • include_breaks (Boolean) (defaults to: false)

    Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.

Returns:



2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
# File 'lib/polars/expr.rb', line 2717

def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  if quantiles.is_a?(Integer)
    rbexpr = _rbexpr.qcut_uniform(
      quantiles, labels, left_closed, allow_duplicates, include_breaks
    )
  else
    rbexpr = _rbexpr.qcut(
      quantiles, labels, left_closed, allow_duplicates, include_breaks
    )
  end

  _from_rbexpr(rbexpr)
end

#quantile(quantile, interpolation: "nearest") ⇒ Expr

Get quantile value.

Examples:

df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
df.select(Polars.col("a").quantile(0.3))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘

Parameters:

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



2579
2580
2581
2582
# File 'lib/polars/expr.rb', line 2579

def quantile(quantile, interpolation: "nearest")
  quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
  _from_rbexpr(_rbexpr.quantile(quantile, interpolation))
end

#rank(method: "average", reverse: false, seed: nil) ⇒ Expr

Assign ranks to data, dealing with ties appropriately.

Examples:

The 'average' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank)
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# │ 4.5 │
# │ 1.5 │
# │ 1.5 │
# │ 4.5 │
# └─────┘

The 'ordinal' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank(method: "ordinal"))
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# │ 4   │
# │ 1   │
# │ 2   │
# │ 5   │
# └─────┘

Parameters:

  • method ("average", "min", "max", "dense", "ordinal", "random") (defaults to: "average")

    The method used to assign ranks to tied elements. The following methods are available:

    • 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value.
    • 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.)
    • 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value.
    • 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
    • 'ordinal' : All values are given a distinct rank, corresponding to the order that the values occur in the Series.
    • 'random' : Like 'ordinal', but the rank for ties is not dependent on the order that the values occur in the Series.
  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

  • seed (Integer) (defaults to: nil)

    If method: "random", use this as seed.

Returns:



5829
5830
5831
# File 'lib/polars/expr.rb', line 5829

def rank(method: "average", reverse: false, seed: nil)
  _from_rbexpr(_rbexpr.rank(method, reverse, seed))
end

#rechunkExpr

Create a single chunk of memory for this Series.

Examples:

Create a Series with 3 nulls, append column a then rechunk

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
# =>
# shape: (6, 1)
# ┌────────┐
# │ repeat │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ null   │
# │ null   │
# │ null   │
# │ 1      │
# │ 1      │
# │ 2      │
# └────────┘

Returns:



879
880
881
# File 'lib/polars/expr.rb', line 879

def rechunk
  _from_rbexpr(_rbexpr.rechunk)
end

#reinterpret(signed: false) ⇒ Expr

Reinterpret the underlying bits as a signed/unsigned integer.

This operation is only allowed for 64bit integers. For lower bits integers, you can safely use that cast operation.

Examples:

s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
df = Polars::DataFrame.new([s])
df.select(
  [
    Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
    Polars.col("a").alias("original")
  ]
)
# =>
# shape: (3, 2)
# ┌───────────────┬──────────┐
# │ reinterpreted ┆ original │
# │ ---           ┆ ---      │
# │ i64           ┆ u64      │
# ╞═══════════════╪══════════╡
# │ 1             ┆ 1        │
# │ 1             ┆ 1        │
# │ 2             ┆ 2        │
# └───────────────┴──────────┘

Parameters:

  • signed (Boolean) (defaults to: false)

    If true, reinterpret as :i64. Otherwise, reinterpret as :u64.

Returns:



3930
3931
3932
# File 'lib/polars/expr.rb', line 3930

def reinterpret(signed: false)
  _from_rbexpr(_rbexpr.reinterpret(signed))
end

#repeat_by(by) ⇒ Expr

Repeat the elements in this Series as specified in the given expression.

The repeated elements are expanded into a List.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => ["x", "y", "z"],
    "n" => [1, 2, 3]
  }
)
df.select(Polars.col("a").repeat_by("n"))
# =>
# shape: (3, 1)
# ┌─────────────────┐
# │ a               │
# │ ---             │
# │ list[str]       │
# ╞═════════════════╡
# │ ["x"]           │
# │ ["y", "y"]      │
# │ ["z", "z", "z"] │
# └─────────────────┘

Parameters:

  • by (Object)

    Numeric column that determines how often the values will be repeated. The column will be coerced to UInt32. Give this dtype to make the coercion a no-op.

Returns:



3778
3779
3780
3781
# File 'lib/polars/expr.rb', line 3778

def repeat_by(by)
  by = Utils.parse_into_expression(by, str_as_lit: false)
  _from_rbexpr(_rbexpr.repeat_by(by))
end

#replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil) ⇒ Expr

Replace values by different values.

Examples:

Replace a single value by another value. Values that were not replaced remain unchanged.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
df.with_columns(replaced: Polars.col("a").replace(2, 100))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 3        │
# └─────┴──────────┘

Replace multiple values by passing sequences to the old and new parameters.

df.with_columns(replaced: Polars.col("a").replace([2, 3], [100, 200]))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.

mapping = {2 => 100, 3 => 200}
df.with_columns(replaced: Polars.col("a").replace(mapping, default: -1))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ -1       │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Replacing by values of a different data type sets the return type based on a combination of the new data type and either the original data type or the default data type if it was set.

df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
mapping = {"x" => 1, "y" => 2, "z" => 3}
df.with_columns(replaced: Polars.col("a").replace(mapping))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ str      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘
df.with_columns(replaced: Polars.col("a").replace(mapping, default: nil))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ i64      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Set the return_dtype parameter to control the resulting data type directly.

df.with_columns(
  replaced: Polars.col("a").replace(mapping, return_dtype: Polars::UInt8)
)
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ u8       │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Expression input is supported for all parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
df.with_columns(
  replaced: Polars.col("a").replace(
    Polars.col("a").max,
    Polars.col("b").sum,
    default: Polars.col("b")
  )
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬──────────┐
# │ a   ┆ b   ┆ replaced │
# │ --- ┆ --- ┆ ---      │
# │ i64 ┆ f64 ┆ f64      │
# ╞═════╪═════╪══════════╡
# │ 1   ┆ 1.5 ┆ 1.5      │
# │ 2   ┆ 2.5 ┆ 2.5      │
# │ 2   ┆ 5.0 ┆ 5.0      │
# │ 3   ┆ 1.0 ┆ 10.0     │
# └─────┴─────┴──────────┘

Parameters:

  • old (Object)

    Value or sequence of values to replace. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Also accepts a mapping of values to their replacement.

  • new (Object) (defaults to: NO_DEFAULT)

    Value or sequence of values to replace by. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Length must match the length of old or have length 1.

  • default (Object) (defaults to: NO_DEFAULT)

    Set values that were not replaced to this value. Defaults to keeping the original value. Accepts expression input. Non-expression inputs are parsed as literals.

  • return_dtype (Object) (defaults to: nil)

    The data type of the resulting expression. If set to nil (default), the data type is determined automatically based on the other inputs.

Returns:



7053
7054
7055
7056
7057
7058
7059
7060
7061
7062
7063
7064
7065
7066
7067
7068
7069
7070
7071
7072
7073
7074
7075
7076
7077
7078
7079
7080
# File 'lib/polars/expr.rb', line 7053

def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
  if !default.eql?(NO_DEFAULT)
    return replace_strict(old, new, default: default, return_dtype: return_dtype)
  end

  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
    new = Series.new(old.values)
    old = Series.new(old.keys)
  else
    if old.is_a?(::Array)
      old = Series.new(old)
    end
    if new.is_a?(::Array)
      new = Series.new(new)
    end
  end

  old = Utils.parse_into_expression(old, str_as_lit: true)
  new = Utils.parse_into_expression(new, str_as_lit: true)

  result = _from_rbexpr(_rbexpr.replace(old, new))

  if !return_dtype.nil?
    result = result.cast(return_dtype)
  end

  result
end

#replace_strict(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil) ⇒ Expr

Note:

The global string cache must be enabled when replacing categorical values.

Replace all values by different values.

Examples:

Replace values by passing sequences to the old and new parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
df.with_columns(
  replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
)
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 100      │
# │ 2   ┆ 200      │
# │ 2   ┆ 200      │
# │ 3   ┆ 300      │
# └─────┴──────────┘

By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.

mapping = {2 => 200, 3 => 300}
df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ -1       │
# │ 2   ┆ 200      │
# │ 2   ┆ 200      │
# │ 3   ┆ 300      │
# └─────┴──────────┘

Replacing by values of a different data type sets the return type based on a combination of the new data type and the default data type.

df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
mapping = {"x" => 1, "y" => 2, "z" => 3}
df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ i64      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘
df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ str      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Set the return_dtype parameter to control the resulting data type directly.

df.with_columns(
  replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
)
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ u8       │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Expression input is supported for all parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
df.with_columns(
  replaced: Polars.col("a").replace_strict(
    Polars.col("a").max,
    Polars.col("b").sum,
    default: Polars.col("b")
  )
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬──────────┐
# │ a   ┆ b   ┆ replaced │
# │ --- ┆ --- ┆ ---      │
# │ i64 ┆ f64 ┆ f64      │
# ╞═════╪═════╪══════════╡
# │ 1   ┆ 1.5 ┆ 1.5      │
# │ 2   ┆ 2.5 ┆ 2.5      │
# │ 2   ┆ 5.0 ┆ 5.0      │
# │ 3   ┆ 1.0 ┆ 10.0     │
# └─────┴─────┴──────────┘

Parameters:

  • old (Object)

    Value or sequence of values to replace. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Also accepts a mapping of values to their replacement as syntactic sugar for replace_all(old: Series.new(mapping.keys), new: Serie.new(mapping.values)).

  • new (Object) (defaults to: NO_DEFAULT)

    Value or sequence of values to replace by. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Length must match the length of old or have length 1.

  • default (Object) (defaults to: NO_DEFAULT)

    Set values that were not replaced to this value. If no default is specified, (default), an error is raised if any values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals.

  • return_dtype (Object) (defaults to: nil)

    The data type of the resulting expression. If set to nil (default), the data type is determined automatically based on the other inputs.

Returns:



7209
7210
7211
7212
7213
7214
7215
7216
7217
7218
7219
7220
7221
7222
7223
7224
7225
7226
7227
7228
# File 'lib/polars/expr.rb', line 7209

def replace_strict(
  old,
  new = NO_DEFAULT,
  default: NO_DEFAULT,
  return_dtype: nil
)
  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
    new = Series.new(old.values)
    old = Series.new(old.keys)
  end

  old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
  new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)

  default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)

  _from_rbexpr(
    _rbexpr.replace_strict(old, new, default, return_dtype)
  )
end

#reshape(dims) ⇒ Expr

Reshape this Expr to a flat Series or a Series of Lists.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
square = df.select(Polars.col("foo").reshape([3, 3]))
# =>
# shape: (3, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[i64, 3] │
# ╞═══════════════╡
# │ [1, 2, 3]     │
# │ [4, 5, 6]     │
# │ [7, 8, 9]     │
# └───────────────┘
square.select(Polars.col("foo").reshape([9]))
# =>
# shape: (9, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# │ 7   │
# │ 8   │
# │ 9   │
# └─────┘

Parameters:

  • dims (Array)

    Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that dimension is inferred.

Returns:



6413
6414
6415
# File 'lib/polars/expr.rb', line 6413

def reshape(dims)
  _from_rbexpr(_rbexpr.reshape(dims))
end

#reverseExpr

Reverse the selection.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1, 2, 3, 4, 5],
    "fruits" => ["banana", "banana", "apple", "apple", "banana"],
    "B" => [5, 4, 3, 2, 1],
    "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
  }
)
df.select(
  [
    Polars.all,
    Polars.all.reverse.name.suffix("_reverse")
  ]
)
# =>
# shape: (5, 8)
# ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
# │ A   ┆ fruits ┆ B   ┆ cars   ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
# │ --- ┆ ---    ┆ --- ┆ ---    ┆ ---       ┆ ---            ┆ ---       ┆ ---          │
# │ i64 ┆ str    ┆ i64 ┆ str    ┆ i64       ┆ str            ┆ i64       ┆ str          │
# ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
# │ 1   ┆ banana ┆ 5   ┆ beetle ┆ 5         ┆ banana         ┆ 1         ┆ beetle       │
# │ 2   ┆ banana ┆ 4   ┆ audi   ┆ 4         ┆ apple          ┆ 2         ┆ beetle       │
# │ 3   ┆ apple  ┆ 3   ┆ beetle ┆ 3         ┆ apple          ┆ 3         ┆ beetle       │
# │ 4   ┆ apple  ┆ 2   ┆ beetle ┆ 2         ┆ banana         ┆ 4         ┆ audi         │
# │ 5   ┆ banana ┆ 1   ┆ beetle ┆ 1         ┆ banana         ┆ 5         ┆ beetle       │
# └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘

Returns:



1935
1936
1937
# File 'lib/polars/expr.rb', line 1935

def reverse
  _from_rbexpr(_rbexpr.reverse)
end

#rleExpr

Get the lengths of runs of identical values.

Examples:

df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
df.select(Polars.col("s").rle).unnest("s")
# =>
# shape: (6, 2)
# ┌─────┬───────┐
# │ len ┆ value │
# │ --- ┆ ---   │
# │ u32 ┆ i64   │
# ╞═════╪═══════╡
# │ 2   ┆ 1     │
# │ 1   ┆ 2     │
# │ 1   ┆ 1     │
# │ 1   ┆ null  │
# │ 1   ┆ 1     │
# │ 2   ┆ 3     │
# └─────┴───────┘

Returns:



2752
2753
2754
# File 'lib/polars/expr.rb', line 2752

def rle
  _from_rbexpr(_rbexpr.rle)
end

#rle_idExpr

Map values to run IDs.

Similar to RLE, but it maps each value to an ID corresponding to the run into which it falls. This is especially useful when you want to define groups by runs of identical values rather than the values themselves.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
# =>
# shape: (5, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ a_r ┆ ab_r │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ str  ┆ u32 ┆ u32  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ x    ┆ 0   ┆ 0    │
# │ 2   ┆ x    ┆ 1   ┆ 1    │
# │ 1   ┆ null ┆ 2   ┆ 2    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# └─────┴──────┴─────┴──────┘

Returns:



2780
2781
2782
# File 'lib/polars/expr.rb', line 2780

def rle_id
  _from_rbexpr(_rbexpr.rle_id)
end

#rolling_max(window_size, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Apply a rolling max (moving max) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_max(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# │ 6.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
# File 'lib/polars/expr.rb', line 5136

def rolling_max(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_max(
      window_size, weights, min_periods, center
    )
  )
end

#rolling_max_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling max based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling max with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_max │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 2               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 3               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 4               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 20              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 21              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 22              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 23              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 24              │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling max with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_max: Polars.col("index").rolling_max_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_max │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 2               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 3               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 4               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 20              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 21              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 22              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 23              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 24              │
# └───────┴─────────────────────┴─────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
# File 'lib/polars/expr.rb', line 4215

def rolling_max_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
  )
end

#rolling_mean(window_size, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Apply a rolling mean (moving mean) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
df.select(
  [
    Polars.col("A").rolling_mean(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.5  │
# │ 7.0  │
# │ 4.0  │
# │ 9.0  │
# │ 13.0 │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
# File 'lib/polars/expr.rb', line 5214

def rolling_mean(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_mean(
      window_size, weights, min_periods, center
    )
  )
end

#rolling_mean_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling mean based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling mean with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_mean: Polars.col("index").rolling_mean_by(
    "date", "2h"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────┐
# │ index ┆ date                ┆ rolling_row_mean │
# │ ---   ┆ ---                 ┆ ---              │
# │ u32   ┆ datetime[ns]        ┆ f64              │
# ╞═══════╪═════════════════════╪══════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0              │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5              │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.5              │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.5              │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.5              │
# │ …     ┆ …                   ┆ …                │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.5             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.5             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.5             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.5             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.5             │
# └───────┴─────────────────────┴──────────────────┘

Compute the rolling mean with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_mean: Polars.col("index").rolling_mean_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────┐
# │ index ┆ date                ┆ rolling_row_mean │
# │ ---   ┆ ---                 ┆ ---              │
# │ u32   ┆ datetime[ns]        ┆ f64              │
# ╞═══════╪═════════════════════╪══════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0              │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5              │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0              │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.0              │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.0              │
# │ …     ┆ …                   ┆ …                │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.0             │
# └───────┴─────────────────────┴──────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
# File 'lib/polars/expr.rb', line 4346

def rolling_mean_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_mean_by(
      by,
      window_size,
      min_periods,
      closed
    )
  )
end

#rolling_median(window_size, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Compute a rolling median.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_median(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 6.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
# File 'lib/polars/expr.rb', line 5524

def rolling_median(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_median(
      window_size, weights, min_periods, center
    )
  )
end

#rolling_median_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling median based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
  {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling median with the temporal windows closed on the right:

df_temporal.with_columns(
  rolling_row_median: Polars.col("index").rolling_median_by(
    "date", "2h"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬────────────────────┐
# │ index ┆ date                ┆ rolling_row_median │
# │ ---   ┆ ---                 ┆ ---                │
# │ u32   ┆ datetime[ns]        ┆ f64                │
# ╞═══════╪═════════════════════╪════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0                │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5                │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.5                │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.5                │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.5                │
# │ …     ┆ …                   ┆ …                  │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.5               │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.5               │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.5               │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.5               │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.5               │
# └───────┴─────────────────────┴────────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
# File 'lib/polars/expr.rb', line 4861

def rolling_median_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_median_by(by, window_size, min_periods, closed)
  )
end

#rolling_min(window_size, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Apply a rolling min (moving min) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_min(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 1.0  │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
# File 'lib/polars/expr.rb', line 5058

def rolling_min(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_min(
      window_size, weights, min_periods, center
    )
  )
end

#rolling_min_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling min based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
  {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling min with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_min │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23              │
# └───────┴─────────────────────┴─────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
# File 'lib/polars/expr.rb', line 4086

def rolling_min_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
  )
end

#rolling_quantile(quantile, interpolation: "nearest", window_size: 2, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Compute a rolling quantile.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_quantile(0.33, window_size: 3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 1.0  │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# └──────┘

Parameters:

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

  • window_size (Integer) (defaults to: 2)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
# File 'lib/polars/expr.rb', line 5602

def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_quantile(
      quantile, interpolation, window_size, weights, min_periods, center
    )
  )
end

#rolling_quantile_by(by, window_size, quantile:, interpolation: "nearest", min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling quantile based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling quantile with the temporal windows closed on the right:

df_temporal.with_columns(
  rolling_row_quantile: Polars.col("index").rolling_quantile_by(
    "date", "2h", quantile: 0.3
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬──────────────────────┐
# │ index ┆ date                ┆ rolling_row_quantile │
# │ ---   ┆ ---                 ┆ ---                  │
# │ u32   ┆ datetime[ns]        ┆ f64                  │
# ╞═══════╪═════════════════════╪══════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0.0                  │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.0                  │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0                  │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 2.0                  │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 3.0                  │
# │ …     ┆ …                   ┆ …                    │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 19.0                 │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 20.0                 │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 21.0                 │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 22.0                 │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 23.0                 │
# └───────┴─────────────────────┴──────────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ('nearest', 'higher', 'lower', 'midpoint', 'linear') (defaults to: "nearest")

    Interpolation method.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
# File 'lib/polars/expr.rb', line 4970

def rolling_quantile_by(
  by,
  window_size,
  quantile:,
  interpolation: "nearest",
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_quantile_by(
      by,
      quantile,
      interpolation,
      window_size,
      min_periods,
      closed,
    )
  )
end

#rolling_skew(window_size, bias: true) ⇒ Expr

Compute a rolling skew.

Examples:

df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
df.select(Polars.col("a").rolling_skew(3))
# =>
# shape: (4, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 0.381802 │
# │ 0.47033  │
# └──────────┘

Parameters:

  • window_size (Integer)

    Integer size of the rolling window.

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



5705
5706
5707
# File 'lib/polars/expr.rb', line 5705

def rolling_skew(window_size, bias: true)
  _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
end

#rolling_std(window_size, weights: nil, min_periods: nil, center: false, ddof: 1) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Compute a rolling standard deviation.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_std(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 1.527525 │
# │ 2.0      │
# └──────────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
# File 'lib/polars/expr.rb', line 5370

def rolling_std(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false,
  ddof: 1
)
  _from_rbexpr(
    _rbexpr.rolling_std(
      window_size, weights, min_periods, center, ddof
    )
  )
end

#rolling_std_by(by, window_size, min_periods: 1, closed: "right", ddof: 1, warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling standard deviation based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling std with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_std │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.707107        │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 0.707107        │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 0.707107        │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 0.707107        │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 0.707107        │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 0.707107        │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 0.707107        │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 0.707107        │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 0.707107        │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling std with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_std: Polars.col("index").rolling_std_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_std │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.707107        │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 1.0             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 1.0             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 1.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 1.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 1.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 1.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 1.0             │
# └───────┴─────────────────────┴─────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": The divisor for a length N window is N - ddof

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
# File 'lib/polars/expr.rb', line 4611

def rolling_std_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  ddof: 1,
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_std_by(
      by,
      window_size,
      min_periods,
      closed,
      ddof
    )
  )
end

#rolling_sum(window_size, weights: nil, min_periods: nil, center: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Apply a rolling sum (moving sum) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_sum(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 3.0  │
# │ 5.0  │
# │ 7.0  │
# │ 9.0  │
# │ 11.0 │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
# File 'lib/polars/expr.rb', line 5292

def rolling_sum(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false
)
  _from_rbexpr(
    _rbexpr.rolling_sum(
      window_size, weights, min_periods, center
    )
  )
end

#rolling_sum_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Apply a rolling sum based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling sum with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_sum │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 3               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 5               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 7               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 39              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 41              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 43              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 45              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 47              │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling sum with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_sum: Polars.col("index").rolling_sum_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_sum │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ u32             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ 0               │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 1               │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 3               │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 6               │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 9               │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 57              │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 60              │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 63              │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 66              │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 69              │
# └───────┴─────────────────────┴─────────────────┘

Parameters:

  • by (String)

    This column must of dtype {Date, Datetime}

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
# File 'lib/polars/expr.rb', line 4480

def rolling_sum_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
  )
end

#rolling_var(window_size, weights: nil, min_periods: nil, center: false, ddof: 1) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling this method can cache the window size computation.

Compute a rolling variance.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_var(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 2.333333 │
# │ 4.0      │
# └──────────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If None, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

Returns:



5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
# File 'lib/polars/expr.rb', line 5449

def rolling_var(
  window_size,
  weights: nil,
  min_periods: nil,
  center: false,
  ddof: 1
)
  _from_rbexpr(
    _rbexpr.rolling_var(
      window_size, weights, min_periods, center, ddof
    )
  )
end

#rolling_var_by(by, window_size, min_periods: 1, closed: "right", ddof: 1, warn_if_unsorted: nil) ⇒ Expr

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using rolling - this method can cache the window size computation.

Compute a rolling variance based on another column.

Examples:

Create a DataFrame with a datetime column and a row number column

start = DateTime.new(2001, 1, 1)
stop = DateTime.new(2001, 1, 2)
df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
).with_row_index
# =>
# shape: (25, 2)
# ┌───────┬─────────────────────┐
# │ index ┆ date                │
# │ ---   ┆ ---                 │
# │ u32   ┆ datetime[ns]        │
# ╞═══════╪═════════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 │
# │ 1     ┆ 2001-01-01 01:00:00 │
# │ 2     ┆ 2001-01-01 02:00:00 │
# │ 3     ┆ 2001-01-01 03:00:00 │
# │ 4     ┆ 2001-01-01 04:00:00 │
# │ …     ┆ …                   │
# │ 20    ┆ 2001-01-01 20:00:00 │
# │ 21    ┆ 2001-01-01 21:00:00 │
# │ 22    ┆ 2001-01-01 22:00:00 │
# │ 23    ┆ 2001-01-01 23:00:00 │
# │ 24    ┆ 2001-01-02 00:00:00 │
# └───────┴─────────────────────┘

Compute the rolling var with the temporal windows closed on the right (default)

df_temporal.with_columns(
  rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_var │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5             │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 0.5             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 0.5             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 0.5             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 0.5             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 0.5             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 0.5             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 0.5             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 0.5             │
# └───────┴─────────────────────┴─────────────────┘

Compute the rolling var with the closure of windows on both sides

df_temporal.with_columns(
  rolling_row_var: Polars.col("index").rolling_var_by(
    "date", "2h", closed: "both"
  )
)
# =>
# shape: (25, 3)
# ┌───────┬─────────────────────┬─────────────────┐
# │ index ┆ date                ┆ rolling_row_var │
# │ ---   ┆ ---                 ┆ ---             │
# │ u32   ┆ datetime[ns]        ┆ f64             │
# ╞═══════╪═════════════════════╪═════════════════╡
# │ 0     ┆ 2001-01-01 00:00:00 ┆ null            │
# │ 1     ┆ 2001-01-01 01:00:00 ┆ 0.5             │
# │ 2     ┆ 2001-01-01 02:00:00 ┆ 1.0             │
# │ 3     ┆ 2001-01-01 03:00:00 ┆ 1.0             │
# │ 4     ┆ 2001-01-01 04:00:00 ┆ 1.0             │
# │ …     ┆ …                   ┆ …               │
# │ 20    ┆ 2001-01-01 20:00:00 ┆ 1.0             │
# │ 21    ┆ 2001-01-01 21:00:00 ┆ 1.0             │
# │ 22    ┆ 2001-01-01 22:00:00 ┆ 1.0             │
# │ 23    ┆ 2001-01-01 23:00:00 ┆ 1.0             │
# │ 24    ┆ 2001-01-02 00:00:00 ┆ 1.0             │
# └───────┴─────────────────────┴─────────────────┘

Parameters:

  • by (String)

    This column must be of dtype Datetime or Date.

  • window_size (String)

    The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

  • min_periods (Integer) (defaults to: 1)

    The number of values in the window that should be non-null before computing a result.

  • closed ('left', 'right', 'both', 'none') (defaults to: "right")

    Define which sides of the temporal interval are closed (inclusive), defaults to 'right'.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": The divisor for a length N window is N - ddof

  • warn_if_unsorted (Boolean) (defaults to: nil)

    Warn if data is not known to be sorted by by column.

Returns:



4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
# File 'lib/polars/expr.rb', line 4749

def rolling_var_by(
  by,
  window_size,
  min_periods: 1,
  closed: "right",
  ddof: 1,
  warn_if_unsorted: nil
)
  window_size = _prepare_rolling_by_window_args(window_size)
  by = Utils.parse_into_expression(by)
  _from_rbexpr(
    _rbexpr.rolling_var_by(
      by,
      window_size,
      min_periods,
      closed,
      ddof
    )
  )
end

#round(decimals = 0) ⇒ Expr

Round underlying floating point data by decimals digits.

Examples:

df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
df.select(Polars.col("a").round(1))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.3 │
# │ 0.5 │
# │ 1.0 │
# │ 1.2 │
# └─────┘

Parameters:

  • decimals (Integer) (defaults to: 0)

    Number of decimals to round by.

Returns:



1179
1180
1181
# File 'lib/polars/expr.rb', line 1179

def round(decimals = 0)
  _from_rbexpr(_rbexpr.round(decimals))
end

#sample(frac: nil, with_replacement: true, shuffle: false, seed: nil, n: nil) ⇒ Expr

Sample from this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# │ 1   │
# │ 1   │
# └─────┘

Parameters:

  • frac (Float) (defaults to: nil)

    Fraction of items to return. Cannot be used with n.

  • with_replacement (Boolean) (defaults to: true)

    Allow values to be sampled more than once.

  • shuffle (Boolean) (defaults to: false)

    Shuffle the order of sampled data points.

  • seed (Integer) (defaults to: nil)

    Seed for the random number generator. If set to None (default), a random seed is used.

  • n (Integer) (defaults to: nil)

    Number of items to return. Cannot be used with frac.

Returns:



6476
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
# File 'lib/polars/expr.rb', line 6476

def sample(
  frac: nil,
  with_replacement: true,
  shuffle: false,
  seed: nil,
  n: nil
)
  if !n.nil? && !frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  if !n.nil? && frac.nil?
    n = Utils.parse_into_expression(n)
    return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
  end

  if frac.nil?
    frac = 1.0
  end
  frac = Utils.parse_into_expression(frac)
  _from_rbexpr(
    _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
  )
end

#search_sorted(element, side: "any") ⇒ Expr

Find indices where elements should be inserted to maintain order.

Examples:

df = Polars::DataFrame.new(
  {
    "values" => [1, 2, 3, 5]
  }
)
df.select(
  [
    Polars.col("values").search_sorted(0).alias("zero"),
    Polars.col("values").search_sorted(3).alias("three"),
    Polars.col("values").search_sorted(6).alias("six")
  ]
)
# =>
# shape: (1, 3)
# ┌──────┬───────┬─────┐
# │ zero ┆ three ┆ six │
# │ ---  ┆ ---   ┆ --- │
# │ u32  ┆ u32   ┆ u32 │
# ╞══════╪═══════╪═════╡
# │ 0    ┆ 2     ┆ 4   │
# └──────┴───────┴─────┘

Parameters:

  • element (Object)

    Expression or scalar value.

Returns:



1540
1541
1542
1543
# File 'lib/polars/expr.rb', line 1540

def search_sorted(element, side: "any")
  element = Utils.parse_into_expression(element, str_as_lit: false)
  _from_rbexpr(_rbexpr.search_sorted(element, side))
end

#set_sorted(descending: false) ⇒ Expr

Note:

This can lead to incorrect results if this Series is not sorted!! Use with care!

Flags the expression as 'sorted'.

Enables downstream code to user fast paths for sorted arrays.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").set_sorted.max)
# =>
# shape: (1, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 3      │
# └────────┘

Parameters:

  • descending (Boolean) (defaults to: false)

    Whether the Series order is descending.

Returns:



6853
6854
6855
# File 'lib/polars/expr.rb', line 6853

def set_sorted(descending: false)
  _from_rbexpr(_rbexpr.set_sorted_flag(descending))
end

#shift(n = 1, fill_value: nil) ⇒ Expr

Shift the values by a given period.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").shift(1))
# =>
# shape: (4, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ 1    │
# │ 2    │
# │ 3    │
# └──────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Number of places to shift (may be negative).

  • fill_value (Object) (defaults to: nil)

    Fill the resulting null values with this value.

Returns:



1701
1702
1703
1704
1705
1706
1707
# File 'lib/polars/expr.rb', line 1701

def shift(n = 1, fill_value: nil)
  if !fill_value.nil?
    fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
  end
  n = Utils.parse_into_expression(n)
  _from_rbexpr(_rbexpr.shift(n, fill_value))
end

#shift_and_fill(periods, fill_value) ⇒ Expr

Shift the values by a given period and fill the resulting null values.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").shift_and_fill(1, "a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ a   │
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • periods (Integer)

    Number of places to shift (may be negative).

  • fill_value (Object)

    Fill nil values with the result of this expression.

Returns:



1733
1734
1735
# File 'lib/polars/expr.rb', line 1733

def shift_and_fill(periods, fill_value)
  shift(periods, fill_value: fill_value)
end

#shrink_dtypeExpr

Shrink numeric columns to the minimal required datatype.

Shrink to the dtype needed to fit the extrema of this Series. This can be used to reduce memory pressure.

Examples:

Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [1, 2, 2 << 32],
    "c" => [-1, 2, 1 << 30],
    "d" => [-112, 2, 112],
    "e" => [-112, 2, 129],
    "f" => ["a", "b", "c"],
    "g" => [0.1, 1.32, 0.12],
    "h" => [true, nil, false]
  }
).select(Polars.all.shrink_dtype)
# =>
# shape: (3, 8)
# ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
# │ a   ┆ b          ┆ c          ┆ d    ┆ e    ┆ f   ┆ g    ┆ h     │
# │ --- ┆ ---        ┆ ---        ┆ ---  ┆ ---  ┆ --- ┆ ---  ┆ ---   │
# │ i8  ┆ i64        ┆ i32        ┆ i8   ┆ i16  ┆ str ┆ f32  ┆ bool  │
# ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
# │ 1   ┆ 1          ┆ -1         ┆ -112 ┆ -112 ┆ a   ┆ 0.1  ┆ true  │
# │ 2   ┆ 2          ┆ 2          ┆ 2    ┆ 2    ┆ b   ┆ 1.32 ┆ null  │
# │ 3   ┆ 8589934592 ┆ 1073741824 ┆ 112  ┆ 129  ┆ c   ┆ 0.12 ┆ false │
# └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘

Returns:



6913
6914
6915
# File 'lib/polars/expr.rb', line 6913

def shrink_dtype
  _from_rbexpr(_rbexpr.shrink_dtype)
end

#shuffle(seed: nil) ⇒ Expr

Shuffle the contents of this expr.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").shuffle(seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 3   │
# └─────┘

Parameters:

  • seed (Integer) (defaults to: nil)

    Seed for the random number generator. If set to None (default), a random seed is generated using the random module.

Returns:



6439
6440
6441
6442
6443
6444
# File 'lib/polars/expr.rb', line 6439

def shuffle(seed: nil)
  if seed.nil?
    seed = rand(10000)
  end
  _from_rbexpr(_rbexpr.shuffle(seed))
end

#signExpr

Compute the element-wise indication of the sign.

Examples:

df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
df.select(Polars.col("a").sign)
# =>
# shape: (5, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ -1.0 │
# │ -0.0 │
# │ 0.0  │
# │ 1.0  │
# │ null │
# └──────┘

Returns:



6127
6128
6129
# File 'lib/polars/expr.rb', line 6127

def sign
  _from_rbexpr(_rbexpr.sign)
end

#sinExpr

Compute the element-wise value for the sine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").sin)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



6147
6148
6149
# File 'lib/polars/expr.rb', line 6147

def sin
  _from_rbexpr(_rbexpr.sin)
end

#sinhExpr

Compute the element-wise value for the hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").sinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.175201 │
# └──────────┘

Returns:



6267
6268
6269
# File 'lib/polars/expr.rb', line 6267

def sinh
  _from_rbexpr(_rbexpr.sinh)
end

#skew(bias: true) ⇒ Expr

Compute the sample skewness of a data set.

For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. The function skewtest can be used to determine if the skewness value is close enough to zero, statistically speaking.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").skew)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.343622 │
# └──────────┘

Parameters:

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



5926
5927
5928
# File 'lib/polars/expr.rb', line 5926

def skew(bias: true)
  _from_rbexpr(_rbexpr.skew(bias))
end

#slice(offset, length = nil) ⇒ Expr

Get a slice of this expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4, 4, 4]
  }
)
df.select(Polars.all.slice(1, 2))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 9   ┆ 4   │
# │ 10  ┆ 4   │
# └─────┴─────┘

Parameters:

  • offset (Integer)

    Start index. Negative indexing is supported.

  • length (Integer) (defaults to: nil)

    Length of the slice. If set to nil, all rows starting at the offset will be selected.

Returns:



814
815
816
817
818
819
820
821
822
# File 'lib/polars/expr.rb', line 814

def slice(offset, length = nil)
  if !offset.is_a?(Expr)
    offset = Polars.lit(offset)
  end
  if !length.is_a?(Expr)
    length = Polars.lit(length)
  end
  _from_rbexpr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
end

#sort(reverse: false, nulls_last: false) ⇒ Expr

Sort this column. In projection/ selection context the whole column is sorted.

If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("value").sort)
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 1     │
# │ 2     │
# │ 3     │
# │ 4     │
# │ 98    │
# │ 99    │
# └───────┘
df.select(Polars.col("value").sort)
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 1     │
# │ 2     │
# │ 3     │
# │ 4     │
# │ 98    │
# │ 99    │
# └───────┘
df.group_by("group").agg(Polars.col("value").sort)
# =>
# shape: (2, 2)
# ┌───────┬────────────┐
# │ group ┆ value      │
# │ ---   ┆ ---        │
# │ str   ┆ list[i64]  │
# ╞═══════╪════════════╡
# │ two   ┆ [3, 4, 99] │
# │ one   ┆ [1, 2, 98] │
# └───────┴────────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    false -> order from small to large. true -> order from large to small.

  • nulls_last (Boolean) (defaults to: false)

    If true nulls are considered to be larger than any valid value.

Returns:



1350
1351
1352
# File 'lib/polars/expr.rb', line 1350

def sort(reverse: false, nulls_last: false)
  _from_rbexpr(_rbexpr.sort_with(reverse, nulls_last))
end

#sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr

Sort this column by the ordering of another column, or multiple other columns.

In projection/ selection context the whole column is sorted. If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("group").sort_by("value"))
# =>
# shape: (6, 1)
# ┌───────┐
# │ group │
# │ ---   │
# │ str   │
# ╞═══════╡
# │ one   │
# │ one   │
# │ two   │
# │ two   │
# │ one   │
# │ two   │
# └───────┘

Parameters:

  • by (Object)

    The column(s) used for sorting.

  • reverse (Boolean) (defaults to: false)

    false -> order from small to large. true -> order from large to small.

Returns:



1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
# File 'lib/polars/expr.rb', line 1587

def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
  by = Utils.parse_into_list_of_expressions(by, *more_by)
  reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
  nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
  _from_rbexpr(
    _rbexpr.sort_by(
      by, reverse, nulls_last, multithreaded, maintain_order
    )
  )
end

#sqrtExpr

Compute the square root of the elements.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").sqrt)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.414214 │
# │ 2.0      │
# └──────────┘

Returns:



245
246
247
# File 'lib/polars/expr.rb', line 245

def sqrt
  self**0.5
end

#std(ddof: 1) ⇒ Expr

Get standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").std)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Parameters:

  • ddof (Integer) (defaults to: 1)

    Degrees of freedom.

Returns:



1958
1959
1960
# File 'lib/polars/expr.rb', line 1958

def std(ddof: 1)
  _from_rbexpr(_rbexpr.std(ddof))
end

#strStringExpr

Create an object namespace of all string related methods.

Returns:



7282
7283
7284
# File 'lib/polars/expr.rb', line 7282

def str
  StringExpr.new(self)
end

#structStructExpr

Create an object namespace of all struct related methods.

Returns:



7289
7290
7291
# File 'lib/polars/expr.rb', line 7289

def struct
  StructExpr.new(self)
end

#sub(other) ⇒ Expr

Method equivalent of subtraction operator expr - other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(
  Polars.col("x").sub(2).alias("x-2"),
  Polars.col("x").sub(Polars.col("x").cum_sum).alias("x-expr"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ x-2 ┆ x-expr │
# │ --- ┆ --- ┆ ---    │
# │ i64 ┆ i64 ┆ i64    │
# ╞═════╪═════╪════════╡
# │ 0   ┆ -2  ┆ 0      │
# │ 1   ┆ -1  ┆ 0      │
# │ 2   ┆ 0   ┆ -1     │
# │ 3   ┆ 1   ┆ -3     │
# │ 4   ┆ 2   ┆ -6     │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3598
3599
3600
# File 'lib/polars/expr.rb', line 3598

def sub(other)
  self - other
end

#suffix(suffix) ⇒ Expr

Add a suffix to the root column name of the expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
# =>
# shape: (3, 4)
# ┌─────┬─────┬───────────┬───────────┐
# │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
# │ --- ┆ --- ┆ ---       ┆ ---       │
# │ i64 ┆ str ┆ i64       ┆ str       │
# ╞═════╪═════╪═══════════╪═══════════╡
# │ 1   ┆ x   ┆ 3         ┆ z         │
# │ 2   ┆ y   ┆ 2         ┆ y         │
# │ 3   ┆ z   ┆ 1         ┆ x         │
# └─────┴─────┴───────────┴───────────┘

Returns:



461
462
463
# File 'lib/polars/expr.rb', line 461

def suffix(suffix)
  name.suffix(suffix)
end

#sumExpr

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before summing to prevent overflow issues.

Get sum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").sum)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# └─────┘

Returns:



2085
2086
2087
# File 'lib/polars/expr.rb', line 2085

def sum
  _from_rbexpr(_rbexpr.sum)
end

#tail(n = 10) ⇒ Expr

Get the last n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.tail(3)
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 5   │
# │ 6   │
# │ 7   │
# └─────┘

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



3132
3133
3134
# File 'lib/polars/expr.rb', line 3132

def tail(n = 10)
  _from_rbexpr(_rbexpr.tail(n))
end

#tanExpr

Compute the element-wise value for the tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.557408 │
# └──────────┘

Returns:



6187
6188
6189
# File 'lib/polars/expr.rb', line 6187

def tan
  _from_rbexpr(_rbexpr.tan)
end

#tanhExpr

Compute the element-wise value for the hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tanh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.761594 │
# └──────────┘

Returns:



6307
6308
6309
# File 'lib/polars/expr.rb', line 6307

def tanh
  _from_rbexpr(_rbexpr.tanh)
end

#to_physicalExpr

Cast to physical representation of the logical dtype.

  • :date -> :i32
  • :datetime -> :i64
  • :time -> :i64
  • :duration -> :i64
  • :cat -> :u32
  • Other data types will be left unchanged.

Examples:

Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
  [
    Polars.col("vals").cast(:cat),
    Polars.col("vals")
      .cast(:cat)
      .to_physical
      .alias("vals_physical")
  ]
)
# =>
# shape: (4, 2)
# ┌──────┬───────────────┐
# │ vals ┆ vals_physical │
# │ ---  ┆ ---           │
# │ cat  ┆ u32           │
# ╞══════╪═══════════════╡
# │ a    ┆ 0             │
# │ x    ┆ 1             │
# │ null ┆ null          │
# │ a    ┆ 0             │
# └──────┴───────────────┘

Returns:



178
179
180
# File 'lib/polars/expr.rb', line 178

def to_physical
  _from_rbexpr(_rbexpr.to_physical)
end

#to_sString Also known as: inspect

Returns a string representing the Expr.

Returns:



20
21
22
# File 'lib/polars/expr.rb', line 20

def to_s
  _rbexpr.to_str
end

#top_k(k: 5) ⇒ Expr

Return the k largest elements.

If 'reverse: true` the smallest elements will be given.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘

Parameters:

  • k (Integer) (defaults to: 5)

    Number of elements to return.

Returns:



1388
1389
1390
1391
# File 'lib/polars/expr.rb', line 1388

def top_k(k: 5)
  k = Utils.parse_into_expression(k)
  _from_rbexpr(_rbexpr.top_k(k))
end

#truediv(other) ⇒ Expr

Method equivalent of float division operator expr / other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [-2, -1, 0, 1, 2], "y" => [0.5, 0.0, 0.0, -4.0, -0.5]}
)
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").truediv(Polars.col("y")).alias("x/y")
)
# =>
# shape: (5, 4)
# ┌─────┬──────┬──────┬───────┐
# │ x   ┆ y    ┆ x/2  ┆ x/y   │
# │ --- ┆ ---  ┆ ---  ┆ ---   │
# │ i64 ┆ f64  ┆ f64  ┆ f64   │
# ╞═════╪══════╪══════╪═══════╡
# │ -2  ┆ 0.5  ┆ -1.0 ┆ -4.0  │
# │ -1  ┆ 0.0  ┆ -0.5 ┆ -inf  │
# │ 0   ┆ 0.0  ┆ 0.0  ┆ NaN   │
# │ 1   ┆ -4.0 ┆ 0.5  ┆ -0.25 │
# │ 2   ┆ -0.5 ┆ 1.0  ┆ -4.0  │
# └─────┴──────┴──────┴───────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3653
3654
3655
# File 'lib/polars/expr.rb', line 3653

def truediv(other)
  self / other
end

#unique(maintain_order: false) ⇒ Expr

Get unique values of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").unique(maintain_order: true))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# └─────┘

Parameters:

  • maintain_order (Boolean) (defaults to: false)

    Maintain order of data. This requires more work.

Returns:



2277
2278
2279
2280
2281
2282
2283
# File 'lib/polars/expr.rb', line 2277

def unique(maintain_order: false)
  if maintain_order
    _from_rbexpr(_rbexpr.unique_stable)
  else
    _from_rbexpr(_rbexpr.unique)
  end
end

#unique_countsExpr

Return a count of the unique values in the order of appearance.

This method differs from value_counts in that it does not return the values, only the counts and might be faster

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").unique_counts
  ]
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ id  │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Returns:



6711
6712
6713
# File 'lib/polars/expr.rb', line 6711

def unique_counts
  _from_rbexpr(_rbexpr.unique_counts)
end

#upper_boundExpr

Calculate the upper bound.

Returns a unit Series with the highest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").upper_bound)
# =>
# shape: (1, 1)
# ┌─────────────────────┐
# │ a                   │
# │ ---                 │
# │ i64                 │
# ╞═════════════════════╡
# │ 9223372036854775807 │
# └─────────────────────┘

Returns:



6103
6104
6105
# File 'lib/polars/expr.rb', line 6103

def upper_bound
  _from_rbexpr(_rbexpr.upper_bound)
end

#value_counts(sort: false, parallel: false, name: nil, normalize: false) ⇒ Expr

Count all unique values and create a struct mapping value to count.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").value_counts(sort: true),
  ]
)
# =>
# shape: (3, 1)
# ┌───────────┐
# │ id        │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {"c",3}   │
# │ {"b",2}   │
# │ {"a",1}   │
# └───────────┘

Parameters:

  • sort (Boolean) (defaults to: false)

    Sort the output by count in descending order. If set to false (default), the order of the output is random.

  • parallel (Boolean) (defaults to: false)

    Execute the computation in parallel.

  • name (String) (defaults to: nil)

    Give the resulting count column a specific name; if normalize is true defaults to "count", otherwise defaults to "proportion".

  • normalize (Boolean) (defaults to: false)

    If true gives relative frequencies of the unique values

Returns:



6664
6665
6666
6667
6668
6669
6670
6671
6672
6673
6674
6675
6676
6677
6678
6679
6680
# File 'lib/polars/expr.rb', line 6664

def value_counts(
  sort: false,
  parallel: false,
  name: nil,
  normalize: false
)
  if name.nil?
    if normalize
      name = "proportion"
    else
      name = "count"
    end
  end
  _from_rbexpr(
    _rbexpr.value_counts(sort, parallel, name, normalize)
  )
end

#var(ddof: 1) ⇒ Expr

Get variance.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").var)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Parameters:

  • ddof (Integer) (defaults to: 1)

    Degrees of freedom.

Returns:



1981
1982
1983
# File 'lib/polars/expr.rb', line 1981

def var(ddof: 1)
  _from_rbexpr(_rbexpr.var(ddof))
end

#where(predicate) ⇒ Expr

Filter a single column.

Alias for #filter.

Examples:

df = Polars::DataFrame.new(
  {
    "group_col" => ["g1", "g1", "g2"],
    "b" => [1, 2, 3]
  }
)
(
  df.group_by("group_col").agg(
    [
      Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
      Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
    ]
  )
).sort("group_col")
# =>
# shape: (2, 3)
# ┌───────────┬─────┬─────┐
# │ group_col ┆ lt  ┆ gte │
# │ ---       ┆ --- ┆ --- │
# │ str       ┆ i64 ┆ i64 │
# ╞═══════════╪═════╪═════╡
# │ g1        ┆ 1   ┆ 2   │
# │ g2        ┆ 0   ┆ 3   │
# └───────────┴─────┴─────┘

Parameters:

  • predicate (Expr)

    Boolean expression.

Returns:



2857
2858
2859
# File 'lib/polars/expr.rb', line 2857

def where(predicate)
  filter(predicate)
end

#xor(other) ⇒ Expr

Method equivalent of bitwise exclusive-or operator expr ^ other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [true, false, true, false], "y" => [true, true, false, false]}
)
df.with_columns(Polars.col("x").xor(Polars.col("y")).alias("x ^ y"))
# =>
# shape: (4, 3)
# ┌───────┬───────┬───────┐
# │ x     ┆ y     ┆ x ^ y │
# │ ---   ┆ ---   ┆ ---   │
# │ bool  ┆ bool  ┆ bool  │
# ╞═══════╪═══════╪═══════╡
# │ true  ┆ true  ┆ false │
# │ false ┆ true  ┆ true  │
# │ true  ┆ false ┆ true  │
# │ false ┆ false ┆ false │
# └───────┴───────┴───────┘

Parameters:

  • other (Object)

    Integer or boolean value; accepts expression input.

Returns:



3707
3708
3709
# File 'lib/polars/expr.rb', line 3707

def xor(other)
  self ^ other
end

#|(other) ⇒ Expr

Bitwise OR.

Returns:



42
43
44
# File 'lib/polars/expr.rb', line 42

def |(other)
  _from_rbexpr(_rbexpr._or(_to_rbexpr(other)))
end