Module: Polars::Functions

Included in:
Polars
Defined in:
lib/polars/string_cache.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/functions/lazy.rb,
lib/polars/functions/eager.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/functions/business.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/escape_regex.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/functions/range/linear_space.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb

Instance Method Summary collapse

Instance Method Details

#align_frames(*frames, on:, how: nil, select: nil, descending: false) ⇒ Object

Align an array of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", how: "left", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

Parameters:

  • frames (Array)

    Array of DataFrames or LazyFrames.

  • on (Object)

    One or more columns whose unique values will be used to align the frames.

  • select (Object) (defaults to: nil)

    Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.

  • descending (Object) (defaults to: false)

    Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:



464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/polars/functions/eager.rb', line 464

# Align an array of frames on the unique values of one or more key columns.
#
# Frames missing a given key value have null-filled rows injected, and each
# result is sorted by the key, so every returned frame has the same height.
#
# @param frames [Array] DataFrames or LazyFrames (must all be the same type).
# @param on [Object] column(s) whose unique values define the alignment key.
# @param how [String, nil] join strategy used for alignment; when nil a
#   deprecation warning is issued and "left" is used.
# @param select [Object, nil] optional post-alignment column selection.
# @param descending [Boolean, Array] sort direction(s) for the key column(s).
# @return [Array] aligned frames, same count and type as the input.
def align_frames(
  *frames,
  on:,
  how: nil,
  select: nil,
  descending: false
)
  # NOTE(review): the implicit default for `how` is slated to change from
  # "left" to "full"; warn callers who rely on it.
  if how.nil?
    warn "The default `how` for `align_frames` method will change from `left` to `full` in a future version"
    how = "left"
  end

  if frames.empty?
    return []
  elsif frames.map(&:class).uniq.length != 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  # establish the superset of all "on" column values, sort, and cache
  eager = frames[0].is_a?(DataFrame)
  alignment_frame = (
    concat(frames.map { |df| df.lazy.select(on) })
      .unique(maintain_order: false)
      .sort(on, descending: descending)
  )
  # materialize (eager) or cache (lazy) once so the key superset is not
  # recomputed for every join below
  alignment_frame = (
    eager ? alignment_frame.collect.lazy : alignment_frame.cache
  )
  # finally, align all frames by joining each against the key superset
  aligned_frames =
    frames.map do |df|
      alignment_frame.join(
        df.lazy,
        on: alignment_frame.columns,
        how: how
      ).select(df.columns) # restore the frame's original column order
    end
  if !select.nil?
    aligned_frames = aligned_frames.map { |df| df.select(select) }
  end

  eager ? aligned_frames.map(&:collect) : aligned_frames
end

#all(*names, ignore_nulls: true) ⇒ Expr

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



44
45
46
47
48
49
50
# File 'lib/polars/functions/aggregation/vertical.rb', line 44

# Return an expression for all columns, or evaluate a vertical bitwise AND.
#
# With no names this is sugar for `col("*")`; otherwise it is sugar for
# `col(names).all`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean] ignore null values (default).
# @return [Expr]
def all(*names, ignore_nulls: true)
  return col("*") if names.empty?

  col(*names).all(ignore_nulls: ignore_nulls)
end

#all_horizontal(*exprs) ⇒ Expr

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



34
35
36
37
# File 'lib/polars/functions/aggregation/horizontal.rb', line 34

# Compute the bitwise AND horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @return [Expr]
def all_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.all_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#any(*names, ignore_nulls: true) ⇒ Expr

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



80
81
82
# File 'lib/polars/functions/aggregation/vertical.rb', line 80

# Evaluate a vertical bitwise OR operation; sugar for `col(names).any`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean] ignore null values (default).
# @return [Expr]
def any(*names, ignore_nulls: true)
  columns_expr = col(*names)
  columns_expr.any(ignore_nulls: ignore_nulls)
end

#any_horizontal(*exprs) ⇒ Expr

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



70
71
72
73
# File 'lib/polars/functions/aggregation/horizontal.rb', line 70

# Compute the bitwise OR horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @return [Expr]
def any_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.any_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#approx_n_unique(*columns) ⇒ Expr

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



429
430
431
# File 'lib/polars/functions/lazy.rb', line 429

# Approximate count of unique values; sugar for `col(columns).approx_n_unique`.
#
# @param columns [Array] one or more column names.
# @return [Expr]
def approx_n_unique(*columns)
  columns_expr = col(*columns)
  columns_expr.approx_n_unique
end

#arctan2(y, x) ⇒ Expr

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

c = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [c, -c, c, -c],
    "x" => [c, c, -c, -c]
  }
)
df.with_columns(Polars.arctan2("y", "x").alias("atan2"))
# =>
# shape: (4, 3)
# ┌───────────┬───────────┬───────────┐
# │ y         ┆ x         ┆ atan2     │
# │ ---       ┆ ---       ┆ ---       │
# │ f64       ┆ f64       ┆ f64       │
# ╞═══════════╪═══════════╪═══════════╡
# │ 0.707107  ┆ 0.707107  ┆ 0.785398  │
# │ -0.707107 ┆ 0.707107  ┆ -0.785398 │
# │ 0.707107  ┆ -0.707107 ┆ 2.356194  │
# │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
# └───────────┴───────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



1340
1341
1342
1343
1344
1345
1346
1347
1348
# File 'lib/polars/functions/lazy.rb', line 1340

# Compute the two-argument arctangent in radians.
#
# Returns the angle between the positive x-axis and the ray from the
# origin to (x, y).
#
# @param y [Object] column name or Expression.
# @param x [Object] column name or Expression.
# @return [Expr]
def arctan2(y, x)
  # promote plain column names to column expressions
  y, x = [y, x].map { |v| Utils.strlike?(v) ? col(v) : v }
  Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
end

#arg_sort_by(exprs, *more_exprs, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], descending: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • exprs (Object)

    Columns use to determine the ordering.

  • more_exprs (Array)

    Additional columns to arg sort by, specified as positional arguments.

  • descending (Boolean) (defaults to: false)

    Default is ascending.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last.

  • multithreaded (Boolean) (defaults to: true)

    Sort using multiple threads.

  • maintain_order (Boolean) (defaults to: false)

    Whether the order should be maintained if elements are equal.

Returns:



1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
# File 'lib/polars/functions/lazy.rb', line 1505

# Find the row indices that would sort the given columns.
#
# Ties in the first column are broken by the second, and so on.
#
# @param exprs [Object] columns used to determine the ordering.
# @param more_exprs [Array] additional sort columns, as positional args.
# @param descending [Boolean, Array] sort direction(s); default ascending.
# @param nulls_last [Boolean, Array] place null values last.
# @param multithreaded [Boolean] sort using multiple threads.
# @param maintain_order [Boolean] keep order of equal elements.
# @return [Expr]
def arg_sort_by(
  exprs,
  *more_exprs,
  descending: false,
  nulls_last: false,
  multithreaded: true,
  maintain_order: false
)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  # broadcast scalar flags to one entry per sort column
  desc_flags = Utils.extend_bool(descending, rbexprs.length, "descending", "exprs")
  last_flags = Utils.extend_bool(nulls_last, rbexprs.length, "nulls_last", "exprs")
  Utils.wrap_expr(
    Plr.arg_sort_by(rbexprs, desc_flags, last_flags, multithreaded, maintain_order)
  )
end

#arg_where(condition, eager: false) ⇒ Expr, Series

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

  • condition (Expr)

    Boolean expression to evaluate

  • eager (Boolean) (defaults to: false)

    Whether to apply this function eagerly (as opposed to lazily).

Returns:



1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
# File 'lib/polars/functions/lazy.rb', line 1631

# Return indices where `condition` evaluates true.
#
# @param condition [Expr] boolean expression to evaluate.
# @param eager [Boolean] apply eagerly (requires a Series input) instead
#   of returning a lazy expression.
# @return [Expr, Series]
def arg_where(condition, eager: false)
  unless eager
    rbexpr = Utils.parse_into_expression(condition, str_as_lit: true)
    return Utils.wrap_expr(Plr.arg_where(rbexpr))
  end

  if !condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end
  # evaluate the lazy form against the series' own frame
  condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
end

#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Count the number of business days between start and end (not including end).

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
    "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
  }
)
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end")
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 6                  │
# └────────────┴────────────┴────────────────────┘

You can pass a custom weekend - for example, if you only take Sunday off:

week_mask = [true, true, true, true, true, true, false]
df.with_columns(
  business_day_count: Polars.business_day_count(
    "start", "end", week_mask: week_mask
  )
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 7                  │
# └────────────┴────────────┴────────────────────┘

You can also pass a list of holidays to exclude from the count:

holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 0                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 5                  │
# └────────────┴────────────┴────────────────────┘

Parameters:

  • start (Object)

    Start dates.

  • stop (Object)

    End dates.

  • week_mask (Array) (defaults to: [true, true, true, true, true, false, false])

    Which days of the week to count. The default is Monday to Friday. If you wanted to count only Monday to Thursday, you would pass [true, true, true, true, false, false, false].

  • holidays (Array) (defaults to: [])

    Holidays to exclude from the count.

Returns:



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/polars/functions/business.rb', line 76

# Count the business days between `start` and `stop` (end date exclusive).
#
# @note This functionality is considered unstable.
#
# @param start [Object] start dates.
# @param stop [Object] end dates.
# @param week_mask [Array] which days of the week count as business days;
#   default is Monday-Friday.
# @param holidays [Array] dates excluded from the count.
# @return [Expr]
def business_day_count(
  start,
  stop,
  week_mask: [true, true, true, true, true, false, false],
  holidays: []
)
  # the native layer expects holidays as day offsets from the Unix epoch
  epoch = ::Date.new(1970, 1, 1)
  holiday_offsets = holidays.map { |holiday| holiday - epoch }
  Utils.wrap_expr(
    Plr.business_day_count(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      week_mask,
      holiday_offsets
    )
  )
end

#coalesce(exprs, *more_exprs, eager: false) ⇒ Expr

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘
df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘
s1 = Polars::Series.new("a", [nil, 2, nil])
s2 = Polars::Series.new("b", [1, nil, 3])
Polars.coalesce(s1, s2, eager: true)
# =>
# shape: (3,)
# Series: 'a' [i64]
# [
#         1
#         2
#         3
# ]

Parameters:

  • exprs (Array)

    Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Array)

    Additional columns to coalesce, specified as positional arguments.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
# File 'lib/polars/functions/lazy.rb', line 1706

# Fold the columns left to right, keeping the first non-null value.
#
# @param exprs [Array] columns to coalesce; strings are parsed as column
#   names, other non-expression inputs as literals.
# @param more_exprs [Array] additional columns, as positional arguments.
# @param eager [Boolean] evaluate immediately and return a Series
#   (requires at least one Series argument); default returns an Expr.
# @return [Expr, Series]
def coalesce(exprs, *more_exprs, eager: false)
  unless eager
    rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
    return Utils.wrap_expr(Plr.coalesce(rbexprs))
  end

  inputs = [exprs] + more_exprs
  series_inputs = inputs.filter_map { |e| e if e.is_a?(Series) }
  if series_inputs.empty?
    msg = "expected at least one Series in 'coalesce' if 'eager: true'"
    raise ArgumentError, msg
  end

  # refer to Series by name so the lazy form can resolve them
  names_or_literals = inputs.map { |e| e.is_a?(Series) ? e.name : e }
  Polars::DataFrame.new(series_inputs).select(coalesce(names_or_literals, eager: false)).to_series
end

#col(name, *more_names) ⇒ Expr

Return an expression representing a column in a DataFrame.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/polars/functions/col.rb', line 6

# Return an expression representing one or more columns in a DataFrame.
#
# Accepts a single name, a dtype, several names, several dtypes, or an
# Array/Set of names or dtypes.
#
# @param name [Object] a column name, dtype, or an Array/Set of names or dtypes.
# @param more_names [Array] additional names or dtypes, as positional arguments.
# @raise [TypeError] if the input is not a string, dtype, or iterable thereof.
# @return [Expr]
def col(name, *more_names)
  if more_names.any?
    if Utils.strlike?(name)
      # multiple names: select by (pattern-expandable) column names
      names_str = [name]
      names_str.concat(more_names)
      return Selector._by_name(names_str.map(&:to_s), strict: true, expand_patterns: true).as_expr
    elsif Utils.is_polars_dtype(name)
      # multiple dtypes: select every column of any of these types.
      # Use _by_dtype for consistency with the single-dtype and
      # iterable-of-dtypes branches below (was `_by_type`).
      dtypes = [name]
      dtypes.concat(more_names)
      return Selector._by_dtype(dtypes).as_expr
    else
      msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
      raise TypeError, msg
    end
  end

  if Utils.strlike?(name)
    # single column name (or "*" wildcard)
    Utils.wrap_expr(Plr.col(name.to_s))
  elsif Utils.is_polars_dtype(name)
    dtypes = [name]
    Selector._by_dtype(dtypes).as_expr
  elsif name.is_a?(::Array) || name.is_a?(::Set)
    names = Array(name)
    if names.empty?
      return Utils.wrap_expr(Plr.cols(names))
    end

    # dispatch on the type of the first element
    item = names[0]
    if Utils.strlike?(item)
      Selector._by_name(names.map(&:to_s), strict: true, expand_patterns: true).as_expr
    elsif Utils.is_polars_dtype(item)
      Selector._by_dtype(names).as_expr
    else
      msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
      raise TypeError, msg
    end
  else
    msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
    raise TypeError, msg
  end
end

#collect_all(lazy_frames, optimizations: DEFAULT_QUERY_OPT_FLAGS, engine: "auto", lazy: false) ⇒ Array

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on Polars threadpool.

Parameters:

  • lazy_frames (Array)

    A list of LazyFrames to collect.

  • optimizations (defaults to: DEFAULT_QUERY_OPT_FLAGS)

    The optimization passes done during query optimization.

    This has no effect if lazy is set to true.

  • engine (String) (defaults to: "auto")

    Select the engine used to process the query, optional. At the moment, if set to "auto" (default), the query is run using the polars streaming engine. Polars will also attempt to use the engine set by the POLARS_ENGINE_AFFINITY environment variable. If it cannot run the query using the selected engine, the query is run using the polars streaming engine.

  • lazy (Boolean) (defaults to: false)

    Return as LazyFrame that can be collected later. This is only correct if all inputs sink to disk.

    This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Returns:



1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
# File 'lib/polars/functions/lazy.rb', line 1545

# Collect multiple LazyFrames at the same time.
#
# All computation graphs are run in parallel on the Polars thread pool.
#
# @param lazy_frames [Array] LazyFrames to collect.
# @param optimizations [Object] optimization passes applied during query
#   optimization; ignored when `lazy` is true.
# @param engine [String] engine used to process the query ("auto" by default).
# @param lazy [Boolean] return a LazyFrame to collect later (unstable).
# @return [Array, LazyFrame] collected DataFrames, or a LazyFrame when
#   `lazy` is true.
def collect_all(
  lazy_frames,
  optimizations: DEFAULT_QUERY_OPT_FLAGS,
  engine: "auto",
  lazy: false
)
  # unwrap each LazyFrame to its native handle
  lfs = lazy_frames.map { |lf| lf._ldf }

  if lazy
    msg = "the `lazy` parameter of `collect_all` is considered unstable."
    Utils.issue_unstable_warning(msg)

    ldf = Plr.collect_all_lazy(lfs, optimizations._rboptflags)
    # NOTE(review): `_from_pyldf` reads like a Python-port name; this library
    # elsewhere uses rb-prefixed handles — confirm this constructor exists.
    lf = LazyFrame._from_pyldf(ldf)
    return lf
  end

  engine = LazyFrame._select_engine(engine)
  out = Plr.collect_all(lfs, engine, optimizations._rboptflags)

  # wrap the native rbdataframes into Ruby DataFrame objects
  result = out.map { |rbdf| Utils.wrap_df(rbdf) }

  result
end

#concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object

Aggregate multiple Dataframes/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘
df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘
df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

Parameters:

  • items (Object)

    DataFrames/Series/LazyFrames to concatenate.

  • rechunk (Boolean) (defaults to: false)

    Make sure that all data is in contiguous memory.

  • how ("vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align") (defaults to: "vertical")
    • Vertical: applies multiple vstack operations.
    • Diagonal: finds a union between the column schemas and fills missing column values with null.
    • Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
    • Align: combines frames horizontally, aligning rows on the common columns (see the "align" example above).
  • parallel (Boolean) (defaults to: true)

    Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

  • strict (Boolean) (defaults to: false)

    When how=horizontal, require all DataFrames to be the same height, raising an error if not.

Returns:



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/polars/functions/eager.rb', line 98

# Aggregate multiple DataFrames/Series/LazyFrames into one.
#
# @param items [Object] frames/series to concatenate.
# @param rechunk [Boolean] make sure all data is in contiguous memory.
# @param how [String] "vertical", "vertical_relaxed", "diagonal",
#   "diagonal_relaxed", "horizontal", or "align".
# @param parallel [Boolean] LazyFrames only: allow parallel execution of
#   the concatenated lazy computations.
# @param strict [Boolean] when how is "horizontal", require all DataFrames
#   to have the same height.
# @raise [ArgumentError] on an empty list, an unsupported `how`, or an
#   unsupported input type.
# @return [Object] a DataFrame, LazyFrame, Series, or Expr matching the input.
def concat(items, rechunk: false, how: "vertical", parallel: true, strict: false)
  elems = items.to_a

  if elems.empty?
    raise ArgumentError, "cannot concat empty list"
  end

  if how == "align"
    if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
      msg = "'align' strategy is not supported for #{elems[0].class.name}"
      raise TypeError, msg
    end

    # establish common columns, maintaining the order in which they appear
    all_columns = elems.flat_map { |e| e.collect_schema.names }
    key = all_columns.uniq.map.with_index.to_h
    common_cols = elems.map { |e| e.collect_schema.names }
      .reduce { |x, y| Set.new(x) & Set.new(y) }
      .sort_by { |k| key[k] }
    # we require at least one key column for 'align'
    if common_cols.empty?
      msg = "'align' strategy requires at least one common column"
      raise InvalidOperationError, msg
    end

    # align the frame data using a full outer join with no suffix-resolution
    # (so we raise an error in case of column collision, like "horizontal")
    lf = elems.map { |df| df.lazy }.reduce do |x, y|
      x.join(
        y,
        how: "full",
        on: common_cols,
        suffix: "_PL_CONCAT_RIGHT",
        maintain_order: "right_left"
      )
      # Coalesce full outer join columns
      .with_columns(
        common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
      )
      .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
    end.sort(common_cols)

    eager = elems[0].is_a?(DataFrame)
    return eager ? lf.collect : lf
  end

  first = elems[0]

  if first.is_a?(DataFrame)
    if how == "vertical"
      out = Utils.wrap_df(Plr.concat_df(elems))
    elsif how == "vertical_relaxed"
      # relaxed variants route through the lazy engine so dtypes can coerce
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "diagonal"
      out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
    elsif how == "diagonal_relaxed"
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems, strict))
    else
      # fixed: message previously contained literal doubled braces ("{{...}}"),
      # an escape artifact from the Python f-string this was ported from
      raise ArgumentError, "how must be one of {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal'}, got #{how}"
    end
  elsif first.is_a?(LazyFrame)
    if how == "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
    elsif how == "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
    elsif how == "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
    elsif how == "diagonal_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
    elsif how == "horizontal"
      return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel, strict))
    else
      # fixed: message previously omitted 'horizontal' even though the
      # branch above supports it for LazyFrames
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', and 'horizontal' concat strategy."
    end
  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end
  elsif first.is_a?(Expr)
    # expressions are appended pairwise, left to right
    out = first
    elems[1..-1].each do |e|
      out = out.append(e)
    end
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  if rechunk
    out.rechunk
  else
    out
  end
end

#concat_arr(exprs, *more_exprs) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Horizontally concatenate columns into a single array column.

Non-array columns are reshaped to a unit-width array. All columns must have a dtype of either Polars::Array.new(<DataType>, width) or Polars::<DataType>.

Examples:

Concatenate 2 array columns:

Polars.select(
  a: Polars::Series.new([[1], [3], nil], dtype: Polars::Array.new(Polars::Int64, 1)),
  b: Polars::Series.new([[3], [nil], [5]], dtype: Polars::Array.new(Polars::Int64, 1))
).with_columns(
  Polars.concat_arr("a", "b").alias("concat_arr(a, b)"),
  Polars.concat_arr("a", Polars.first("b")).alias("concat_arr(a, first(b))")
)
# =>
# shape: (3, 4)
# ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
# │ a             ┆ b             ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
# │ ---           ┆ ---           ┆ ---              ┆ ---                     │
# │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2]    ┆ array[i64, 2]           │
# ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
# │ [1]           ┆ [3]           ┆ [1, 3]           ┆ [1, 3]                  │
# │ [3]           ┆ [null]        ┆ [3, null]        ┆ [3, 3]                  │
# │ null          ┆ [5]           ┆ null             ┆ null                    │
# └───────────────┴───────────────┴──────────────────┴─────────────────────────┘

Parameters:

  • exprs (Object)

    Columns to concatenate into a single array column. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Array)

    Additional columns to concatenate into a single array column, specified as positional arguments.

Returns:



417
418
419
420
# File 'lib/polars/functions/as_datatype.rb', line 417

# Horizontally concatenate the given columns into a single array column.
#
# Strings are parsed as column names; other non-expression inputs are
# parsed as literals.
def concat_arr(exprs, *more_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_arr(rbexprs))
end

#concat_list(exprs, *more_exprs) ⇒ Expr

Concat the arrays in a Series dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

Returns:



374
375
376
377
# File 'lib/polars/functions/as_datatype.rb', line 374

# Concatenate List-dtype columns element-wise in linear time.
#
# Accepts one or more expressions; strings are parsed as column names.
def concat_list(exprs, *more_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_list(rbexprs))
end

#concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false) ⇒ Expr

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      separator: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

  • exprs (Object)

    Columns to concat into a Utf8 Series.

  • more_exprs (Array)

    Additional columns to concatenate into a single string column, specified as positional arguments.

  • separator (String) (defaults to: "")

    String value that will be used to separate the values.

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values. When false (the default), a null value in any input propagates to the output.

Returns:



544
545
546
547
# File 'lib/polars/functions/as_datatype.rb', line 544

# Horizontally concatenate Utf8 columns; non-Utf8 columns are cast first.
#
# @param separator [String] value placed between concatenated values.
# @param ignore_nulls [Boolean] when false, a null in any input yields null.
def concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_str(rbexprs, separator, ignore_nulls))
end

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr

Compute the Pearson or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.corr(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.544705
# ]
Polars.corr(s1, s2, method: "spearman", eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.5
# ]

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • method ("pearson", "spearman") (defaults to: "pearson")

    Correlation method.

  • ddof (Integer) (defaults to: nil)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

  • propagate_nans (Boolean) (defaults to: false)

    If true any NaN encountered will lead to NaN in the output. Defaults to false where NaN are regarded as larger than any finite number and thus lead to the highest rank.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
# File 'lib/polars/functions/lazy.rb', line 774

# Compute the Pearson or Spearman rank correlation between two columns.
#
# @param a [Object] Column name or Expression.
# @param b [Object] Column name or Expression.
# @param method ["pearson", "spearman"] Correlation method.
# @param ddof [Integer] Deprecated; has no effect.
# @param propagate_nans [Boolean] If true, any NaN leads to NaN in the output;
#   otherwise NaN is regarded as larger than any finite number.
# @param eager [Boolean] Evaluate immediately and return a Series; requires
#   at least one of the given arguments to be a Series.
#
# @return [Expr] (or a Series when `eager: true`)
def corr(
  a,
  b,
  method: "pearson",
  ddof: nil,
  propagate_nans: false,
  eager: false
)
  unless ddof.nil?
    Utils.issue_deprecation_warning(
      "The `ddof` parameter has no effect. Do not use it."
    )
  end

  if eager
    if !(a.is_a?(Series) || b.is_a?(Series))
      msg = "expected at least one Series in 'corr' inputs if 'eager: true'"
      raise ArgumentError, msg
    end

    # Build a temporary frame from the Series inputs, then evaluate the
    # lazy expression against it.
    frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
    exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
    frame.select(
      corr(*exprs, eager: false, method: method, propagate_nans: propagate_nans)
    ).to_series
  else
    a = Utils.parse_into_expression(a)
    b = Utils.parse_into_expression(b)

    case method
    when "pearson"
      Utils.wrap_expr(Plr.pearson_corr(a, b))
    when "spearman"
      Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
    else
      # Previously "{{'pearson', 'spearman'}}" — doubled braces are a Python
      # format-string artifact and rendered literally in Ruby.
      msg = "method must be one of {'pearson', 'spearman'}, got #{method}"
      raise ArgumentError, msg
    end
  end
end

#count(*columns) ⇒ Expr

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



97
98
99
100
101
102
103
104
# File 'lib/polars/functions/lazy.rb', line 97

# Return the number of non-null values in the given column(s).
#
# Syntactic sugar for col(columns).count. Calling without arguments is
# deprecated and returns the row count of the context instead.
def count(*columns)
  return col(*columns).count unless columns.empty?

  warn "`Polars.count` is deprecated. Use `Polars.length` instead."
  Utils.wrap_expr(Plr.len.alias("count"))
end

#cov(a, b, ddof: 1, eager: false) ⇒ Expr

Compute the covariance between two columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.cov(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         3.0
# ]

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
# File 'lib/polars/functions/lazy.rb', line 860

# Compute the covariance between two columns/expressions.
#
# @param ddof [Integer] "Delta Degrees of Freedom": divisor is N - ddof.
# @param eager [Boolean] Evaluate immediately (requires a Series input).
def cov(a, b, ddof: 1, eager: false)
  unless eager
    lhs = Utils.parse_into_expression(a)
    rhs = Utils.parse_into_expression(b)
    return Utils.wrap_expr(Plr.cov(lhs, rhs, ddof))
  end

  if !(a.is_a?(Series) || b.is_a?(Series))
    msg = "expected at least one Series in 'cov' inputs if 'eager: true'"
    raise ArgumentError, msg
  end

  # Evaluate the lazy expression against a frame built from the Series inputs.
  frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
  exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
  frame.select(cov(*exprs, eager: false, ddof: ddof)).to_series
end

#cum_count(*columns, reverse: false) ⇒ Expr

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

  • columns (Array)

    Name(s) of the columns to use.

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



134
135
136
# File 'lib/polars/functions/lazy.rb', line 134

# Return the cumulative count of non-null values in the column(s).
#
# Syntactic sugar for col(columns).cum_count.
def cum_count(*columns, reverse: false)
  selection = col(*columns)
  selection.cum_count(reverse: reverse)
end

#cum_fold(acc, exprs, returns_scalar: false, return_dtype: nil, include_init: false, &function) ⇒ Object

Note:

If you simply want the first encountered expression as accumulator, consider using cum_reduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), Polars.all) { |acc, x| acc + x }
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

Parameters:

  • acc (Object)

    Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the accumulator.

  • include_init (Boolean) (defaults to: false)

    Include the initial accumulator state as struct field.

Returns:



1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
# File 'lib/polars/functions/lazy.rb', line 1214

# Cumulatively accumulate over multiple columns horizontally with a left fold.
#
# Each cumulative result becomes a separate field of a Struct column named
# "cum_fold".
def cum_fold(
  acc,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  include_init: false,
  &function
)
  acc_expr = Utils.parse_into_expression(acc, str_as_lit: true)
  # A single expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  result = Plr.cum_fold(
    acc_expr,
    _wrap_acc_lambda(function),
    rbexprs,
    returns_scalar,
    rt,
    include_init
  ).alias("cum_fold")
  Utils.wrap_expr(result)
end

#cum_reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Cumulatively reduce horizontally across columns with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(Polars.cum_reduce(Polars.all) { |acc, x| acc + x })
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬────────────┐
# │ a   ┆ b   ┆ c   ┆ cum_reduce │
# │ --- ┆ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3]  │
# ╞═════╪═════╪═════╪════════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {1,4,9}    │
# │ 2   ┆ 4   ┆ 6   ┆ {2,6,12}   │
# │ 3   ┆ 5   ┆ 7   ┆ {3,8,15}   │
# └─────┴─────┴─────┴────────────┘

Parameters:

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the input expressions.

Returns:



1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
# File 'lib/polars/functions/lazy.rb', line 1281

# Cumulatively reduce horizontally across columns with a left fold.
#
# Each cumulative result becomes a separate field of a Struct column named
# "cum_reduce".
def cum_reduce(
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  # A single expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  result = Plr.cum_reduce(
    _wrap_acc_lambda(function),
    rbexprs,
    returns_scalar,
    rt
  ).alias("cum_reduce")
  Utils.wrap_expr(result)
end

#cum_sum(*names) ⇒ Expr

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

  • names (Object)

    Name(s) of the columns to use in the aggregation.

Returns:



277
278
279
# File 'lib/polars/functions/aggregation/vertical.rb', line 277

# Cumulatively sum all values.
#
# Syntactic sugar for col(names).cum_sum.
def cum_sum(*names)
  selection = col(*names)
  selection.cum_sum
end

#cum_sum_horizontal(*exprs) ⇒ Expr

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



241
242
243
244
245
246
247
248
249
# File 'lib/polars/functions/aggregation/horizontal.rb', line 241

# Cumulatively sum all values horizontally across columns.
#
# Returns a Struct column named "cum_sum" with one field per running total.
def cum_sum_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  wrapped = parsed.map { |rbexpr| Utils.wrap_expr(rbexpr) }

  # (Expr): use u32 as that will not cast to float as eagerly
  folded = Polars.cum_fold(Polars.lit(0).cast(UInt32), wrapped) { |a, b| a + b }
  folded.alias("cum_sum")
end

#date(year, month, day) ⇒ Expr

Create a Polars literal expression of type Date.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6]
  }
)
df.with_columns(Polars.date(2024, Polars.col("month"), Polars.col("day")))
# =>
# shape: (3, 3)
# ┌───────┬─────┬────────────┐
# │ month ┆ day ┆ date       │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ date       │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 4   ┆ 2024-01-04 │
# │ 2     ┆ 5   ┆ 2024-02-05 │
# │ 3     ┆ 6   ┆ 2024-03-06 │
# └───────┴─────┴────────────┘

We can also use Polars.date for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2024, 1, 1), Date.new(2024, 1, 1), Date.new(2024, 1, 1)],
    "end" => [Date.new(2024, 5, 1), Date.new(2024, 7, 1), Date.new(2024, 9, 1)]
  }
)
df.filter(Polars.col("end") > Polars.date(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌────────────┬────────────┐
# │ start      ┆ end        │
# │ ---        ┆ ---        │
# │ date       ┆ date       │
# ╞════════════╪════════════╡
# │ 2024-01-01 ┆ 2024-07-01 │
# │ 2024-01-01 ┆ 2024-09-01 │
# └────────────┴────────────┘

Parameters:

  • year (Object)

    column or literal.

  • month (Object)

    column or literal, ranging from 1-12.

  • day (Object)

    column or literal, ranging from 1-31.

Returns:



179
180
181
182
183
184
185
# File 'lib/polars/functions/as_datatype.rb', line 179

# Create a Polars literal expression of type Date.
#
# @param year [Object] column or literal.
# @param month [Object] column or literal, ranging from 1-12.
# @param day [Object] column or literal, ranging from 1-31.
def date(
  year,
  month,
  day
)
  # Build a datetime at midnight, then truncate it to a Date named "date".
  dt = datetime(year, month, day)
  dt.cast(Date).alias("date")
end

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define whether the temporal window interval is closed or not.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/polars/functions/range/date_range.rb', line 37

# Create a range of type Datetime (or Date).
#
# @param interval [Object] a Polars duration string, e.g. "3d12h4m25s".
# @param closed ["both", "left", "right", "none"] window closedness.
# @param eager [Boolean] evaluate immediately and return a Series.
def date_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(Plr.date_range(lower, upper, interval, closed))

  eager ? F.select(expr).to_series : expr
end

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

interval is created according to the following string language:

  • 1ns (1 nanosecond)
  • 1us (1 microsecond)
  • 1ms (1 millisecond)
  • 1s (1 second)
  • 1m (1 minute)
  • 1h (1 hour)
  • 1d (1 calendar day)
  • 1w (1 calendar week)
  • 1mo (1 calendar month)
  • 1q (1 calendar quarter)
  • 1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/polars/functions/range/date_range.rb', line 116

# Create a column of date ranges (one list per row).
#
# @param interval [Object] a Polars duration string, e.g. "1d".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param eager [Boolean] evaluate immediately and return a Series.
def date_ranges(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(Plr.date_ranges(lower, upper, interval, closed))

  eager ? F.select(expr).to_series : expr
end

#datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil, time_unit: "us", time_zone: nil, ambiguous: "raise") ⇒ Expr

Create a Polars literal expression of type Datetime.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6],
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(
  Polars.datetime(
    2024,
    Polars.col("month"),
    Polars.col("day"),
    Polars.col("hour"),
    Polars.col("minute"),
    time_zone: "Australia/Sydney"
  )
)
# =>
# shape: (3, 5)
# ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
# │ month ┆ day ┆ hour ┆ minute ┆ datetime                       │
# │ ---   ┆ --- ┆ ---  ┆ ---    ┆ ---                            │
# │ i64   ┆ i64 ┆ i64  ┆ i64    ┆ datetime[μs, Australia/Sydney] │
# ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
# │ 1     ┆ 4   ┆ 12   ┆ 15     ┆ 2024-01-04 12:15:00 AEDT       │
# │ 2     ┆ 5   ┆ 13   ┆ 30     ┆ 2024-02-05 13:30:00 AEDT       │
# │ 3     ┆ 6   ┆ 14   ┆ 45     ┆ 2024-03-06 14:45:00 AEDT       │
# └───────┴─────┴──────┴────────┴────────────────────────────────┘

We can also use Polars.datetime for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0)
    ],
    "end" => [
      DateTime.new(2024, 5, 1, 20, 15, 10),
      DateTime.new(2024, 7, 1, 21, 25, 20),
      DateTime.new(2024, 9, 1, 22, 35, 30)
    ]
  }
)
df.filter(Polars.col("end") > Polars.datetime(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌─────────────────────┬─────────────────────┐
# │ start               ┆ end                 │
# │ ---                 ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╡
# │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
# │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
# └─────────────────────┴─────────────────────┘

Parameters:

  • year (Object)

    Column or literal.

  • month (Object)

    Column or literal, ranging from 1-12.

  • day (Object)

    Column or literal, ranging from 1-31.

  • hour (Object) (defaults to: nil)

    Column or literal, ranging from 0-23.

  • minute (Object) (defaults to: nil)

    Column or literal, ranging from 0-59.

  • second (Object) (defaults to: nil)

    Column or literal, ranging from 0-59.

  • microsecond (Object) (defaults to: nil)

    Column or literal, ranging from 0-999999.

  • time_unit ('us', 'ms', 'ns') (defaults to: "us")

    Time unit of the resulting expression.

  • time_zone (Object) (defaults to: nil)

    Time zone of the resulting expression.

  • ambiguous ('raise', 'earliest', 'latest', 'null') (defaults to: "raise")

    Determine how to deal with ambiguous datetimes:

    • 'raise' (default): raise
    • 'earliest': use the earliest datetime
    • 'latest': use the latest datetime
    • 'null': set to null

Returns:



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/polars/functions/as_datatype.rb', line 90

# Create a Polars literal expression of type Datetime.
#
# The time components (hour/minute/second/microsecond) are optional and
# passed through as nil when omitted.
#
# @param time_unit ["us", "ms", "ns"] time unit of the resulting expression.
# @param time_zone [Object] time zone of the resulting expression.
# @param ambiguous ["raise", "earliest", "latest", "null"] how to resolve
#   ambiguous datetimes.
def datetime(
  year,
  month,
  day,
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil,
  time_unit: "us",
  time_zone: nil,
  ambiguous: "raise"
)
  # Parse a component only when it was supplied; otherwise keep nil.
  parse_opt = ->(v) { Utils.parse_into_expression(v) unless v.nil? }

  ambiguous_expr = Utils.parse_into_expression(ambiguous, str_as_lit: true)

  components = [
    Utils.parse_into_expression(year),
    Utils.parse_into_expression(month),
    Utils.parse_into_expression(day),
    parse_opt.(hour),
    parse_opt.(minute),
    parse_opt.(second),
    parse_opt.(microsecond)
  ]

  Utils.wrap_expr(
    Plr.datetime(*components, time_unit, time_zone, ambiguous_expr)
  )
end

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/polars/functions/range/datetime_range.rb', line 52

# Generate a datetime range.
#
# @param interval [String] a Polars duration string, e.g. "1mo".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param time_unit [nil, "ns", "us", "ms"] time unit of the result.
# @param time_zone [String] time zone of the result.
# @param eager [Boolean] evaluate immediately and return a Series.
def datetime_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # A nanosecond-granularity interval forces the "ns" time unit.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  expr = Utils.wrap_expr(
    Plr.datetime_range(lower, upper, interval, closed, time_unit, time_zone)
  )

  eager ? Polars.select(expr).to_series : expr
end

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/polars/functions/range/datetime_range.rb', line 119

# Create a column of datetime ranges (one list per row).
#
# @param interval [String] a Polars duration string, e.g. "1d".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param time_unit [nil, "ns", "us", "ms"] time unit of the result.
# @param time_zone [String] time zone of the result.
# @param eager [Boolean] evaluate immediately and return a Series.
def datetime_ranges(
  start,
  stop,
  interval: "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # A nanosecond-granularity interval forces the "ns" time unit.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(
    Plr.datetime_ranges(lower, upper, interval, closed, time_unit, time_zone)
  )

  eager ? Polars.select(expr).to_series : expr
end

#disable_string_cachenil

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


90
91
92
# File 'lib/polars/string_cache.rb', line 90

# Disable and clear the global string cache.
#
# Thin delegation to the native extension.
#
# @return [nil]
def disable_string_cache
  Plr.disable_string_cache
end

#dtype_of(col_or_expr) ⇒ DataTypeExpr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get a lazily evaluated DataType of a column or expression.

Returns:



10
11
12
13
14
15
16
17
18
19
# File 'lib/polars/functions/datatype.rb', line 10

# Get a lazily evaluated DataType of a column or expression.
#
# @note This functionality is considered unstable and may change at any
#   point without being considered a breaking change.
#
# @param col_or_expr [String, Expr] Column name (resolved via `F.col`) or
#   an expression.
#
# @return [DataTypeExpr]
def dtype_of(col_or_expr)
  expr = col_or_expr.is_a?(::String) ? F.col(col_or_expr) : col_or_expr
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.of_expr(expr._rbexpr))
end

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: nil) ⇒ Expr

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/polars/functions/as_datatype.rb', line 263

# Create a Polars Duration expression from distinct time components.
#
# Each component may be nil (omitted), a column name, an expression, or a
# literal. When nanoseconds are given and no time unit is specified, the
# time unit becomes "ns"; otherwise it defaults to "us".
#
# @return [Expr]
def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: nil
)
  # Nanosecond input implies nanosecond precision unless told otherwise.
  time_unit = "ns" if time_unit.nil? && !nanoseconds.nil?

  # Convert each supplied component into a backend expression; nil stays nil.
  to_expr = ->(v) { v.nil? ? nil : Utils.parse_into_expression(v, str_as_lit: false) }

  weeks = to_expr.(weeks)
  days = to_expr.(days)
  hours = to_expr.(hours)
  minutes = to_expr.(minutes)
  seconds = to_expr.(seconds)
  milliseconds = to_expr.(milliseconds)
  microseconds = to_expr.(microseconds)
  nanoseconds = to_expr.(nanoseconds)

  time_unit = "us" if time_unit.nil?

  Utils.wrap_expr(
    Plr.duration(
      weeks,
      days,
      hours,
      minutes,
      seconds,
      milliseconds,
      microseconds,
      nanoseconds,
      time_unit
    )
  )
end

#elementExpr

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_columns(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:



50
51
52
# File 'lib/polars/functions/lazy.rb', line 50

# Alias for an element being evaluated in an `eval` expression
# (e.g. inside `Expr.list.eval`).
#
# @return [Expr]
def element
  Utils.wrap_expr(Plr.element)
end

#enable_string_cachenil

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


63
64
65
# File 'lib/polars/string_cache.rb', line 63

# Enable the global string cache.
#
# Categorical columns created under the same global string cache share
# physical values for equal strings, allowing concatenation and joins.
# Thin wrapper delegating to the native implementation.
#
# @return [nil]
def enable_string_cache
  Plr.enable_string_cache
end

#escape_regex(s) ⇒ String

Escapes string regex meta characters.

Parameters:

  • s (String)

    The string whose meta characters will be escaped.

Returns:



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/polars/functions/escape_regex.rb', line 9

# Escape regex meta characters in a plain string.
#
# @param s [String] The string whose meta characters will be escaped.
#
# @raise [TypeError] if given an Expr (use `Expr.str.escape_regex`) or any
#   non-String value.
# @return [String]
def escape_regex(s)
  case s
  when Expr
    raise TypeError, "escape_regex function is unsupported for `Expr`, you may want use `Expr.str.escape_regex` instead"
  when ::String
    Plr.escape_regex(s)
  else
    raise TypeError, "escape_regex function supports only `String` type, got `#{s.class.name}`"
  end
end

#exclude(columns, *more_columns) ⇒ Object

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    The name or datatype of the column(s) to exclude. Accepts regular expression input. Regular expressions should start with ^ and end with $.

  • more_columns (Array)

    Additional names or datatypes of columns to exclude, specified as positional arguments.

Returns:



1408
1409
1410
# File 'lib/polars/functions/lazy.rb', line 1408

# Exclude certain columns from a wildcard/regex selection.
#
# Syntactic sugar for `col("*").exclude(...)`. Regular expressions must
# start with `^` and end with `$`.
#
# @param columns [Object] Name or datatype of the column(s) to exclude.
# @param more_columns [Array] Additional names or datatypes to exclude.
#
# @return [Expr]
def exclude(columns, *more_columns)
  col("*").exclude(columns, *more_columns)
end

#field(name) ⇒ Expr

Select a field in the current struct.with_fields scope.

Examples:

df = Polars::DataFrame.new({"a" => [{"x" => 5, "y" => 2}, {"x" => 3, "y" => 4}]})
df.select(Polars.col("a").struct.with_fields(Polars.field("x") ** 2))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {25,2}    │
# │ {9,4}     │
# └───────────┘

Parameters:

  • name (Object)

    Name of the field(s) to select.

Returns:



23
24
25
26
27
28
# File 'lib/polars/functions/lazy.rb', line 23

# Select a field in the current `struct.with_fields` scope.
#
# @param name [String, Array] Name of the field(s) to select; a single
#   string is wrapped into an array before dispatch.
#
# @return [Expr]
def field(name)
  names = name.is_a?(::String) ? [name] : name
  Utils.wrap_expr(Plr.field(names))
end

#first(*columns) ⇒ Expr

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘
df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:



485
486
487
488
489
490
491
# File 'lib/polars/functions/lazy.rb', line 485

# Get the first value.
#
# With no arguments, returns an expression taking the first column of the
# context; with column names, takes the first value of each named column.
#
# @param columns [Array] Zero or more column names.
#
# @return [Expr]
def first(*columns)
  return cs.first.as_expr if columns.empty?

  col(*columns).first
end

#fold(acc, exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Accumulate over multiple columns horizontally/row wise with a left fold.

Examples:

Horizontally sum over all columns and add 1.

df = Polars::DataFrame.new(
 {
   "a" => [1, 2, 3],
   "b" => [3, 4, 5],
   "c" => [5, 6, 7]
 }
)
df.select(
  Polars.fold(Polars.lit(1), Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i32 │
# ╞═════╡
# │ 10  │
# │ 13  │
# │ 16  │
# └─────┘

You can also apply a condition/predicate on all columns:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.filter(
  Polars.fold(Polars.lit(true), Polars.col("*") > 1) { |acc, x| acc & x }
)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:



1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
# File 'lib/polars/functions/lazy.rb', line 1078

# Accumulate over multiple columns horizontally/row-wise with a left fold.
#
# @param acc [Object] Initial accumulator (strings become literals).
# @param exprs [Object] Expression(s) folded over; a bare Expr is wrapped
#   into a one-element array.
# @param returns_scalar [Boolean] Whether the fold yields a scalar.
# @param return_dtype [Object] Optional dtype of the result.
# @param function [Proc] Two-argument block `(acc, x)` applied per column.
#
# @return [Expr]
def fold(
  acc,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  acc_expr = Utils.parse_into_expression(acc, str_as_lit: true)
  exprs = [exprs] if exprs.is_a?(Expr)

  dtype_expr =
    return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(
    Plr.fold(acc_expr, _wrap_acc_lambda(function), rbexprs, returns_scalar, dtype_expr)
  )
end

#format(f_string, *args) ⇒ Expr

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

  • f_string (String)

    A string with placeholders. For example: "hello_{}" or "{}_world".

  • args (Object)

    Expression(s) that fill the placeholders

Returns:



582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'lib/polars/functions/as_datatype.rb', line 582

# Format expressions as a string.
#
# Each "{}" placeholder in `f_string` is filled, in order, by the
# corresponding argument; literal segments become string literals, and the
# pieces are joined with `concat_str`.
#
# @param f_string [String] Template string with "{}" placeholders.
# @param args [Object] Expression(s) that fill the placeholders.
#
# @raise [ArgumentError] if the number of placeholders differs from the
#   number of arguments.
# @return [Expr]
def format(f_string, *args)
  if f_string.scan("{}").length != args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  remaining = args.dup
  # Split keeps the "{}" tokens via the capture group; empty segments
  # (e.g. leading/adjacent placeholders) are dropped.
  pieces = f_string.split(/(\{\})/).filter_map do |segment|
    if segment == "{}"
      Utils.wrap_expr(Utils.parse_into_expression(remaining.shift))
    elsif !segment.empty?
      lit(segment)
    end
  end

  concat_str(pieces, separator: "")
end

#from_epoch(column, time_unit: "s") ⇒ Object

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • time_unit: "d" returns pl.Date
  • time_unit: "s" returns pl.Datetime["us"]
  • time_unit: "ms" returns pl.Datetime["ms"]
  • time_unit: "us" returns pl.Datetime["us"]
  • time_unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), time_unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

  • column (Object)

    Series or expression to parse integers to pl.Datetime.

  • time_unit (String) (defaults to: "s")

    The unit of the timesteps since epoch time.

Returns:



1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
# File 'lib/polars/functions/lazy.rb', line 1752

# Utility function that parses an epoch timestamp (or Unix time) to a
# Polars Date(time).
#
# The resulting dtype depends on `time_unit`: "d" gives Date; "s", "ms",
# "us", "ns" give Datetime of the corresponding precision (seconds are
# scaled up to microseconds).
#
# @param column [Object] Column name, Series, or expression of integers;
#   other values are wrapped into a Series.
# @param time_unit [String] Unit of the timesteps since epoch time.
#
# @raise [ArgumentError] if `time_unit` is not one of 'ns', 'us', 'ms',
#   's', 'd'.
# @return [Expr, Series]
def from_epoch(column, time_unit: "s")
  if Utils.strlike?(column)
    column = F.col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  case time_unit
  when "d"
    column.cast(Date)
  when "s"
    # Seconds have no native Datetime unit; scale to microseconds.
    (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
  when *Utils::DTYPE_TEMPORAL_UNITS
    column.cast(Datetime.new(time_unit))
  else
    # BUGFIX: message previously rendered literal doubled braces
    # ("{{'ns', ...}}"), an artifact of porting a Python f-string.
    raise ArgumentError, "`time_unit` must be one of {'ns', 'us', 'ms', 's', 'd'}, got #{time_unit.inspect}."
  end
end

#groups(column) ⇒ Object

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:



1415
1416
1417
# File 'lib/polars/functions/lazy.rb', line 1415

# Syntactic sugar for `Polars.col(column).agg_groups`.
#
# @param column [Object] Column name.
#
# @return [Expr]
def groups(column)
  col(column).agg_groups
end

#head(column, n = 10) ⇒ Expr

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



642
643
644
# File 'lib/polars/functions/lazy.rb', line 642

# Get the first `n` rows.
#
# Syntactic sugar for `col(column).head(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return (default 10).
#
# @return [Expr]
def head(column, n = 10)
  col(column).head(n)
end

#implode(*columns) ⇒ Expr

Aggregate all column values into a list.

This function is syntactic sugar for col(name).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘
df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



177
178
179
# File 'lib/polars/functions/lazy.rb', line 177

# Aggregate all column values into a list.
#
# Syntactic sugar for `col(name).implode`.
#
# @param columns [Array] One or more column names.
#
# @return [Expr]
def implode(*columns)
  col(*columns).implode
end

#int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64) ⇒ Expr, Series Also known as: arange

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

  • start (Integer, Expr, Series) (defaults to: 0)

    Lower bound of range.

  • stop (Integer, Expr, Series) (defaults to: nil)

    Upper bound of range.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • eager (Boolean) (defaults to: false)

    If eager evaluation is true, a Series is returned instead of an Expr.

  • dtype (Symbol) (defaults to: Int64)

    Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/polars/functions/range/int_range.rb', line 31

# Create a range expression (or Series), aliased "arange".
#
# With a single argument, counts from 0 up to that value.
#
# @param start [Integer, Expr, Series] Lower bound of range.
# @param stop [Integer, Expr, Series] Upper bound of range.
# @param step [Integer] Step size of the range.
# @param eager [Boolean] Return a Series instead of an Expr.
# @param dtype [Object] Integer dtype of the result (default Int64).
#
# @return [Expr, Series]
def int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64)
  # Single-argument form: int_range(stop) counts from 0.
  if stop.nil?
    stop = start
    start = 0
  end

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  # An explicit nil dtype falls back to Int64; symbols become strings.
  dtype ||= Int64
  dtype = dtype.to_s if dtype.is_a?(Symbol)
  expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, dtype)).alias("arange")

  eager ? select(expr).to_series : expr
end

#int_ranges(start = 0, stop = nil, step: 1, dtype: Int64, eager: false) ⇒ Expr, Series

Generate a range of integers for each row of the input columns.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2]})
df.with_columns(int_range: Polars.int_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌───────┬─────┬────────────┐
# │ start ┆ end ┆ int_range  │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ list[i64]  │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 3   ┆ [1, 2]     │
# │ -1    ┆ 2   ┆ [-1, 0, 1] │
# └───────┴─────┴────────────┘

end can be omitted for a shorter syntax.

df.select("end", int_range: Polars.int_ranges("end"))
# =>
# shape: (2, 2)
# ┌─────┬───────────┐
# │ end ┆ int_range │
# │ --- ┆ ---       │
# │ i64 ┆ list[i64] │
# ╞═════╪═══════════╡
# │ 3   ┆ [0, 1, 2] │
# │ 2   ┆ [0, 1]    │
# └─────┴───────────┘

Parameters:

  • start (Integer, Expr, Series) (defaults to: 0)

    Start of the range (inclusive). Defaults to 0.

  • stop (Integer, Expr, Series) (defaults to: nil)

    End of the range (exclusive). If set to nil (default), the value of start is used and start is set to 0.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • dtype (Object) (defaults to: Int64)

    Integer data type of the ranges. Defaults to Int64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/polars/functions/range/int_range.rb', line 94

# Generate a range of integers for each row of the input columns.
#
# With a single argument, each range counts from 0.
#
# @param start [Integer, Expr, Series] Start of the range (inclusive).
# @param stop [Integer, Expr, Series] End of the range (exclusive).
# @param step [Integer] Step size of the range.
# @param dtype [Object] Integer dtype of the ranges (default Int64).
# @param eager [Boolean] Return a Series instead of an Expr.
#
# @return [Expr, Series]
def int_ranges(
  start = 0,
  stop = nil,
  step: 1,
  dtype: Int64,
  eager: false
)
  # Single-argument form: int_ranges(stop) counts from 0.
  if stop.nil?
    stop = start
    start = 0
  end

  dtype_expr = Utils.parse_into_datatype_expr(dtype)
  expr = Utils.wrap_expr(
    Plr.int_ranges(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      Utils.parse_into_expression(step),
      dtype_expr._rbdatatype_expr
    )
  )

  eager ? F.select(expr).to_series : expr
end

#last(*columns) ⇒ Expr

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘
df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘
df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the last column of the context instead.

Returns:



545
546
547
548
549
550
551
# File 'lib/polars/functions/lazy.rb', line 545

# Get the last value.
#
# With no arguments, returns an expression taking the last column of the
# context; with column names, takes the last value of each named column.
#
# @param columns [Array] Zero or more column names.
#
# @return [Expr]
def last(*columns)
  return cs.last.as_expr if columns.empty?

  col(*columns).last
end

#lenExpr Also known as: length

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using len in conjunction with int_range.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:



44
45
46
# File 'lib/polars/functions/len.rb', line 44

# Return the number of rows in the context (similar to COUNT(*) in SQL).
#
# @return [Expr]
def len
  Utils.wrap_expr(Plr.len)
end

#linear_spaces(start, stop, num_samples, closed: "both", as_array: false, eager: false) ⇒ Expr, Series

Note:

This functionality is experimental. It may be changed at any point without it being considered a breaking change.

Generate a sequence of evenly-spaced values for each row between start and end.

The number of values in each sequence is determined by num_samples.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2], "num_samples" => [4, 5]})
df.with_columns(ls: Polars.linear_spaces("start", "end", "num_samples"))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬────────────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls                     │
# │ ---   ┆ --- ┆ ---         ┆ ---                    │
# │ i64   ┆ i64 ┆ i64         ┆ list[f64]              │
# ╞═══════╪═════╪═════════════╪════════════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 1.666667, … 3.0] │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, -0.25, … 2.0]   │
# └───────┴─────┴─────────────┴────────────────────────┘
df.with_columns(ls: Polars.linear_spaces("start", "end", 3, as_array: true))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬──────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls               │
# │ ---   ┆ --- ┆ ---         ┆ ---              │
# │ i64   ┆ i64 ┆ i64         ┆ array[f64, 3]    │
# ╞═══════╪═════╪═════════════╪══════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 2.0, 3.0]  │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, 0.5, 2.0] │
# └───────┴─────┴─────────────┴──────────────────┘

Parameters:

  • start (Object)

    Lower bound of the range.

  • stop (Object)

    Upper bound of the range.

  • num_samples (Integer)

    Number of samples in the output sequence.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the interval are closed (inclusive).

  • as_array (Boolean) (defaults to: false)

    Return result as a fixed-length Array. num_samples must be a constant.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/polars/functions/range/linear_space.rb', line 53

# Generate a sequence of evenly-spaced values for each row between
# `start` and `stop`, with `num_samples` values per sequence.
#
# @note Experimental; may change without being considered breaking.
#
# @param start [Object] Lower bound of the range.
# @param stop [Object] Upper bound of the range.
# @param num_samples [Integer] Number of samples in each output sequence.
# @param closed ['both', 'left', 'right', 'none'] Which interval sides
#   are inclusive.
# @param as_array [Boolean] Return fixed-length Arrays (requires constant
#   `num_samples`).
# @param eager [Boolean] Return a Series instead of an Expr.
#
# @return [Expr, Series]
def linear_spaces(
  start,
  stop,
  num_samples,
  closed: "both",
  as_array: false,
  eager: false
)
  expr = Utils.wrap_expr(
    Plr.linear_spaces(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      Utils.parse_into_expression(num_samples),
      closed,
      as_array
    )
  )

  eager ? F.select(expr).to_series : expr
end

#lit(value, dtype: nil, allow_object: false) ⇒ Expr

Return an expression representing a literal value.

Examples:

Literal scalar values:

Polars.lit(1)
Polars.lit(5.5)
Polars.lit(nil)
Polars.lit("foo_bar")
Polars.lit(Date.new(2021, 1, 20))
Polars.lit(DateTime.new(2023, 3, 31, 10, 30, 45))

Literal list/Series data (1D):

Polars.lit([1, 2, 3])
Polars.lit(Polars::Series.new("x", [1, 2, 3]))

Literal list/Series data (2D):

Polars.lit([[1, 2], [3, 4]])
Polars.lit(Polars::Series.new("y", [[1, 2], [3, 4]]))

Returns:



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/polars/functions/lit.rb', line 22

# Return an expression representing a literal value.
#
# @param value [Object] Scalar, Time/DateTime, Date, Array, Numo::NArray,
#   or Series to turn into a literal expression.
# @param dtype [Object] Optional dtype to cast the literal to.
# @param allow_object [Boolean] Passed through to the native literal
#   constructor.
#
# @return [Expr]
def lit(value, dtype: nil, allow_object: false)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    time_unit = dtype&.time_unit || "ns"
    # BUGFIX: was `dtype.&time_zone`, which invokes a method named `&` on
    # dtype and fails at runtime; safe navigation (as on the line above)
    # was intended.
    time_zone = dtype&.time_zone
    e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
    if time_zone
      return e.dt.replace_time_zone(time_zone.to_s)
    else
      return e
    end
  elsif value.is_a?(::Date)
    # Dates go through a UTC Time literal, then cast down to Date.
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    value = value._s
    return Utils.wrap_expr(Plr.lit(value, allow_object, false))
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    # Array-likes are wrapped in a one-row list Series so the literal is a
    # single (list) value.
    return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object, true))
end

#map_batches(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Note:

A UDF passed to map_batches must be pure, meaning that it cannot modify or depend on state other than its arguments. We may call the function with arbitrary input data.

Map a custom function over multiple columns/expressions.

Produces a single Series result.

Examples:

test_func = lambda do |a, b, c|
  a + b + c
end
df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 4],
    "b" => [4, 5, 6, 7]
  }
)

df.with_columns(
  (
    Polars.struct(["a", "b"]).map_batches { |x| test_func.(x.struct.field("a"), x.struct.field("b"), 1) }
  ).alias("a+b+c")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ b   ┆ a+b+c │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ i64   │
# ╞═════╪═════╪═══════╡
# │ 1   ┆ 4   ┆ 6     │
# │ 2   ┆ 5   ┆ 8     │
# │ 3   ┆ 6   ┆ 10    │
# │ 4   ┆ 7   ┆ 12    │
# └─────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Expression(s) representing the input Series to the function.

  • return_dtype (Object) (defaults to: nil)

    Datatype of the output Series.

    It is recommended to set this whenever possible. If this is nil, it tries to infer the datatype by calling the function with dummy data and looking at the output.

  • is_elementwise (Boolean) (defaults to: false)

    Set to true if the operations is elementwise for better performance and optimization.

    An elementwise operations has unit or equal length for all inputs and can be ran sequentially on slices without results being affected.

  • returns_scalar (Boolean) (defaults to: false)

    If the function returns a scalar, by default it will be wrapped in a list in the output, since the assumption is that the function always returns something Series-like. If you want to keep the result as a scalar, set this argument to True.

Returns:



940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
# File 'lib/polars/functions/lazy.rb', line 940

# Map a custom function over multiple columns/expressions, producing a
# single Series result.
#
# @note Much slower than native expressions; the UDF must be pure.
#
# @param exprs [Array] Expression(s) representing the input Series.
# @param return_dtype [Object] Dtype of the output Series; inferred from
#   dummy data when nil.
# @param is_elementwise [Boolean] Mark the operation elementwise for
#   better optimization.
# @param returns_scalar [Boolean] Keep a scalar result as a scalar rather
#   than wrapping it in a list.
#
# @return [Expr]
def map_batches(
  exprs,
  return_dtype: nil,
  is_elementwise: false,
  returns_scalar: false,
  &function
)
  inputs = Utils.parse_into_list_of_expressions(exprs)
  dtype_expr =
    return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  Utils.wrap_expr(
    Plr.map_expr(
      inputs,
      _map_batches_wrapper(function, returns_scalar: returns_scalar),
      dtype_expr,
      is_elementwise,
      returns_scalar
    )
  )
end

#map_groups(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Apply a custom/user-defined function (UDF) in a GroupBy context.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [1, 1, 2],
    "a" => [1, 3, 3],
    "b" => [5, 6, 7]
  }
)
(
  df.group_by("group").agg(
    Polars.map_groups(["a", "b"], return_dtype: Polars::Float64) { |list_of_series| list_of_series[0] / list_of_series[0].sum + list_of_series[1] }
    .alias("my_custom_aggregation")
  )
).sort("group")
# =>
# shape: (2, 2)
# ┌───────┬───────────────────────┐
# │ group ┆ my_custom_aggregation │
# │ ---   ┆ ---                   │
# │ i64   ┆ list[f64]             │
# ╞═══════╪═══════════════════════╡
# │ 1     ┆ [5.25, 6.75]          │
# │ 2     ┆ [8.0]                 │
# └───────┴───────────────────────┘

Parameters:

  • exprs (Object)

    Expression(s) representing the input Series to the function.

  • return_dtype (Object) (defaults to: nil)

    Datatype of the output Series.

    It is recommended to set this whenever possible. If this is nil, it tries to infer the datatype by calling the function with dummy data and looking at the output.

  • is_elementwise (Boolean) (defaults to: false)

    Set to true if the operations is elementwise for better performance and optimization.

    An elementwise operations has unit or equal length for all inputs and can be ran sequentially on slices without results being affected.

  • returns_scalar (Boolean) (defaults to: false)

    If the function returns a single scalar as output.

Returns:



1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
# File 'lib/polars/functions/lazy.rb', line 1016

# Apply a custom/user-defined function (UDF) in a GroupBy context.
#
# Delegates directly to `map_batches` with identical arguments; in a
# group-by context each batch corresponds to one group.
#
# @note Much slower than native expressions; only use when the logic
#   cannot be expressed otherwise.
#
# @param exprs [Object] Expression(s) representing the input Series.
# @param return_dtype [Object] Dtype of the output Series; inferred from
#   dummy data when nil.
# @param is_elementwise [Boolean] Mark the operation elementwise for
#   better optimization.
# @param returns_scalar [Boolean] Whether the function returns a single
#   scalar as output.
#
# @return [Expr]
def map_groups(
  exprs,
  return_dtype: nil,
  is_elementwise: false,
  returns_scalar: false,
  &function
)
  map_batches(
    exprs,
    return_dtype: return_dtype,
    is_elementwise: is_elementwise,
    returns_scalar: returns_scalar,
    &function
  )
end

#max(*names) ⇒ Expr

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘
df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



135
136
137
# File 'lib/polars/functions/aggregation/vertical.rb', line 135

# Get the maximum value.
#
# Syntactic sugar for `col(names).max`.
#
# @param names [Array] Name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def max(*names)
  col(*names).max
end

#max_horizontal(*exprs) ⇒ Expr

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



103
104
105
106
# File 'lib/polars/functions/aggregation/horizontal.rb', line 103

# Get the maximum value horizontally across columns.
#
# @param exprs [Array] Column(s) to aggregate: strings are parsed as
#   column names, other non-expression inputs as literals.
#
# @return [Expr]
def max_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.max_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#mean(*columns) ⇒ Expr

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘
df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



299
300
301
# File 'lib/polars/functions/lazy.rb', line 299

# Get the mean value of the given column(s).
#
# Syntactic sugar for `col(columns).mean`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def mean(*columns)
  selection = col(*columns)
  selection.mean
end

#mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



208
209
210
211
# File 'lib/polars/functions/aggregation/horizontal.rb', line 208

# Compute the mean of all values horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @param ignore_nulls [Boolean] ignore null values (default); if `false`,
#   any null in the input leads to a null output.
#
# @return [Expr]
def mean_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.mean_horizontal(
      Utils.parse_into_list_of_expressions(*exprs),
      ignore_nulls
    )
  )
end

#median(*columns) ⇒ Expr

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘
df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



342
343
344
# File 'lib/polars/functions/lazy.rb', line 342

# Get the median value of the given column(s).
#
# Syntactic sugar for `col(columns).median`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def median(*columns)
  selection = col(*columns)
  selection.median
end

#min(*names) ⇒ Expr

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘
df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



190
191
192
# File 'lib/polars/functions/aggregation/vertical.rb', line 190

# Get the minimum value of the given column(s).
#
# Syntactic sugar for `col(names).min`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def min(*names)
  selection = col(*names)
  selection.min
end

#min_horizontal(*exprs) ⇒ Expr

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



136
137
138
139
# File 'lib/polars/functions/aggregation/horizontal.rb', line 136

# Get the minimum value horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
#
# @return [Expr]
def min_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.min_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#n_unique(*columns) ⇒ Expr

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



385
386
387
# File 'lib/polars/functions/lazy.rb', line 385

# Count unique values in the given column(s).
#
# Syntactic sugar for `col(columns).n_unique`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def n_unique(*columns)
  selection = col(*columns)
  selection.n_unique
end

#nth(*indices, strict: true) ⇒ Expr

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘
df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

Parameters:

  • indices (Array)

    One or more indices representing the columns to retrieve.

Returns:



594
595
596
# File 'lib/polars/functions/lazy.rb', line 594

# Get the nth column(s) of the context.
#
# @param indices [Array] one or more indices representing the columns to
#   retrieve.
# @param strict [Boolean] require every index to resolve to a column.
#
# @return [Expr]
def nth(*indices, strict: true)
  selector = cs.by_index(*indices, require_all: strict)
  selector.as_expr
end

#ones(n, dtype: Float64, eager: false) ⇒ Object

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: Float64)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



76
77
78
79
80
81
82
83
# File 'lib/polars/functions/repeat.rb', line 76

# Construct a column of length `n` filled with ones.
#
# This is syntactic sugar for the `repeat` function.
#
# @param n [Integer] length of the resulting column.
# @param dtype [Object] data type of the resulting column (defaults to
#   `Float64`).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false`, return an expression instead.
#
# @return [Object]
#
# @raise [TypeError] if `dtype` cannot represent the fill value.
def ones(n, dtype: Float64, eager: false)
  # Fixed misleading local name: this holds the *one* fill value for the
  # dtype (the original called it `zero`, copy-pasted from `zeros`).
  if (one = _one_or_zero_by_dtype(1, dtype)).nil?
    msg = "invalid dtype for `ones`; found #{dtype}"
    raise TypeError, msg
  end

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end

#quantile(column, quantile, interpolation: "nearest") ⇒ Expr

Syntactic sugar for Polars.col("foo").quantile(...).

Parameters:

  • column (String)

    Column name.

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



1429
1430
1431
# File 'lib/polars/functions/lazy.rb', line 1429

# Get a quantile of the given column.
#
# Syntactic sugar for `col(column).quantile(...)`.
#
# @param column [String] column name.
# @param quantile [Float] quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   interpolation method.
#
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  selection = col(column)
  selection.quantile(quantile, interpolation: interpolation)
end

#reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Accumulate over multiple columns horizontally (row-wise) with a left fold.

Examples:

Horizontally sum over all columns.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.select(
  Polars.reduce(Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 5   │
# └─────┘

Parameters:

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the input expressions.

Returns:



1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
# File 'lib/polars/functions/lazy.rb', line 1142

# Accumulate over multiple columns horizontally (row-wise) with a left fold.
#
# @param exprs [Object] expressions to aggregate over; may also be a
#   wildcard expression.
# @param returns_scalar [Boolean] whether the applied function returns a
#   scalar; must be set correctly by the caller.
# @param return_dtype [Object] output datatype; inferred from the input
#   expressions when `nil`.
#
# @return [Expr]
def reduce(
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  # A bare expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt =
    if return_dtype.nil?
      nil
    else
      Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
    end

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(
    Plr.reduce(_wrap_acc_lambda(function), rbexprs, returns_scalar, rt)
  )
end

#repeat(value, n, dtype: nil, eager: false) ⇒ Object

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting eager: true.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

  • value (Object)

    Value to repeat.

  • n (Integer)

    Repeat n times.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. If set to nil (default), data type is inferred from the given value. Defaults to Int32 for integer values, unless Int64 is required to fit the given value. Defaults to Float64 for float values.

  • eager (Boolean) (defaults to: false)

    Run eagerly and collect into a Series.

Returns:



39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/polars/functions/repeat.rb', line 39

# Repeat a single value `n` times.
#
# @param value [Object] value to repeat.
# @param n [Integer] repeat `n` times.
# @param dtype [Object] data type of the resulting column; inferred from
#   `value` when `nil`.
# @param eager [Boolean] run eagerly and collect into a `Series`.
#
# @return [Object]
def repeat(value, n, dtype: nil, eager: false)
  # A plain integer length is lifted into a literal expression.
  n = lit(n) if n.is_a?(Integer)

  rb_value = Utils.parse_into_expression(value, str_as_lit: true)
  expr = Utils.wrap_expr(Plr.repeat(rb_value, n._rbexpr, dtype))
  eager ? select(expr).to_series : expr
end

#rolling_corr(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr

Compute the rolling correlation between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • window_size (Integer)

    The length of the window.

  • min_samples (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements.

Returns:



1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
# File 'lib/polars/functions/lazy.rb', line 1829

# Compute the rolling correlation between two columns/expressions.
#
# The window at a given row includes the row itself and the
# `window_size - 1` elements before it.
#
# @param a [Object] column name or expression.
# @param b [Object] column name or expression.
# @param window_size [Integer] the length of the window.
# @param min_samples [Integer] number of non-null values required in the
#   window before a result is computed; defaults to `window_size`.
# @param ddof [Integer] delta degrees of freedom; the divisor used is
#   `N - ddof`.
#
# @return [Expr]
def rolling_corr(
  a,
  b,
  window_size:,
  min_samples: nil,
  ddof: 1
)
  min_samples = window_size if min_samples.nil?
  a = F.col(a) if Utils.strlike?(a)
  b = F.col(b) if Utils.strlike?(b)
  Utils.wrap_expr(
    Plr.rolling_corr(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
  )
end

#rolling_cov(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr

Compute the rolling covariance between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • window_size (Integer)

    The length of the window.

  • min_samples (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements.

Returns:



1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
# File 'lib/polars/functions/lazy.rb', line 1789

# Compute the rolling covariance between two columns/expressions.
#
# The window at a given row includes the row itself and the
# `window_size - 1` elements before it.
#
# @param a [Object] column name or expression.
# @param b [Object] column name or expression.
# @param window_size [Integer] the length of the window.
# @param min_samples [Integer] number of non-null values required in the
#   window before a result is computed; defaults to `window_size`.
# @param ddof [Integer] delta degrees of freedom; the divisor used is
#   `N - ddof`.
#
# @return [Expr]
def rolling_cov(
  a,
  b,
  window_size:,
  min_samples: nil,
  ddof: 1
)
  min_samples = window_size if min_samples.nil?
  a = F.col(a) if Utils.strlike?(a)
  b = F.col(b) if Utils.strlike?(b)
  Utils.wrap_expr(
    Plr.rolling_cov(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
  )
end

#select(*exprs, eager: true, **named_exprs) ⇒ DataFrame

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

  • exprs (Array)

    Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a DataFrame (default); if set to false, return a LazyFrame instead.

  • named_exprs (Hash)

    Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



1603
1604
1605
1606
# File 'lib/polars/functions/lazy.rb', line 1603

# Run polars expressions without a context.
#
# Syntactic sugar for running `df.select` on an empty `DataFrame`.
#
# @param exprs [Array] column(s) to select; strings are parsed as column
#   names, other non-expression inputs as literals.
# @param eager [Boolean] evaluate immediately and return a `DataFrame`
#   (default); if `false`, return a `LazyFrame` instead.
# @param named_exprs [Hash] additional columns to select, renamed to the
#   keyword used.
#
# @return [DataFrame]
def select(*exprs, eager: true, **named_exprs)
  frame =
    if eager
      Polars::DataFrame.new
    else
      Polars::LazyFrame.new
    end
  frame.select(*exprs, **named_exprs)
end

#self_dtypeDataTypeExpr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get the dtype of self in map_elements and map_batches.

Returns:



28
29
30
# File 'lib/polars/functions/datatype.rb', line 28

# Get the dtype of `self` in `map_elements` and `map_batches`.
#
# @note This functionality is considered unstable and may change at any
#   point without being considered a breaking change.
#
# @return [DataTypeExpr]
def self_dtype
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.self_dtype)
end

#set_random_seed(seed) ⇒ nil

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

  • seed (Integer)

    A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

  • (nil)


12
13
14
# File 'lib/polars/functions/random.rb', line 12

# Set the global random seed for Polars.
#
# This seed is used to determine things such as shuffle ordering.
#
# @param seed [Integer] a non-negative integer < 2**64 used to seed the
#   internal global random number generator.
#
# @return [nil]
def set_random_seed(seed)
  Plr.set_random_seed(seed)
end

#sql_expr(sql) ⇒ Expr

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

  • sql (Object)

    One or more SQL expressions.

Returns:



1885
1886
1887
1888
1889
1890
1891
# File 'lib/polars/functions/lazy.rb', line 1885

# Parse one or more SQL expressions to polars expression(s).
#
# @param sql [Object] one or more SQL expressions.
#
# @return [Expr] a single expression for a `String` input; an array of
#   expressions for a collection input.
def sql_expr(sql)
  case sql
  when ::String
    Utils.wrap_expr(Plr.sql_expr(sql))
  else
    sql.map { |query| Utils.wrap_expr(Plr.sql_expr(query)) }
  end
end

#std(column, ddof: 1) ⇒ Expr

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘
df["a"].std
# => 3.605551275463989

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



216
217
218
# File 'lib/polars/functions/lazy.rb', line 216

# Get the standard deviation of the given column.
#
# Syntactic sugar for `col(column).std(ddof: ddof)`.
#
# @param column [Object] column name.
# @param ddof [Integer] "Delta Degrees of Freedom": the divisor used in the
#   calculation is `N - ddof` (default 1).
#
# @return [Expr]
def std(column, ddof: 1)
  selection = col(column)
  selection.std(ddof: ddof)
end

#struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => Polars::Schema({"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})})

Parameters:

  • exprs (Array)

    Column(s) to collect into a struct column, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • schema (Hash) (defaults to: nil)

    Optional schema that explicitly defines the struct field dtypes. If no columns or expressions are provided, schema keys are used to define columns.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

  • named_exprs (Hash)

    Additional columns to collect into the struct column, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'lib/polars/functions/as_datatype.rb', line 477

# Collect several columns into a Series of dtype `Struct`.
#
# @param exprs [Array] column(s) to collect into a struct column; strings
#   are parsed as column names, other non-expression inputs as literals.
# @param schema [Hash] optional schema that explicitly defines the struct
#   field dtypes; if no columns or expressions are provided, the schema
#   keys are used to define the columns.
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
# @param named_exprs [Hash] additional columns to collect into the struct
#   column, renamed to the keyword used.
#
# @return [Object]
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
  expr = Utils.wrap_expr(Plr.as_struct(parsed))

  schema_given = !schema.nil? && !schema.empty?
  if schema_given && exprs.none?
    # No positional columns/expressions provided: build the struct from the
    # schema keys and cast the fields to the requested dtypes.
    key_exprs = Utils.parse_into_list_of_expressions(schema.keys)
    expr = Utils.wrap_expr(Plr.as_struct(key_exprs))
    expr = expr.cast(Struct.new(schema), strict: false)
  end

  eager ? Polars.select(expr).to_series : expr
end

#sum(*names) ⇒ Expr

Sum all values.

Syntactic sugar for col(names).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘
df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



245
246
247
# File 'lib/polars/functions/aggregation/vertical.rb', line 245

# Sum all values in the given column(s).
#
# Syntactic sugar for `col(names).sum`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def sum(*names)
  selection = col(*names)
  selection.sum
end

#sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



172
173
174
175
# File 'lib/polars/functions/aggregation/horizontal.rb', line 172

# Sum all values horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @param ignore_nulls [Boolean] ignore null values (default); if `false`,
#   any null in the input leads to a null output.
#
# @return [Expr]
def sum_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.sum_horizontal(
      Utils.parse_into_list_of_expressions(*exprs),
      ignore_nulls
    )
  )
end

#tail(column, n = 10) ⇒ Expr

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



690
691
692
# File 'lib/polars/functions/lazy.rb', line 690

# Get the last `n` rows of the given column.
#
# Syntactic sugar for `col(column).tail(n)`.
#
# @param column [Object] column name.
# @param n [Integer] number of rows to return (default 10).
#
# @return [Expr]
def tail(column, n = 10)
  selection = col(column)
  selection.tail(n)
end

#time(hour = nil, minute = nil, second = nil, microsecond = nil) ⇒ Expr

Create a Polars literal expression of type Time.

Examples:

df = Polars::DataFrame.new(
  {
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(Polars.time(Polars.col("hour"), Polars.col("minute")))
# =>
# shape: (3, 3)
# ┌──────┬────────┬──────────┐
# │ hour ┆ minute ┆ time     │
# │ ---  ┆ ---    ┆ ---      │
# │ i64  ┆ i64    ┆ time     │
# ╞══════╪════════╪══════════╡
# │ 12   ┆ 15     ┆ 12:15:00 │
# │ 13   ┆ 30     ┆ 13:30:00 │
# │ 14   ┆ 45     ┆ 14:45:00 │
# └──────┴────────┴──────────┘

Parameters:

  • hour (Object) (defaults to: nil)

    column or literal, ranging from 0-23.

  • minute (Object) (defaults to: nil)

    column or literal, ranging from 0-59.

  • second (Object) (defaults to: nil)

    column or literal, ranging from 0-59.

  • microsecond (Object) (defaults to: nil)

    column or literal, ranging from 0-999999.

Returns:



219
220
221
222
223
224
225
226
227
228
229
# File 'lib/polars/functions/as_datatype.rb', line 219

# Create a Polars literal expression of type `Time`.
#
# @param hour [Object] column or literal, ranging from 0-23.
# @param minute [Object] column or literal, ranging from 0-59.
# @param second [Object] column or literal, ranging from 0-59.
# @param microsecond [Object] column or literal, ranging from 0-999999.
#
# @return [Expr]
def time(
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil
)
  # Build a datetime at the Unix epoch date (1970-01-01) and discard the
  # date part via the cast to Time.
  datetime(1970, 1, 1, hour, minute, second, microsecond)
    .cast(Time)
    .alias("time")
end

#time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (String) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/polars/functions/range/time_range.rb', line 35

# Generate a time range.
#
# @param start [Object] lower bound of the time range; defaults to the
#   start of the day.
# @param stop [Object] upper bound of the time range; defaults to the end
#   of the day.
# @param interval [String] interval of the range periods, specified using
#   the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none'] define which sides of the
#   range are closed (inclusive).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError] if the interval contains a calendar-based unit.
def time_range(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # Calendar-based units cannot be expressed as a pure time-of-day duration.
  bad_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  unless bad_unit.nil?
    raise ArgumentError, "invalid interval unit for time_range: found #{bad_unit.inspect}"
  end

  # Default bounds cover a whole day; the date part is ignored.
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  result =
    Utils.wrap_expr(
      Plr.time_range(
        Utils.parse_into_expression(start),
        Utils.parse_into_expression(stop),
        interval,
        closed
      )
    )

  eager ? Polars.select(result).to_series : result
end

#time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (Integer) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/polars/functions/range/time_range.rb', line 105

# Create a column of time ranges.
#
# @param start [Object] lower bound of the time range; defaults to the
#   start of the day.
# @param stop [Object] upper bound of the time range; defaults to the end
#   of the day.
# @param interval [String] interval of the range periods, specified using
#   the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none'] define which sides of the
#   range are closed (inclusive).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError] if the interval contains a calendar-based unit.
def time_ranges(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  ["y", "mo", "w", "d"].each do |unit|
    if interval.include?(unit)
      # Fixed: the error previously reported "time_range", the wrong
      # function name for this method.
      msg = "invalid interval unit for time_ranges: found #{unit.inspect}"
      raise ArgumentError, msg
    end
  end

  if start.nil?
    # date part is ignored
    start = ::Time.utc(2000, 1, 1, 0, 0, 0)
  end
  if stop.nil?
    # date part is ignored
    stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
  end

  start_rbexpr = Utils.parse_into_expression(start)
  end_rbexpr = Utils.parse_into_expression(stop)

  result = Utils.wrap_expr(Plr.time_ranges(start_rbexpr, end_rbexpr, interval, closed))

  if eager
    return Polars.select(result).to_series
  end

  result
end

#union(items, how: "vertical", strict: false) ⇒ Object

Note:

This function does not guarantee any specific ordering of rows in the result. If you need predictable row ordering, use Polars.concat instead.

Combine multiple DataFrames, LazyFrames, or Series into a single object.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.union([df1, df2])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.union([df1, df2], how: "vertical_relaxed")
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.union([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘

The "diagonal" strategy allows for some frames to have missing columns, the values for which are filled with null:

df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.union([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘

Parameters:

  • items (Array)

    DataFrames, LazyFrames, or Series to concatenate.

  • how ('vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align', 'align_full', 'align_inner', 'align_left', 'align_right') (defaults to: "vertical")

    Note that Series only support the vertical strategy.

    • vertical: Applies multiple vstack operations.
    • vertical_relaxed: Same as vertical, but additionally coerces columns to their common supertype if they are mismatched (eg: Int32 → Int64).
    • diagonal: Finds a union between the column schemas and fills missing column values with null.
    • diagonal_relaxed: Same as diagonal, but additionally coerces columns to their common supertype if they are mismatched (eg: Int32 → Int64).
    • horizontal: Stacks Series from DataFrames horizontally and fills with null if the lengths don't match.
    • align, align_full, align_left, align_right: Combines frames horizontally, auto-determining the common key columns and aligning rows using the same logic as align_frames (note that "align" is an alias for "align_full"). The "align" strategy determines the type of join used to align the frames, equivalent to the "how" parameter on align_frames. Note that the common join columns are automatically coalesced, but other column collisions will raise an error (if you need more control over this you should use a suitable join method directly).
  • strict (Boolean) (defaults to: false)

    When how=horizontal, require all DataFrames to be the same height, raising an error if not.

Returns:



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/polars/functions/eager.rb', line 303

# Concatenate DataFrames, LazyFrames, Series, or Exprs.
#
# Dispatches on the class of the first element; all elements are assumed to
# be of the same kind. Series only support the "vertical" strategy.
#
# @param items [Array] frames/series/expressions to concatenate.
# @param how [String] concatenation strategy (see docs above for the list).
# @param strict [Boolean] when `how == "horizontal"`, require equal heights.
#
# @return [Object] a DataFrame, LazyFrame, Series, or Expr matching the input kind.
# @raise [ArgumentError] if `items` is empty or `how` is invalid for the input kind.
# @raise [TypeError] if the first element is of an unsupported class.
def union(
  items,
  how: "vertical",
  strict: false
)
  elems = items.to_a

  if elems.empty?
    msg = "cannot concat empty list"
    raise ArgumentError, msg
  elsif elems.length == 1 && (elems[0].is_a?(DataFrame) || elems[0].is_a?(Series) || elems[0].is_a?(LazyFrame))
    # Single-frame input: nothing to concatenate, return it unchanged.
    return elems[0]
  end

  if how.start_with?("align")
    # Align strategies are not implemented yet in the Ruby bindings.
    raise Todo
  end

  out = nil
  first = elems[0]

  if first.is_a?(DataFrame)
    if ["vertical", "vertical_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems, strict))
    else
      # FIX: previously `raise Todo` followed by unreachable code that
      # referenced an undefined local `allowed`. Raise a proper error instead.
      allowed = ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal"].join(", ")
      msg = "DataFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(LazyFrame)
    if ["vertical", "vertical_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif how == "horizontal"
      return Utils.wrap_ldf(
        Plr.concat_lf_horizontal(
          elems,
          true,
          strict
        )
      )
    else
      # FIX: same dead-code/undefined-`allowed` defect as the DataFrame branch.
      allowed = ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal"].join(", ")
      msg = "LazyFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Expr)
    return Utils.wrap_expr(Plr.concat_expr(elems.map { |e| e._rbexpr }, false))
  else
    msg = "did not expect type: #{first.class.name.inspect} in `concat`"
    raise TypeError, msg
  end

  out
end

#using_string_cache ⇒ Boolean

Check whether the global string cache is enabled.

Returns:



97
98
99
# File 'lib/polars/string_cache.rb', line 97

# Check whether the global string cache is enabled.
#
# @return [Boolean] the current state reported by the native backend.
def using_string_cache
  # Thin delegation to the native (Rust) Polars binding.
  Plr.using_string_cache
end

#var(column, ddof: 1) ⇒ Expr

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘
df["a"].var
# => 13.0

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



255
256
257
# File 'lib/polars/functions/lazy.rb', line 255

# Get the variance of the given column.
#
# Syntactic sugar for `col(column).var(ddof: ddof)`.
#
# @param column [Object] column name.
# @param ddof [Integer] "Delta Degrees of Freedom": the divisor used in the
#   calculation is N - ddof, where N is the number of elements (default 1).
#
# @return [Expr]
def var(column, ddof: 1)
  column_expr = col(column)
  column_expr.var(ddof: ddof)
end

#when(*predicates, **constraints) ⇒ When

Start a "when, then, otherwise" expression.

Examples:

Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The otherwise at the end is optional. If left out, any rows where none of the when expressions evaluate to true are set to null:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

Returns:

  • (When)


91
92
93
94
# File 'lib/polars/functions/whenthen.rb', line 91

# Start a "when, then, otherwise" expression.
#
# All positional predicates and keyword constraints are combined into a
# single condition expression that must be fully satisfied.
#
# @param predicates [Array] condition expressions.
# @param constraints [Hash] column-name/value equality constraints.
#
# @return [When]
def when(*predicates, **constraints)
  When.new(
    Plr.when(
      Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
    )
  )
end

#zeros(n, dtype: Float64, eager: false) ⇒ Object

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: Float64)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



109
110
111
112
113
114
115
116
# File 'lib/polars/functions/repeat.rb', line 109

# Construct a column of length `n` filled with zeros.
#
# Syntactic sugar for the `repeat` function.
#
# @param n [Integer] length of the resulting column.
# @param dtype [Object] data type of the resulting column (default Float64).
# @param eager [Boolean] evaluate immediately and return a Series; when
#   false, return an expression instead.
#
# @return [Object]
# @raise [TypeError] if `dtype` has no zero representation.
def zeros(n, dtype: Float64, eager: false)
  zero = _one_or_zero_by_dtype(0, dtype)
  if zero.nil?
    raise TypeError, "invalid dtype for `zeros`; found #{dtype}"
  end

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end