Module: Polars

Extended by:
Convert, Functions, IO
Defined in:
lib/polars.rb,
lib/polars/expr.rb,
lib/polars/slice.rb,
lib/polars/utils.rb,
lib/polars/config.rb,
lib/polars/io/csv.rb,
lib/polars/io/ipc.rb,
lib/polars/schema.rb,
lib/polars/series.rb,
lib/polars/catalog.rb,
lib/polars/convert.rb,
lib/polars/io/avro.rb,
lib/polars/io/json.rb,
lib/polars/testing.rb,
lib/polars/version.rb,
lib/polars/cat_expr.rb,
lib/polars/group_by.rb,
lib/polars/io/cloud.rb,
lib/polars/io/delta.rb,
lib/polars/io/utils.rb,
lib/polars/selector.rb,
lib/polars/whenthen.rb,
lib/polars/io/ndjson.rb,
lib/polars/list_expr.rb,
lib/polars/meta_expr.rb,
lib/polars/name_expr.rb,
lib/polars/selectors.rb,
lib/polars/array_expr.rb,
lib/polars/data_frame.rb,
lib/polars/data_types.rb,
lib/polars/exceptions.rb,
lib/polars/io/iceberg.rb,
lib/polars/io/parquet.rb,
lib/polars/lazy_frame.rb,
lib/polars/utils/wrap.rb,
lib/polars/binary_expr.rb,
lib/polars/io/database.rb,
lib/polars/series_plot.rb,
lib/polars/sql_context.rb,
lib/polars/string_expr.rb,
lib/polars/struct_expr.rb,
lib/polars/utils/parse.rb,
lib/polars/utils/serde.rb,
lib/polars/string_cache.rb,
lib/polars/expr_dispatch.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/lazy_group_by.rb,
lib/polars/utils/convert.rb,
lib/polars/utils/various.rb,
lib/polars/cat_name_space.rb,
lib/polars/data_type_expr.rb,
lib/polars/date_time_expr.rb,
lib/polars/extension_expr.rb,
lib/polars/functions/lazy.rb,
lib/polars/utils/unstable.rb,
lib/polars/collect_batches.rb,
lib/polars/data_frame_plot.rb,
lib/polars/data_type_group.rb,
lib/polars/functions/eager.rb,
lib/polars/iceberg_dataset.rb,
lib/polars/io/scan_options.rb,
lib/polars/io/sink_options.rb,
lib/polars/list_name_space.rb,
lib/polars/query_opt_flags.rb,
lib/polars/utils/constants.rb,
lib/polars/array_name_space.rb,
lib/polars/dynamic_group_by.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/in_process_query.rb,
lib/polars/rolling_group_by.rb,
lib/polars/binary_name_space.rb,
lib/polars/scan_cast_options.rb,
lib/polars/string_name_space.rb,
lib/polars/struct_name_space.rb,
lib/polars/utils/deprecation.rb,
lib/polars/batched_csv_reader.rb,
lib/polars/functions/business.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/date_time_name_space.rb,
lib/polars/extension_name_space.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/escape_regex.rb,
lib/polars/catalog/unity/table_info.rb,
lib/polars/utils/construction/utils.rb,
lib/polars/catalog/unity/column_info.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/utils/construction/series.rb,
lib/polars/catalog/unity/catalog_info.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/catalog/unity/namespace_info.rb,
lib/polars/functions/range/linear_space.rb,
lib/polars/utils/construction/data_frame.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb

Defined Under Namespace

Modules: Convert, Functions, IO, Selectors, Testing
Classes: Array, ArrayExpr, ArrayNameSpace, Binary, BinaryExpr, BinaryNameSpace, Boolean, CatExpr, CatNameSpace, Catalog, Categorical, Categories, Config, DataFrame, DataFramePlot, DataType, DataTypeExpr, Date, DateTimeExpr, DateTimeNameSpace, Datetime, Decimal, Duration, DynamicGroupBy, Enum, Expr, ExtensionExpr, ExtensionNameSpace, Field, Float16, Float32, Float64, FloatType, GroupBy, InProcessQuery, Int128, Int16, Int32, Int64, Int8, IntegerType, LazyFrame, LazyGroupBy, List, ListExpr, ListNameSpace, MetaExpr, NameExpr, NestedType, Null, NumericType, Object, QueryOptFlags, RollingGroupBy, SQLContext, ScanCastOptions, Schema, Selector, Series, SeriesPlot, SignedIntegerType, String, StringCache, StringExpr, StringNameSpace, Struct, StructExpr, StructNameSpace, TemporalType, Time, UInt128, UInt16, UInt32, UInt64, UInt8, Unknown, UnsignedIntegerType

Constant Summary

SIGNED_INTEGER_DTYPES =
DataTypeGroup.new(
  [
    Int8,
    Int16,
    Int32,
    Int64
  ]
)
UNSIGNED_INTEGER_DTYPES =
DataTypeGroup.new(
  [
    UInt8,
    UInt16,
    UInt32,
    UInt64
  ]
)
INTEGER_DTYPES =
(
  SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
)
FLOAT_DTYPES =
DataTypeGroup.new([Float32, Float64])
NUMERIC_DTYPES =
DataTypeGroup.new(
  FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
)
SinkOptions =
IO::SinkOptions
DEFAULT_QUERY_OPT_FLAGS =
QueryOptFlags.new

Class Method Summary

Class Method Details

.align_frames(*frames, on:, how: nil, select: nil, descending: false) ⇒ Object Originally defined in module Functions

Align an array of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", how: "left", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

.all(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

.all_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

.any(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

.any_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

.approx_n_unique(*columns) ⇒ Expr Originally defined in module Functions

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

.arctan2(y, x) ⇒ Expr Originally defined in module Functions

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

c = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [c, -c, c, -c],
    "x" => [c, c, -c, -c]
  }
)
df.with_columns(Polars.arctan2("y", "x").alias("atan2"))
# =>
# shape: (4, 3)
# ┌───────────┬───────────┬───────────┐
# │ y         ┆ x         ┆ atan2     │
# │ ---       ┆ ---       ┆ ---       │
# │ f64       ┆ f64       ┆ f64       │
# ╞═══════════╪═══════════╪═══════════╡
# │ 0.707107  ┆ 0.707107  ┆ 0.785398  │
# │ -0.707107 ┆ 0.707107  ┆ -0.785398 │
# │ 0.707107  ┆ -0.707107 ┆ 2.356194  │
# │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
# └───────────┴───────────┴───────────┘

.arg_sort_by(exprs, *more_exprs, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr Originally defined in module Functions

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], descending: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

.arg_where(condition, eager: false) ⇒ Expr, Series Originally defined in module Functions

Return indices where condition evaluates to true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

.build_info ⇒ Hash

Return detailed Polars build information.

Examples:

Polars.build_info


159
160
161
# File 'lib/polars.rb', line 159

def self.build_info
  {"version" => VERSION}
end

.business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr Originally defined in module Functions

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Count the number of business days between start and stop (not including stop).

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
    "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
  }
)
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end")
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 6                  │
# └────────────┴────────────┴────────────────────┘

You can pass a custom weekend - for example, if you only take Sunday off:

week_mask = [true, true, true, true, true, true, false]
df.with_columns(
  business_day_count: Polars.business_day_count(
    "start", "end", week_mask: week_mask
  )
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 7                  │
# └────────────┴────────────┴────────────────────┘

You can also pass a list of holidays to exclude from the count:

holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 0                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 5                  │
# └────────────┴────────────┴────────────────────┘

.coalesce(exprs, *more_exprs, eager: false) ⇒ Expr Originally defined in module Functions

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘
df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘
s1 = Polars::Series.new("a", [nil, 2, nil])
s2 = Polars::Series.new("b", [1, nil, 3])
Polars.coalesce(s1, s2, eager: true)
# =>
# shape: (3,)
# Series: 'a' [i64]
# [
#         1
#         2
#         3
# ]

.col(name, *more_names) ⇒ Expr Originally defined in module Functions

Return an expression representing a column in a DataFrame.

.collect_all(lazy_frames, optimizations: DEFAULT_QUERY_OPT_FLAGS, engine: "auto", lazy: false) ⇒ Array Originally defined in module Functions

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on the Polars thread pool.

.concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object Originally defined in module Functions

Aggregate multiple DataFrames/Series into a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘
df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘
df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

.concat_arr(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Horizontally concatenate columns into a single array column.

Non-array columns are reshaped to a unit-width array. All columns must have a dtype of either Polars::Array.new(<DataType>, width) or Polars::<DataType>.

Examples:

Concatenate 2 array columns:

Polars.select(
  a: Polars::Series.new([[1], [3], nil], dtype: Polars::Array.new(Polars::Int64, 1)),
  b: Polars::Series.new([[3], [nil], [5]], dtype: Polars::Array.new(Polars::Int64, 1))
).with_columns(
  Polars.concat_arr("a", "b").alias("concat_arr(a, b)"),
  Polars.concat_arr("a", Polars.first("b")).alias("concat_arr(a, first(b))")
)
# =>
# shape: (3, 4)
# ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
# │ a             ┆ b             ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
# │ ---           ┆ ---           ┆ ---              ┆ ---                     │
# │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2]    ┆ array[i64, 2]           │
# ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
# │ [1]           ┆ [3]           ┆ [1, 3]           ┆ [1, 3]                  │
# │ [3]           ┆ [null]        ┆ [3, null]        ┆ [3, 3]                  │
# │ null          ┆ [5]           ┆ null             ┆ null                    │
# └───────────────┴───────────────┴──────────────────┴─────────────────────────┘

.concat_list(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions

Concatenate the arrays in a Series of dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

.concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false) ⇒ Expr Originally defined in module Functions

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      separator: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

.config(...) ⇒ Object



531
532
533
# File 'lib/polars/config.rb', line 531

def self.config(...)
  Config.new(...)
end

.corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr Originally defined in module Functions

Compute the Pearson's or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.corr(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.544705
# ]
Polars.corr(s1, s2, method: "spearman", eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.5
# ]

.count(*columns) ⇒ Expr Originally defined in module Functions

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

.cov(a, b, ddof: 1, eager: false) ⇒ Expr Originally defined in module Functions

Compute the covariance between two columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.cov(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         3.0
# ]

.cs ⇒ Object



1988
1989
1990
# File 'lib/polars/selectors.rb', line 1988

# Shortcut accessor: returns the Selectors constant so callers can reach
# it through this method.
def self.cs
  Selectors
end

.cum_count(*columns, reverse: false) ⇒ Expr Originally defined in module Functions

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

.cum_fold(acc, exprs, returns_scalar: false, return_dtype: nil, include_init: false, &function) ⇒ Object Originally defined in module Functions

Note:

If you simply want the first encountered expression as accumulator, consider using cum_reduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), Polars.all) { |acc, x| acc + x }
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

.cum_reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions

Cumulatively reduce horizontally across columns with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(Polars.cum_reduce(Polars.all) { |acc, x| acc + x })
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬────────────┐
# │ a   ┆ b   ┆ c   ┆ cum_reduce │
# │ --- ┆ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3]  │
# ╞═════╪═════╪═════╪════════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {1,4,9}    │
# │ 2   ┆ 4   ┆ 6   ┆ {2,6,12}   │
# │ 3   ┆ 5   ┆ 7   ┆ {3,8,15}   │
# └─────┴─────┴─────┴────────────┘

.cum_sum(*names) ⇒ Expr Originally defined in module Functions

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

.cum_sum_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

.date(year, month, day) ⇒ Expr Originally defined in module Functions

Create a Polars literal expression of type Date.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6]
  }
)
df.with_columns(Polars.date(2024, Polars.col("month"), Polars.col("day")))
# =>
# shape: (3, 3)
# ┌───────┬─────┬────────────┐
# │ month ┆ day ┆ date       │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ date       │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 4   ┆ 2024-01-04 │
# │ 2     ┆ 5   ┆ 2024-02-05 │
# │ 3     ┆ 6   ┆ 2024-03-06 │
# └───────┴─────┴────────────┘

We can also use Polars.date for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2024, 1, 1), Date.new(2024, 1, 1), Date.new(2024, 1, 1)],
    "end" => [Date.new(2024, 5, 1), Date.new(2024, 7, 1), Date.new(2024, 9, 1)]
  }
)
df.filter(Polars.col("end") > Polars.date(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌────────────┬────────────┐
# │ start      ┆ end        │
# │ ---        ┆ ---        │
# │ date       ┆ date       │
# ╞════════════╪════════════╡
# │ 2024-01-01 ┆ 2024-07-01 │
# │ 2024-01-01 ┆ 2024-09-01 │
# └────────────┴────────────┘

.date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using Polars duration string to specify the interval:

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

.date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Note:

interval is created according to the following string language:

  • 1ns (1 nanosecond)
  • 1us (1 microsecond)
  • 1ms (1 millisecond)
  • 1s (1 second)
  • 1m (1 minute)
  • 1h (1 hour)
  • 1d (1 calendar day)
  • 1w (1 calendar week)
  • 1mo (1 calendar month)
  • 1q (1 calendar quarter)
  • 1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

.datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil, time_unit: "us", time_zone: nil, ambiguous: "raise") ⇒ Expr Originally defined in module Functions

Create a Polars literal expression of type Datetime.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6],
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(
  Polars.datetime(
    2024,
    Polars.col("month"),
    Polars.col("day"),
    Polars.col("hour"),
    Polars.col("minute"),
    time_zone: "Australia/Sydney"
  )
)
# =>
# shape: (3, 5)
# ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
# │ month ┆ day ┆ hour ┆ minute ┆ datetime                       │
# │ ---   ┆ --- ┆ ---  ┆ ---    ┆ ---                            │
# │ i64   ┆ i64 ┆ i64  ┆ i64    ┆ datetime[μs, Australia/Sydney] │
# ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
# │ 1     ┆ 4   ┆ 12   ┆ 15     ┆ 2024-01-04 12:15:00 AEDT       │
# │ 2     ┆ 5   ┆ 13   ┆ 30     ┆ 2024-02-05 13:30:00 AEDT       │
# │ 3     ┆ 6   ┆ 14   ┆ 45     ┆ 2024-03-06 14:45:00 AEDT       │
# └───────┴─────┴──────┴────────┴────────────────────────────────┘

We can also use Polars.datetime for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0)
    ],
    "end" => [
      DateTime.new(2024, 5, 1, 20, 15, 10),
      DateTime.new(2024, 7, 1, 21, 25, 20),
      DateTime.new(2024, 9, 1, 22, 35, 30)
    ]
  }
)
df.filter(Polars.col("end") > Polars.datetime(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌─────────────────────┬─────────────────────┐
# │ start               ┆ end                 │
# │ ---                 ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╡
# │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
# │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
# └─────────────────────┴─────────────────────┘

.datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

.datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

.disable_string_cache ⇒ nil Originally defined in module Functions

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

.dtype_of(col_or_expr) ⇒ DataTypeExpr Originally defined in module Functions

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get a lazily evaluated DataType of a column or expression.

.duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: nil) ⇒ Expr Originally defined in module Functions

Create a Polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

.element ⇒ Expr Originally defined in module Functions

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_columns(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

.enable_string_cache ⇒ nil Originally defined in module Functions

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

.escape_regex(s) ⇒ String Originally defined in module Functions

Escapes string regex meta characters.

.exclude(columns, *more_columns) ⇒ Object Originally defined in module Functions

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

.field(name) ⇒ Expr Originally defined in module Functions

Select a field in the current struct.with_fields scope.

Examples:

df = Polars::DataFrame.new({"a" => [{"x" => 5, "y" => 2}, {"x" => 3, "y" => 4}]})
df.select(Polars.col("a").struct.with_fields(Polars.field("x") ** 2))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {25,2}    │
# │ {9,4}     │
# └───────────┘

.first(*columns) ⇒ Expr Originally defined in module Functions

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘
df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

.fold(acc, exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions

Accumulate over multiple columns horizontally/row wise with a left fold.

Examples:

Horizontally sum over all columns and add 1.

df = Polars::DataFrame.new(
 {
   "a" => [1, 2, 3],
   "b" => [3, 4, 5],
   "c" => [5, 6, 7]
 }
)
df.select(
  Polars.fold(Polars.lit(1), Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i32 │
# ╞═════╡
# │ 10  │
# │ 13  │
# │ 16  │
# └─────┘

You can also apply a condition/predicate on all columns:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.filter(
  Polars.fold(Polars.lit(true), Polars.col("*") > 1) { |acc, x| acc & x }
)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

.format(f_string, *args) ⇒ Expr Originally defined in module Functions

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

.from_epoch(column, time_unit: "s") ⇒ Object Originally defined in module Functions

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • time_unit: "d" returns Polars::Date
  • time_unit: "s" returns Polars::Datetime["us"]
  • time_unit: "ms" returns Polars::Datetime["ms"]
  • time_unit: "us" returns Polars::Datetime["us"]
  • time_unit: "ns" returns Polars::Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), time_unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

.from_hash(data, schema: nil, schema_overrides: nil, strict: true) ⇒ DataFrame Originally defined in module Convert

Construct a DataFrame from a hash of arrays.

This operation clones data, unless you pass in a Hash<String, Series>.

Examples:

data = {"a" => [1, 2], "b" => [3, 4]}
Polars.from_hash(data)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘

.from_hashes(data, schema: nil, schema_overrides: nil, strict: true, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module Convert

Construct a DataFrame from an array of hashes. This operation clones data.

Examples:

data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
Polars.from_hashes(data)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 5   │
# │ 3   ┆ 6   │
# └─────┴─────┘

Declaring a partial schema will drop the omitted columns.

Polars.from_hashes(data, schema: {"a" => Polars::Int32})
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

.from_numo(data, schema: nil, schema_overrides: nil, orient: nil) ⇒ DataFrame Originally defined in module Convert

Construct a DataFrame from a Numo::NArray. This operation clones data.

Note that this is slower than creating from columnar memory.

Examples:

data = Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
Polars.from_numo(data, schema: ["a", "b"], orient: "col")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 5   │
# │ 3   ┆ 6   │
# └─────┴─────┘

Raises:

  • (Todo)

.from_records(data, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module Convert

Construct a DataFrame from an array of arrays. This operation clones data.

Note that this is slower than creating from columnar memory.

Examples:

data = [[1, 2, 3], [4, 5, 6]]
Polars.from_records(data, schema: ["a", "b"])
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 5   │
# │ 3   ┆ 6   │
# └─────┴─────┘

.get_index_type ⇒ Object

Return the data type used for Polars indexing.

Examples:

Polars.get_index_type
# => Polars::UInt32


149
150
151
# File 'lib/polars.rb', line 149

# Return the data type used for Polars indexing.
#
# Delegates directly to Plr.get_index_type and returns its result unchanged.
def self.get_index_type
  Plr.get_index_type
end

.groups(column) ⇒ Object Originally defined in module Functions

Syntactic sugar for Polars.col(column).agg_groups.

.head(column, n = 10) ⇒ Expr Originally defined in module Functions

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

.implode(*columns) ⇒ Expr Originally defined in module Functions

Aggregate all column values into a list.

This function is syntactic sugar for col(columns).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘
df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

.int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64) ⇒ Expr, Series Also known as: arange Originally defined in module Functions

Create a range expression (or Series).

This can be used in a select, with_columns, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

.int_ranges(start = 0, stop = nil, step: 1, dtype: Int64, eager: false) ⇒ Expr, Series Originally defined in module Functions

Generate a range of integers for each row of the input columns.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2]})
df.with_columns(int_range: Polars.int_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌───────┬─────┬────────────┐
# │ start ┆ end ┆ int_range  │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ list[i64]  │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 3   ┆ [1, 2]     │
# │ -1    ┆ 2   ┆ [-1, 0, 1] │
# └───────┴─────┴────────────┘

stop can be omitted for a shorter syntax; the single argument is then treated as the end of the range, and the range starts from 0.

df.select("end", int_range: Polars.int_ranges("end"))
# =>
# shape: (2, 2)
# ┌─────┬───────────┐
# │ end ┆ int_range │
# │ --- ┆ ---       │
# │ i64 ┆ list[i64] │
# ╞═════╪═══════════╡
# │ 3   ┆ [0, 1, 2] │
# │ 2   ┆ [0, 1]    │
# └─────┴───────────┘

.last(*columns) ⇒ Expr Originally defined in module Functions

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘
df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘
df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

.len ⇒ Expr Also known as: length Originally defined in module Functions

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using len in conjunction with int_range.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

.linear_spaces(start, stop, num_samples, closed: "both", as_array: false, eager: false) ⇒ Expr, Series Originally defined in module Functions

Note:

This functionality is experimental. It may be changed at any point without it being considered a breaking change.

Generate a sequence of evenly-spaced values for each row between start and end.

The number of values in each sequence is determined by num_samples.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2], "num_samples" => [4, 5]})
df.with_columns(ls: Polars.linear_spaces("start", "end", "num_samples"))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬────────────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls                     │
# │ ---   ┆ --- ┆ ---         ┆ ---                    │
# │ i64   ┆ i64 ┆ i64         ┆ list[f64]              │
# ╞═══════╪═════╪═════════════╪════════════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 1.666667, … 3.0] │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, -0.25, … 2.0]   │
# └───────┴─────┴─────────────┴────────────────────────┘
df.with_columns(ls: Polars.linear_spaces("start", "end", 3, as_array: true))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬──────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls               │
# │ ---   ┆ --- ┆ ---         ┆ ---              │
# │ i64   ┆ i64 ┆ i64         ┆ array[f64, 3]    │
# ╞═══════╪═════╪═════════════╪══════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 2.0, 3.0]  │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, 0.5, 2.0] │
# └───────┴─────┴─────────────┴──────────────────┘

.lit(value, dtype: nil, allow_object: false) ⇒ Expr Originally defined in module Functions

Return an expression representing a literal value.

Examples:

Literal scalar values:

Polars.lit(1)
Polars.lit(5.5)
Polars.lit(nil)
Polars.lit("foo_bar")
Polars.lit(Date.new(2021, 1, 20))
Polars.lit(DateTime.new(2023, 3, 31, 10, 30, 45))

Literal list/Series data (1D):

Polars.lit([1, 2, 3])
Polars.lit(Polars::Series.new("x", [1, 2, 3]))

Literal list/Series data (2D):

Polars.lit([[1, 2], [3, 4]])
Polars.lit(Polars::Series.new("y", [[1, 2], [3, 4]]))

.map_batches(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr Originally defined in module Functions

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Note:

A UDF passed to map_batches must be pure, meaning that it cannot modify or depend on state other than its arguments. We may call the function with arbitrary input data.

Map a custom function over multiple columns/expressions.

Produces a single Series result.

Examples:

test_func = lambda do |a, b, c|
  a + b + c
end
df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 4],
    "b" => [4, 5, 6, 7]
  }
)

df.with_columns(
  (
    Polars.struct(["a", "b"]).map_batches { |x| test_func.(x.struct.field("a"), x.struct.field("b"), 1) }
  ).alias("a+b+c")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ b   ┆ a+b+c │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ i64   │
# ╞═════╪═════╪═══════╡
# │ 1   ┆ 4   ┆ 6     │
# │ 2   ┆ 5   ┆ 8     │
# │ 3   ┆ 6   ┆ 10    │
# │ 4   ┆ 7   ┆ 12    │
# └─────┴─────┴───────┘

.map_groups(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr Originally defined in module Functions

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Apply a custom/user-defined function (UDF) in a GroupBy context.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [1, 1, 2],
    "a" => [1, 3, 3],
    "b" => [5, 6, 7]
  }
)
(
  df.group_by("group").agg(
    Polars.map_groups(["a", "b"], return_dtype: Polars::Float64) { |list_of_series| list_of_series[0] / list_of_series[0].sum + list_of_series[1] }
    .alias("my_custom_aggregation")
  )
).sort("group")
# =>
# shape: (2, 2)
# ┌───────┬───────────────────────┐
# │ group ┆ my_custom_aggregation │
# │ ---   ┆ ---                   │
# │ i64   ┆ list[f64]             │
# ╞═══════╪═══════════════════════╡
# │ 1     ┆ [5.25, 6.75]          │
# │ 2     ┆ [8.0]                 │
# └───────┴───────────────────────┘

.max(*names) ⇒ Expr Originally defined in module Functions

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘
df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

.max_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

.mean(*columns) ⇒ Expr Originally defined in module Functions

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘
df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

.mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

.median(*columns) ⇒ Expr Originally defined in module Functions

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘
df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

.min(*names) ⇒ Expr Originally defined in module Functions

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘
df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

.min_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

.n_unique(*columns) ⇒ Expr Originally defined in module Functions

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

.nth(*indices, strict: true) ⇒ Expr Originally defined in module Functions

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘
df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

.ones(n, dtype: Float64, eager: false) ⇒ Object Originally defined in module Functions

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

.quantile(column, quantile, interpolation: "nearest") ⇒ Expr Originally defined in module Functions

Syntactic sugar for Polars.col(column).quantile(...).

.read_avro(source, columns: nil, n_rows: nil) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from Apache Avro format.

.read_csv(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, batch_size: 8192, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, storage_options: nil, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true) ⇒ DataFrame Originally defined in module IO

Note:

When rechunk: true is set, this operation performs a rechunk at the end, meaning that all data will be stored contiguously in memory. Keep rechunk: false (the default shown in the signature) if you are benchmarking the CSV reader. A rechunk is an expensive operation.

Read a CSV file into a DataFrame.

.read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ BatchedCsvReader Originally defined in module IO

Deprecated.

Use scan_csv().collect_batches instead.

Read a CSV file in batches.

Upon creation of the BatchedCsvReader, Polars will gather statistics and determine the file chunks. After that, work will only be done if next_batches is called.

Examples:

reader = Polars.read_csv_batched(
  "./tpch/tables_scale_100/lineitem.tbl", separator: "|", try_parse_dates: true
)
reader.next_batches(5)

.read_database(query, schema_overrides: nil) ⇒ DataFrame Originally defined in module IO

Read a SQL query into a DataFrame.

.read_delta(source, version: nil, columns: nil, rechunk: nil, storage_options: nil, delta_table_options: nil) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from a Delta Lake table.

.read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from Arrow IPC (Feather v2) file.

.read_ipc_schema(source) ⇒ Hash Originally defined in module IO

Get a schema of the IPC file without reading data.

.read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from Arrow IPC record batch stream.

See "Streaming format" on https://arrow.apache.org/docs/python/ipc.html.

.read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from a JSON file.

.read_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from a newline delimited JSON file.

.read_parquet(source, columns: nil, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil) ⇒ DataFrame Originally defined in module IO

Read into a DataFrame from a parquet file.

.read_parquet_metadata(source, storage_options: nil, credential_provider: "auto", retries: nil) ⇒ Hash Originally defined in module IO

Note:

This functionality is considered experimental. It may be removed or changed at any point without it being considered a breaking change.

Get file-level custom metadata of a Parquet file without reading data.

.read_parquet_schema(source) ⇒ Schema Originally defined in module IO

Get a schema of the Parquet file without reading data.

.reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions

Accumulate over multiple columns horizontally (row-wise) with a left fold.

Examples:

Horizontally sum over all columns.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.select(
  Polars.reduce(Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 5   │
# └─────┘

.repeat(value, n, dtype: nil, eager: false) ⇒ Object Originally defined in module Functions

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting eager: true.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

.rolling_corr(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr Originally defined in module Functions

Compute the rolling correlation between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

.rolling_cov(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr Originally defined in module Functions

Compute the rolling covariance between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

.scan_csv(source, has_header: true, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, cache: true, with_column_names: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, try_parse_dates: false, eol_char: "\n", new_columns: nil, raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame Originally defined in module IO

Lazily read from a CSV file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

.scan_delta(source, version: nil, storage_options: nil, delta_table_options: nil, rechunk: nil) ⇒ LazyFrame Originally defined in module IO

Lazily read from a Delta Lake table.

.scan_iceberg(source, snapshot_id: nil, storage_options: nil) ⇒ LazyFrame Originally defined in module IO

Lazily read from an Apache Iceberg table.

.scan_ipc(source, n_rows: nil, cache: true, rechunk: false, row_index_name: nil, row_index_offset: 0, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, hive_partitioning: nil, hive_schema: nil, try_parse_hive_dates: true, include_file_paths: nil, _record_batch_statistics: false) ⇒ LazyFrame Originally defined in module IO

Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

.scan_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame Originally defined in module IO

Lazily read from a newline delimited JSON file.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

.scan_parquet(source, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, hidden_file_prefix: nil, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, cache: true, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil, extra_columns: "raise", cast_options: nil, _column_mapping: nil, _default_values: nil, _deletion_files: nil, _table_statistics: nil, _row_count: nil) ⇒ LazyFrame Originally defined in module IO

Lazily read from a parquet file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

.select(*exprs, eager: true, **named_exprs) ⇒ DataFrame Originally defined in module Functions

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

.self_dtype ⇒ DataTypeExpr Originally defined in module Functions

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get the dtype of self in map_elements and map_batches.

.set_random_seed(seed) ⇒ nil Originally defined in module Functions

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

.show_versions ⇒ nil

Print out the version of Polars and its optional dependencies.



166
167
168
169
170
171
172
173
# File 'lib/polars.rb', line 166

# Print a short version report to standard output: the Polars version,
# the index type, the platform string, and the Ruby version.
#
# @return [nil]
def self.show_versions
  report = [
    "--------Version info---------",
    "Polars: #{VERSION}",
    "Index type: #{get_index_type}",
    "Platform: #{RUBY_PLATFORM}",
    "Ruby: #{RUBY_VERSION}"
  ]
  # Kernel#puts writes each array element on its own line.
  puts(report)
  nil
end

.sql_expr(sql) ⇒ Expr Originally defined in module Functions

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

.std(column, ddof: 1) ⇒ Expr Originally defined in module Functions

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘
df["a"].std
# => 3.605551275463989

.string_cache ⇒ Object



31
32
33
# File 'lib/polars/string_cache.rb', line 31

# Build a new StringCache, forwarding every argument (positional,
# keyword, and block) to StringCache.new unchanged.
#
# @return [Object] the value returned by StringCache.new
def self.string_cache(...)
  StringCache.new(...)
end

.struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object Originally defined in module Functions

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => Polars::Schema({"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})})

.sum(*names) ⇒ Expr Originally defined in module Functions

Sum all values.

Syntactic sugar for col(names).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘
df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

.sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

.tail(column, n = 10) ⇒ Expr Originally defined in module Functions

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

.thread_pool_size ⇒ Integer

Return the number of threads in the Polars thread pool.



138
139
140
# File 'lib/polars.rb', line 138

# Return the number of threads in the Polars thread pool.
#
# Thin wrapper that delegates directly to Plr.thread_pool_size.
#
# @return [Integer]
def self.thread_pool_size
  Plr.thread_pool_size
end

.time(hour = nil, minute = nil, second = nil, microsecond = nil) ⇒ Expr Originally defined in module Functions

Create a Polars literal expression of type Time.

Examples:

df = Polars::DataFrame.new(
  {
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(Polars.time(Polars.col("hour"), Polars.col("minute")))
# =>
# shape: (3, 3)
# ┌──────┬────────┬──────────┐
# │ hour ┆ minute ┆ time     │
# │ ---  ┆ ---    ┆ ---      │
# │ i64  ┆ i64    ┆ time     │
# ╞══════╪════════╪══════════╡
# │ 12   ┆ 15     ┆ 12:15:00 │
# │ 13   ┆ 30     ┆ 13:30:00 │
# │ 14   ┆ 45     ┆ 14:45:00 │
# └──────┴────────┴──────────┘

.time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

.time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

.union(items, how: "vertical", strict: false) ⇒ Object Originally defined in module Functions

Note:

This function does not guarantee any specific ordering of rows in the result. If you need predictable row ordering, use Polars.concat instead.

Combine multiple DataFrames, LazyFrames, or Series into a single object.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.union([df1, df2])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.union([df1, df2], how: "vertical_relaxed")
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.union([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘

The "diagonal" strategy allows for some frames to have missing columns, the values for which are filled with null:

df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.union([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘

.using_string_cache ⇒ Boolean Originally defined in module Functions

Check whether the global string cache is enabled.

.var(column, ddof: 1) ⇒ Expr Originally defined in module Functions

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘
df["a"].var
# => 13.0

.when(*predicates, **constraints) ⇒ When Originally defined in module Functions

Start a "when, then, otherwise" expression.

Examples:

Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The otherwise at the end is optional. If left out, any rows where none of the when expressions evaluate to true are set to null:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

.zeros(n, dtype: Float64, eager: false) ⇒ Object Originally defined in module Functions

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]