Module: Polars

Extended by:: Convert, Functions, IO

Defined in:: lib/polars.rb,
lib/polars/expr.rb,
lib/polars/plot.rb,
lib/polars/slice.rb,
lib/polars/utils.rb,
lib/polars/config.rb,
lib/polars/io/csv.rb,
lib/polars/io/ipc.rb,
lib/polars/series.rb,
lib/polars/convert.rb,
lib/polars/io/avro.rb,
lib/polars/io/json.rb,
lib/polars/testing.rb,
lib/polars/version.rb,
lib/polars/cat_expr.rb,
lib/polars/group_by.rb,
lib/polars/io/delta.rb,
lib/polars/whenthen.rb,
lib/polars/io/ndjson.rb,
lib/polars/list_expr.rb,
lib/polars/meta_expr.rb,
lib/polars/name_expr.rb,
lib/polars/selectors.rb,
lib/polars/array_expr.rb,
lib/polars/data_frame.rb,
lib/polars/data_types.rb,
lib/polars/exceptions.rb,
lib/polars/io/parquet.rb,
lib/polars/lazy_frame.rb,
lib/polars/utils/wrap.rb,
lib/polars/binary_expr.rb,
lib/polars/io/database.rb,
lib/polars/sql_context.rb,
lib/polars/string_expr.rb,
lib/polars/struct_expr.rb,
lib/polars/utils/parse.rb,
lib/polars/string_cache.rb,
lib/polars/expr_dispatch.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/lazy_group_by.rb,
lib/polars/utils/convert.rb,
lib/polars/utils/various.rb,
lib/polars/cat_name_space.rb,
lib/polars/date_time_expr.rb,
lib/polars/functions/lazy.rb,
lib/polars/data_type_group.rb,
lib/polars/functions/eager.rb,
lib/polars/list_name_space.rb,
lib/polars/utils/constants.rb,
lib/polars/array_name_space.rb,
lib/polars/dynamic_group_by.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/rolling_group_by.rb,
lib/polars/binary_name_space.rb,
lib/polars/string_name_space.rb,
lib/polars/struct_name_space.rb,
lib/polars/batched_csv_reader.rb,
lib/polars/functions/whenthen.rb,
lib/polars/date_time_name_space.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb more...

Defined Under Namespace

Modules: Convert, Functions, IO, Plot, Selectors, Testing Classes: Array, ArrayExpr, ArrayNameSpace, Binary, BinaryExpr, BinaryNameSpace, Boolean, CatExpr, CatNameSpace, Categorical, Config, DataFrame, DataType, DataTypeGroup, Date, DateTimeExpr, DateTimeNameSpace, Datetime, Decimal, Duration, DynamicGroupBy, Enum, Expr, Field, Float32, Float64, FloatType, GroupBy, Int128, Int16, Int32, Int64, Int8, IntegerType, LazyFrame, LazyGroupBy, List, ListExpr, ListNameSpace, MetaExpr, NameExpr, NestedType, Null, NumericType, Object, RollingGroupBy, SQLContext, Series, SignedIntegerType, String, StringCache, StringExpr, StringNameSpace, Struct, StructExpr, StructNameSpace, TemporalType, Time, UInt16, UInt32, UInt64, UInt8, Unknown, UnsignedIntegerType

Constant Summary collapse

SIGNED_INTEGER_DTYPES =

DataTypeGroup.new(
  [
    Int8,
    Int16,
    Int32,
    Int64
  ]
)

UNSIGNED_INTEGER_DTYPES =

DataTypeGroup.new(
  [
    UInt8,
    UInt16,
    UInt32,
    UInt64
  ]
)

INTEGER_DTYPES =

(
  SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
)

FLOAT_DTYPES =

DataTypeGroup.new([Float32, Float64])

NUMERIC_DTYPES =

DataTypeGroup.new(
  FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
)

Class Method Summary collapse

.align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object extended from Functions
Align a sequence of frames using the unique values from one or more columns as a key.
.all(*names, ignore_nulls: true) ⇒ Expr extended from Functions
Either return an expression representing all columns, or evaluate a bitwise AND operation.
.all_horizontal(*exprs) ⇒ Expr extended from Functions
Compute the bitwise AND horizontally across columns.
.any(*names, ignore_nulls: true) ⇒ Expr extended from Functions
Evaluate a bitwise OR operation.
.any_horizontal(*exprs) ⇒ Expr extended from Functions
Compute the bitwise OR horizontally across columns.
.approx_n_unique(*columns) ⇒ Expr extended from Functions
Approximate count of unique values.
.arctan2(y, x) ⇒ Expr extended from Functions
Compute two argument arctan in radians.
.arctan2d(y, x) ⇒ Expr extended from Functions
Compute two argument arctan in degrees.
.arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr (also: #argsort_by) extended from Functions
Find the indexes that would sort the columns.
.arg_where(condition, eager: false) ⇒ Expr, Series extended from Functions
Return indices where condition evaluates true.
.coalesce(exprs, *more_exprs) ⇒ Expr extended from Functions
Folds the columns from left to right, keeping the first non-null value.
.col(name, *more_names) ⇒ Expr extended from Functions
Return an expression representing a column in a DataFrame.
.collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array extended from Functions
Collect multiple LazyFrames at the same time.
.concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object extended from Functions
Aggregate multiple DataFrames/Series to a single DataFrame/Series.
.concat_list(exprs, *more_exprs) ⇒ Expr extended from Functions
Concat the arrays in a Series dtype List in linear time.
.concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr extended from Functions
Horizontally concat Utf8 Series in linear time.
.config ⇒ Object
.corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ Expr extended from Functions
Compute the Pearson's or Spearman rank correlation between two columns.
.count(*columns) ⇒ Expr extended from Functions
Return the number of non-null values in the column.
.cov(a, b, ddof: 1) ⇒ Expr extended from Functions
Compute the covariance between two columns/expressions.
.cs ⇒ Object
.cum_count(*columns, reverse: false) ⇒ Expr extended from Functions
Return the cumulative count of the non-null values in the column.
.cum_fold(acc, f, exprs, include_init: false) ⇒ Object (also: #cumfold) extended from Functions
Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
.cum_sum(*names) ⇒ Expr (also: #cumsum) extended from Functions
Cumulatively sum all values.
.cum_sum_horizontal(*exprs) ⇒ Expr (also: #cumsum_horizontal) extended from Functions
Cumulatively sum all values horizontally across columns.
.date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object extended from Functions
Create a range of type Datetime (or Date).
.date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object extended from Functions
Create a column of date ranges.
.datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object extended from Functions
Generate a datetime range.
.datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object extended from Functions
Create a column of datetime ranges.
.disable_string_cache ⇒ nil extended from Functions
Disable and clear the global string cache.
.duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr extended from Functions
Create polars Duration from distinct time components.
.element ⇒ Expr extended from Functions
Alias for an element being evaluated in an eval expression.
.enable_string_cache ⇒ nil extended from Functions
Enable the global string cache.
.exclude(columns) ⇒ Object extended from Functions
Exclude certain columns from a wildcard/regex selection.
.first(*columns) ⇒ Expr extended from Functions
Get the first value.
.fold(acc, f, exprs) ⇒ Expr extended from Functions
Accumulate over multiple columns horizontally/row wise with a left fold.
.format(f_string, *args) ⇒ Expr extended from Functions
Format expressions as a string.
.from_epoch(column, unit: "s", eager: false) ⇒ Object extended from Functions
Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
.from_hash(data, schema: nil, columns: nil) ⇒ DataFrame extended from Convert
Construct a DataFrame from a dictionary of sequences.
.groups(column) ⇒ Object extended from Functions
Syntactic sugar for Polars.col("foo").agg_groups.
.head(column, n = 10) ⇒ Expr extended from Functions
Get the first n rows.
.implode(*columns) ⇒ Expr extended from Functions
Aggregate all column values into a list.
.int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ Expr, Series (also: #arange) extended from Functions
Create a range expression (or Series).
.last(*columns) ⇒ Expr extended from Functions
Get the last value.
.len ⇒ Expr (also: #length) extended from Functions
Return the number of rows in the context.
.lit(value, dtype: nil, allow_object: nil) ⇒ Expr extended from Functions
Return an expression representing a literal value.
.max(*names) ⇒ Expr extended from Functions
Get the maximum value.
.max_horizontal(*exprs) ⇒ Expr extended from Functions
Get the maximum value horizontally across columns.
.mean(*columns) ⇒ Expr (also: #avg) extended from Functions
Get the mean value.
.mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr extended from Functions
Compute the mean of all values horizontally across columns.
.median(*columns) ⇒ Expr extended from Functions
Get the median value.
.min(*names) ⇒ Expr extended from Functions
Get the minimum value.
.min_horizontal(*exprs) ⇒ Expr extended from Functions
Get the minimum value horizontally across columns.
.n_unique(*columns) ⇒ Expr extended from Functions
Count unique values.
.nth(*indices) ⇒ Expr extended from Functions
Get the nth column(s) of the context.
.ones(n, dtype: nil, eager: true) ⇒ Object extended from Functions
Construct a column of length n filled with ones.
.quantile(column, quantile, interpolation: "nearest") ⇒ Expr extended from Functions
Syntactic sugar for Polars.col("foo").quantile(...).
.read_avro(source, columns: nil, n_rows: nil) ⇒ DataFrame extended from IO
Read into a DataFrame from Apache Avro format.
.read_csv(source, has_header: true, columns: nil, new_columns: nil, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, ignore_errors: false, parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 8192, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, storage_options: nil, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, eol_char: "\n", truncate_ragged_lines: false) ⇒ DataFrame extended from IO
Read a CSV file into a DataFrame.
.read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ BatchedCsvReader extended from IO
Read a CSV file in batches.
.read_database(query, schema_overrides: nil) ⇒ DataFrame (also: #read_sql) extended from IO
Read a SQL query into a DataFrame.
.read_delta(source, version: nil, columns: nil, rechunk: false, storage_options: nil, delta_table_options: nil) ⇒ DataFrame extended from IO
Reads into a DataFrame from a Delta lake table.
.read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_count_name: nil, row_count_offset: 0, rechunk: true) ⇒ DataFrame extended from IO
Read into a DataFrame from Arrow IPC (Feather v2) file.
.read_ipc_schema(source) ⇒ Hash extended from IO
Get a schema of the IPC file without reading data.
.read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame extended from IO
Read into a DataFrame from Arrow IPC record batch stream.
.read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame extended from IO
Read into a DataFrame from a JSON file.
.read_ndjson(source, schema: nil, schema_overrides: nil, ignore_errors: false) ⇒ DataFrame extended from IO
Read into a DataFrame from a newline delimited JSON file.
.read_parquet(source, columns: nil, n_rows: nil, row_count_name: nil, row_count_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, storage_options: nil, credential_provider: nil, retries: 2, include_file_paths: nil, allow_missing_columns: false) ⇒ DataFrame extended from IO
Read into a DataFrame from a parquet file.
.read_parquet_schema(source) ⇒ Hash extended from IO
Get a schema of the Parquet file without reading data.
.repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ Object extended from Functions
Repeat a single value n times.
.scan_csv(source, has_header: true, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, cache: true, with_column_names: nil, infer_schema_length: N_INFER_DEFAULT, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, parse_dates: false, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true) ⇒ LazyFrame extended from IO
Lazily read from a CSV file or multiple files via glob patterns.
.scan_delta(source, version: nil, storage_options: nil, delta_table_options: nil) ⇒ LazyFrame extended from IO
Lazily read from a Delta lake table.
.scan_ipc(source, n_rows: nil, cache: true, rechunk: true, row_count_name: nil, row_count_offset: 0, storage_options: nil, hive_partitioning: nil, hive_schema: nil, try_parse_hive_dates: true, include_file_paths: nil) ⇒ LazyFrame extended from IO
Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.
.scan_ndjson(source, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: true, row_count_name: nil, row_count_offset: 0) ⇒ LazyFrame extended from IO
Lazily read from a newline delimited JSON file.
.scan_parquet(source, n_rows: nil, row_count_name: nil, row_count_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, cache: true, storage_options: nil, credential_provider: nil, retries: 2, include_file_paths: nil, allow_missing_columns: false) ⇒ LazyFrame extended from IO
Lazily read from a parquet file or multiple files via glob patterns.
.select(*exprs, **named_exprs) ⇒ DataFrame extended from Functions
Run polars expressions without a context.
.set_random_seed(seed) ⇒ nil extended from Functions
Set the global random seed for Polars.
.sql_expr(sql) ⇒ Expr extended from Functions
Parse one or more SQL expressions to polars expression(s).
.std(column, ddof: 1) ⇒ Expr extended from Functions
Get the standard deviation.
.string_cache ⇒ Object
.struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object extended from Functions
Collect several columns into a Series of dtype Struct.
.sum(*names) ⇒ Expr extended from Functions
Sum all values.
.sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr extended from Functions
Sum all values horizontally across columns.
.tail(column, n = 10) ⇒ Expr extended from Functions
Get the last n rows.
.thread_pool_size ⇒ Integer
Return the number of threads in the Polars thread pool.
.time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object extended from Functions
Generate a time range.
.time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object extended from Functions
Create a column of time ranges.
.using_string_cache ⇒ Boolean extended from Functions
Check whether the global string cache is enabled.
.var(column, ddof: 1) ⇒ Expr extended from Functions
Get the variance.
.when(*predicates, **constraints) ⇒ When extended from Functions
Start a "when, then, otherwise" expression.
.zeros(n, dtype: nil, eager: true) ⇒ Object extended from Functions
Construct a column of length n filled with zeros.

Class Method Details

permalink .align_frames(*frames, on:, select: nil, reverse: false) ⇒ `Object` Originally defined in module Functions

Align a sequence of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

Parameters:

frames (Array) —
Sequence of DataFrames or LazyFrames.
on (Object) —
One or more columns whose unique values will be used to align the frames.
select (Object) (defaults to: nil) —
Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.
reverse (Object) (defaults to: false) —
Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:

(Object)

permalink .all(*names, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default).

Returns:

(Expr)

permalink .all_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

permalink .any(*names, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default).

Returns:

(Expr)

permalink .any_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

permalink .approx_n_unique(*columns) ⇒ `Expr` Originally defined in module Functions

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .arctan2(y, x) ⇒ `Expr` Originally defined in module Functions

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

y (Object) —
Column name or Expression.
x (Object) —
Column name or Expression.

Returns:

(Expr)

permalink .arctan2d(y, x) ⇒ `Expr` Originally defined in module Functions

Compute two argument arctan in degrees.

Returns the angle (in degrees) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

y (Object) —
Column name or Expression.
x (Object) —
Column name or Expression.

Returns:

(Expr)

permalink .arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ `Expr` Also known as: argsort_by Originally defined in module Functions

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

exprs (Object) —
Columns used to determine the ordering.
reverse (Boolean) (defaults to: false) —
Default is ascending.

Returns:

(Expr)

permalink .arg_where(condition, eager: false) ⇒ `Expr`, `Series` Originally defined in module Functions

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

condition (Expr) —
Boolean expression to evaluate
eager (Boolean) (defaults to: false) —
Whether to apply this function eagerly (as opposed to lazily).

Returns:

(Expr, Series)

permalink .coalesce(exprs, *more_exprs) ⇒ `Expr` Originally defined in module Functions

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘

df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘

Parameters:

exprs (Array) —
Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
more_exprs (Array) —
Additional columns to coalesce, specified as positional arguments.

Returns:

(Expr)

permalink .col(name, *more_names) ⇒ `Expr` Originally defined in module Functions

Return an expression representing a column in a DataFrame.

Returns:

(Expr)

permalink .collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ `Array` Originally defined in module Functions

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on the Polars thread pool.

Parameters:

lazy_frames (Array) —
A list of LazyFrames to collect.
type_coercion (Boolean) (defaults to: true) —
Do type coercion optimization.
predicate_pushdown (Boolean) (defaults to: true) —
Do predicate pushdown optimization.
projection_pushdown (Boolean) (defaults to: true) —
Do projection pushdown optimization.
simplify_expression (Boolean) (defaults to: true) —
Run simplify expressions optimization.
string_cache (Boolean) (defaults to: false) —
This argument is deprecated and will be ignored
no_optimization (Boolean) (defaults to: false) —
Turn off optimizations.
slice_pushdown (Boolean) (defaults to: true) —
Slice pushdown optimization.
common_subplan_elimination (Boolean) (defaults to: true) —
Will try to cache branching subplans that occur on self-joins or unions.
allow_streaming (Boolean) (defaults to: false) —
Run parts of the query in a streaming fashion (this is in an alpha state)

Returns:

(Array)

permalink .concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ `Object` Originally defined in module Functions

Aggregate multiple DataFrames/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘

Parameters:

items (Object) —
DataFrames/Series/LazyFrames to concatenate.
rechunk (Boolean) (defaults to: true) —
Make sure that all data is in contiguous memory.
how ("vertical", "vertical_relaxed", "diagonal", "horizontal") (defaults to: "vertical") —
LazyFrames do not support the horizontal strategy.
- Vertical: applies multiple vstack operations.
- Diagonal: finds a union between the column schemas and fills missing column values with null.
- Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
parallel (Boolean) (defaults to: true) —
Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

Returns:

(Object)

permalink .concat_list(exprs, *more_exprs) ⇒ `Expr` Originally defined in module Functions

Concat the arrays in a Series dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

Returns:

(Expr)

permalink .concat_str(exprs, sep: "", ignore_nulls: false) ⇒ `Expr` Originally defined in module Functions

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      sep: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

exprs (Object) —
Columns to concat into a Utf8 Series.
sep (String) (defaults to: "") —
String value that will be used to separate the values.
ignore_nulls (Boolean) (defaults to: false) —
Ignore null values. Defaults to false, in which case a null in any input column makes the resulting string null.

Returns:

(Expr)

permalink .config ⇒ `Object`

[View source]


531
532
533

# File 'lib/polars/config.rb', line 531

def self.config(...) # `...` forwards all positional/keyword arguments and any block as-is
  Config.new(...) # the value of Config.new is this method's return value
end

permalink .corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ `Expr` Originally defined in module Functions

Compute the Pearson's or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Parameters:

a (Object) —
Column name or Expression.
b (Object) —
Column name or Expression.
ddof (Integer) (defaults to: nil) —
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.
method ("pearson", "spearman") (defaults to: "pearson") —
Correlation method.
propagate_nans (Boolean) (defaults to: false) —
If true, any NaN encountered will lead to NaN in the output. Defaults to false, where NaN values are regarded as larger than any finite number and thus lead to the highest rank.

Returns:

(Expr)

permalink .count(*columns) ⇒ `Expr` Originally defined in module Functions

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .cov(a, b, ddof: 1) ⇒ `Expr` Originally defined in module Functions

Compute the covariance between two columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Parameters:

a (Object) —
Column name or Expression.
b (Object) —
Column name or Expression.
ddof (Integer) (defaults to: 1) —
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)

permalink .cs ⇒ `Object`

[View source]


1223
1224
1225

# File 'lib/polars/selectors.rb', line 1223

def self.cs # short accessor: takes no arguments
  Polars::Selectors # returns the Polars::Selectors constant itself
end

permalink .cum_count(*columns, reverse: false) ⇒ `Expr` Originally defined in module Functions

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

columns (Array) —
Name(s) of the columns to use.
reverse (Boolean) (defaults to: false) —
Reverse the operation.

Returns:

(Expr)

permalink .cum_fold(acc, f, exprs, include_init: false) ⇒ `Object` Also known as: cumfold Originally defined in module Functions

Note:

If you simply want the first encountered expression as accumulator, consider using cumreduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), ->(acc, x) { acc + x }, Polars.all)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

Parameters:

acc (Object) —
Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).
f (Object) —
Function to apply over the accumulator and the value. Fn(acc, value) -> new_value
exprs (Object) —
Expressions to aggregate over. May also be a wildcard expression.
include_init (Boolean) (defaults to: false) —
Include the initial accumulator state as struct field.

Returns:

(Object)

permalink .cum_sum(*names) ⇒ `Expr` Also known as: cumsum Originally defined in module Functions

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

names (Object) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)

permalink .cum_sum_horizontal(*exprs) ⇒ `Expr` Also known as: cumsum_horizontal Originally defined in module Functions

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

permalink .date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Parameters:

start (Object) —
Lower bound of the date range.
stop (Object) —
Upper bound of the date range.
interval (Object) (defaults to: "1d") —
Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.
closed ("both", "left", "right", "none") (defaults to: "both") —
Define whether the temporal window interval is closed or not.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

Note:

interval is created according to the following string language:

1ns (1 nanosecond)
1us (1 microsecond)
1ms (1 millisecond)
1s (1 second)
1m (1 minute)
1h (1 hour)
1d (1 calendar day)
1w (1 calendar week)
1mo (1 calendar month)
1q (1 calendar quarter)
1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

Parameters:

start (Object) —
Lower bound of the date range.
stop (Object) —
Upper bound of the date range.
interval (Object) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language (see the note above).
closed ("both", "left", "right", "none") (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object` Originally defined in module Functions

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

Parameters:

start (Object) —
Lower bound of the datetime range.
stop (Object) —
Upper bound of the datetime range.
interval (String) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil) —
Time unit of the resulting Datetime data type.
time_zone (String) (defaults to: nil) —
Time zone of the resulting Datetime data type.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object` Originally defined in module Functions

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

Parameters:

start (Object) —
Lower bound of the datetime range.
stop (Object) —
Upper bound of the datetime range.
interval (String) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil) —
Time unit of the resulting Datetime data type.
time_zone (String) (defaults to: nil) —
Time zone of the resulting Datetime data type.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .disable_string_cache ⇒ `nil` Originally defined in module Functions

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

(nil)

permalink .duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ `Expr` Originally defined in module Functions

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:

(Expr)

permalink .element ⇒ `Expr` Originally defined in module Functions

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:

(Expr)

permalink .enable_string_cache ⇒ `nil` Originally defined in module Functions

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

(nil)

permalink .exclude(columns) ⇒ `Object` Originally defined in module Functions

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

columns (Object) —
Column(s) to exclude from selection. This can be:
- a column name, or multiple column names
- a regular expression starting with ^ and ending with $
- a dtype or multiple dtypes

Returns:

(Object)

permalink .first(*columns) ⇒ `Expr` Originally defined in module Functions

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘

df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:

(Expr)

permalink .fold(acc, f, exprs) ⇒ `Expr` Originally defined in module Functions

Accumulate over multiple columns horizontally/row wise with a left fold.

Returns:

(Expr)

permalink .format(f_string, *args) ⇒ `Expr` Originally defined in module Functions

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

f_string (String) —
A string with placeholders. For example: "hello_{}" or "{}_world".
args (Object) —
Expression(s) that fill the placeholders

Returns:

(Expr)

permalink .from_epoch(column, unit: "s", eager: false) ⇒ `Object` Originally defined in module Functions

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

unit: "d" returns pl.Date
unit: "s" returns pl.Datetime["us"]
unit: "ms" returns pl.Datetime["ms"]
unit: "us" returns pl.Datetime["us"]
unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

column (Object) —
Series or expression to parse integers to pl.Datetime.
unit (String) (defaults to: "s") —
The unit of the timesteps since epoch time.
eager (Boolean) (defaults to: false) —
If eager evaluation is true, a Series is returned instead of an Expr.

Returns:

(Object)

permalink .from_hash(data, schema: nil, columns: nil) ⇒ `DataFrame` Originally defined in module Convert

Construct a DataFrame from a dictionary of sequences.

This operation clones data, unless you pass in a Hash<String, Series>.

Examples:

data = {"a" => [1, 2], "b" => [3, 4]}
Polars.from_hash(data)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘

Parameters:

data (Hash) —
Two-dimensional data represented as a hash. Hash must contain arrays.
columns (Array) (defaults to: nil) —
Column labels to use for resulting DataFrame. If specified, overrides any labels already present in the data. Must match data dimensions.

Returns:

(DataFrame)

permalink .groups(column) ⇒ `Object` Originally defined in module Functions

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:

(Object)

permalink .head(column, n = 10) ⇒ `Expr` Originally defined in module Functions

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

column (Object) —
Column name.
n (Integer) (defaults to: 10) —
Number of rows to return.

Returns:

(Expr)

permalink .implode(*columns) ⇒ `Expr` Originally defined in module Functions

Aggregate all column values into a list.

This function is syntactic sugar for col(columns).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘

df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ `Expr`, `Series` Also known as: arange Originally defined in module Functions

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

start (Integer, Expr, Series) —
Lower bound of range.
stop (Integer, Expr, Series) (defaults to: nil) —
Upper bound of range.
step (Integer) (defaults to: 1) —
Step size of the range.
eager (Boolean) (defaults to: false) —
If eager evaluation is true, a Series is returned instead of an Expr.
dtype (Symbol) (defaults to: nil) —
Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:

(Expr, Series)

permalink .last(*columns) ⇒ `Expr` Originally defined in module Functions

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘

df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names. If not provided (default), returns an expression to take the last column of the context instead.

Returns:

(Expr)

permalink .len ⇒ `Expr` Also known as: length Originally defined in module Functions

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using `len` in conjunction with `int_range`.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:

(Expr)

permalink .lit(value, dtype: nil, allow_object: nil) ⇒ `Expr` Originally defined in module Functions

Return an expression representing a literal value.

Returns:

(Expr)

permalink .max(*names) ⇒ `Expr` Originally defined in module Functions

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)

permalink .max_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

permalink .mean(*columns) ⇒ `Expr` Also known as: avg Originally defined in module Functions

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘

df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .mean_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:

(Expr)

permalink .median(*columns) ⇒ `Expr` Originally defined in module Functions

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .min(*names) ⇒ `Expr` Originally defined in module Functions

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)

permalink .min_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

permalink .n_unique(*columns) ⇒ `Expr` Originally defined in module Functions

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

permalink .nth(*indices) ⇒ `Expr` Originally defined in module Functions

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘

df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

Parameters:

indices (Array) —
One or more indices representing the columns to retrieve.

Returns:

(Expr)

permalink .ones(n, dtype: nil, eager: true) ⇒ `Object` Originally defined in module Functions

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

n (Integer) —
Length of the resulting column.
dtype (Object) (defaults to: nil) —
Data type of the resulting column. Defaults to Float64.
eager (Boolean) (defaults to: true) —
Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:

(Object)

permalink .quantile(column, quantile, interpolation: "nearest") ⇒ `Expr` Originally defined in module Functions

Syntactic sugar for Polars.col(column).quantile(...).

Parameters:

column (String) —
Column name.
quantile (Float) —
Quantile between 0.0 and 1.0.
interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest") —
Interpolation method.

Returns:

(Expr)

permalink .read_avro(source, columns: nil, n_rows: nil) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from Apache Avro format.

Parameters:

source (Object) —
Path to a file or a file-like object.
columns (Object) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
n_rows (Integer) (defaults to: nil) —
Stop reading from Apache Avro file after reading n_rows.

Returns:

(DataFrame)

permalink .read_csv(source, has_header: true, columns: nil, new_columns: nil, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, ignore_errors: false, parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 8192, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, storage_options: nil, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, eol_char: "\n", truncate_ragged_lines: false) ⇒ `DataFrame` Originally defined in module IO

Note:

This operation defaults to a rechunk operation at the end, meaning that all data will be stored contiguously in memory. Set rechunk: false if you are benchmarking the CSV reader. A rechunk is an expensive operation.

Read a CSV file into a DataFrame.

Parameters:

source (Object) —
Path to a file or a file-like object.
has_header (Boolean) (defaults to: true) —
Indicate if the first row of dataset is a header or not. If set to false, column names will be autogenerated in the following format: column_x, with x being an enumeration over every column in the dataset starting at 1.
columns (Object) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
new_columns (Object) (defaults to: nil) —
Rename columns right after parsing the CSV file. If the given list is shorter than the width of the DataFrame the remaining columns will have their original name.
sep (String) (defaults to: ",") —
Single byte character to use as delimiter in the file.
comment_char (String) (defaults to: nil) —
Single byte character that indicates the start of a comment line, for instance #.
quote_char (String) (defaults to: '"') —
Single byte character used for csv quoting. Set to nil to turn off special handling and escaping of quotes.
skip_rows (Integer) (defaults to: 0) —
Start reading after skip_rows lines.
dtypes (Object) (defaults to: nil) —
Overwrite dtypes during inference.
null_values (Object) (defaults to: nil) —
Values to interpret as null values. You can provide a:
- String: All values equal to this string will be null.
- Array: All values equal to any string in this array will be null.
- Hash: A hash that maps column name to a null value string.
ignore_errors (Boolean) (defaults to: false) —
Try to keep reading lines if some lines yield errors. First try infer_schema_length: 0 to read all columns as :str to check which values might cause an issue.
parse_dates (Boolean) (defaults to: false) —
Try to automatically parse dates. If this does not succeed, the column remains of data type :str.
n_threads (Integer) (defaults to: nil) —
Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system.
infer_schema_length (Integer) (defaults to: N_INFER_DEFAULT) —
Maximum number of lines to read to infer schema. If set to 0, all columns will be read as :str. If set to nil, a full table scan will be done (slow).
batch_size (Integer) (defaults to: 8192) —
Number of lines to read into the buffer at once. Modify this to change performance.
n_rows (Integer) (defaults to: nil) —
Stop reading from CSV file after reading n_rows. During multi-threaded parsing, an upper bound of n_rows rows cannot be guaranteed.
encoding ("utf8", "utf8-lossy") (defaults to: "utf8") —
Lossy means that invalid utf8 values are replaced with � characters. When using other encodings than utf8 or utf8-lossy, the input is first decoded in memory with Ruby.
low_memory (Boolean) (defaults to: false) —
Reduce memory usage at expense of performance.
rechunk (Boolean) (defaults to: true) —
Make sure that all columns are contiguous in memory by aggregating the chunks into a single array.
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
skip_rows_after_header (Integer) (defaults to: 0) —
Skip this number of rows when the header is parsed.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
eol_char (String) (defaults to: "\n") —
Single byte end of line character.
truncate_ragged_lines (Boolean) (defaults to: false) —
Truncate lines that are longer than the schema.

Returns:

(DataFrame)

permalink .read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ `BatchedCsvReader` Originally defined in module IO

Read a CSV file in batches.

Upon creation of the BatchedCsvReader, polars will gather statistics and determine the file chunks. After that work will only be done if next_batches is called.

Examples:

reader = Polars.read_csv_batched(
  "./tpch/tables_scale_100/lineitem.tbl", sep: "|", parse_dates: true
)
reader.next_batches(5)

Parameters:

source (Object) —
Path to a file or a file-like object.
has_header (Boolean) (defaults to: true) —
Indicate if the first row of dataset is a header or not. If set to false, column names will be autogenerated in the following format: column_x, with x being an enumeration over every column in the dataset starting at 1.
columns (Object) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
new_columns (Object) (defaults to: nil) —
Rename columns right after parsing the CSV file. If the given list is shorter than the width of the DataFrame the remaining columns will have their original name.
sep (String) (defaults to: ",") —
Single byte character to use as delimiter in the file.
comment_char (String) (defaults to: nil) —
Single byte character that indicates the start of a comment line, for instance #.
quote_char (String) (defaults to: '"') —
Single byte character used for csv quoting, default = ". Set to nil to turn off special handling and escaping of quotes.
skip_rows (Integer) (defaults to: 0) —
Start reading after skip_rows lines.
dtypes (Object) (defaults to: nil) —
Overwrite dtypes during inference.
null_values (Object) (defaults to: nil) —
Values to interpret as null values. You can provide a:
- String: All values equal to this string will be null.
- Array: All values equal to any string in this array will be null.
- Hash: A hash that maps column name to a null value string.
ignore_errors (Boolean) (defaults to: false) —
Try to keep reading lines if some lines yield errors. First try infer_schema_length: 0 to read all columns as :str to check which values might cause an issue.
parse_dates (Boolean) (defaults to: false) —
Try to automatically parse dates. If this does not succeed, the column remains of data type :str.
n_threads (Integer) (defaults to: nil) —
Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system.
infer_schema_length (Integer) (defaults to: N_INFER_DEFAULT) —
Maximum number of lines to read to infer schema. If set to 0, all columns will be read as :str. If set to nil, a full table scan will be done (slow).
batch_size (Integer) (defaults to: 50_000) —
Number of lines to read into the buffer at once. Modify this to change performance.
n_rows (Integer) (defaults to: nil) —
Stop reading from CSV file after reading n_rows. During multi-threaded parsing, an upper bound of n_rows rows cannot be guaranteed.
encoding ("utf8", "utf8-lossy") (defaults to: "utf8") —
Lossy means that invalid utf8 values are replaced with � characters. When using other encodings than utf8 or utf8-lossy, the input is first decoded in memory with Ruby. Defaults to utf8.
low_memory (Boolean) (defaults to: false) —
Reduce memory usage at expense of performance.
rechunk (Boolean) (defaults to: true) —
Make sure that all columns are contiguous in memory by aggregating the chunks into a single array.
skip_rows_after_header (Integer) (defaults to: 0) —
Skip this number of rows when the header is parsed.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
eol_char (String) (defaults to: "\n") —
Single byte end of line character.
truncate_ragged_lines (Boolean) (defaults to: false) —
Truncate lines that are longer than the schema.

Returns:

(BatchedCsvReader)

permalink .read_database(query, schema_overrides: nil) ⇒ `DataFrame` Also known as: read_sql Originally defined in module IO

Read a SQL query into a DataFrame.

Parameters:

query (Object) —
ActiveRecord::Relation or ActiveRecord::Result.
schema_overrides (Hash) (defaults to: nil) —
A hash mapping column names to dtypes, used to override the schema inferred from the query.

Returns:

(DataFrame)

permalink .read_delta(source, version: nil, columns: nil, rechunk: false, storage_options: nil, delta_table_options: nil) ⇒ `DataFrame` Originally defined in module IO

Reads into a DataFrame from a Delta lake table.

Parameters:

source (Object) —
DeltaTable or a Path or URI to the root of the Delta lake table.
version (Object) (defaults to: nil) —
Numerical version or timestamp version of the Delta lake table.
columns (Array) (defaults to: nil) —
Columns to select. Accepts a list of column names.
rechunk (Boolean) (defaults to: false) —
Make sure that all columns are contiguous in memory by aggregating the chunks into a single array.
storage_options (Hash) (defaults to: nil) —
Extra options for the storage backends supported by deltalake-rb.
delta_table_options (Hash) (defaults to: nil) —
Additional keyword arguments while reading a Delta lake Table.

Returns:

(DataFrame)

permalink .read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_count_name: nil, row_count_offset: 0, rechunk: true) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from Arrow IPC (Feather v2) file.

Parameters:

source (Object) —
Path to a file or a file-like object.
columns (Object) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
n_rows (Integer) (defaults to: nil) —
Stop reading from IPC file after reading n_rows.
memory_map (Boolean) (defaults to: true) —
Try to memory map the file. This can greatly improve performance on repeated queries as the OS may cache pages. Only uncompressed IPC files can be memory mapped.
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
rechunk (Boolean) (defaults to: true) —
Make sure that all data is contiguous.

Returns:

(DataFrame)

permalink .read_ipc_schema(source) ⇒ `Hash` Originally defined in module IO

Get a schema of the IPC file without reading data.

Parameters:

source (Object) —
Path to a file or a file-like object.

Returns:

(Hash)

permalink .read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from Arrow IPC record batch stream.

See "Streaming format" on https://arrow.apache.org/docs/python/ipc.html.

Parameters:

source (Object) —
Path to a file or a file-like object.
columns (Array) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
n_rows (Integer) (defaults to: nil) —
Stop reading from IPC stream after reading n_rows.
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
row_index_name (String) (defaults to: nil) —
Insert a row index column with the given name into the DataFrame as the first column. If set to nil (default), no row index column is created.
row_index_offset (Integer) (defaults to: 0) —
Start the row index at this offset. Cannot be negative. Only used if row_index_name is set.
rechunk (Boolean) (defaults to: true) —
Make sure that all data is contiguous.

Returns:

(DataFrame)

permalink .read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from a JSON file.

Parameters:

source (Object) —
Path to a file or a file-like object.

Returns:

(DataFrame)

permalink .read_ndjson(source, schema: nil, schema_overrides: nil, ignore_errors: false) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from a newline delimited JSON file.

Parameters:

source (Object) —
Path to a file or a file-like object.

Returns:

(DataFrame)

permalink .read_parquet(source, columns: nil, n_rows: nil, row_count_name: nil, row_count_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, storage_options: nil, credential_provider: nil, retries: 2, include_file_paths: nil, allow_missing_columns: false) ⇒ `DataFrame` Originally defined in module IO

Read into a DataFrame from a parquet file.

Parameters:

source (Object) —
Path to a file or a file-like object.
columns (Object) (defaults to: nil) —
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
n_rows (Integer) (defaults to: nil) —
Stop reading from parquet file after reading n_rows.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
parallel ("auto", "columns", "row_groups", "none") (defaults to: "auto") —
This determines the direction of parallelism. 'auto' will try to determine the optimal direction.
use_statistics (Boolean) (defaults to: true) —
Use statistics in the parquet to determine if pages can be skipped from reading.
hive_partitioning (Boolean) (defaults to: nil) —
Infer statistics and schema from hive partitioned URL and use them to prune reads.
glob (Boolean) (defaults to: true) —
Expand path given via globbing rules.
schema (Object) (defaults to: nil) —
Specify the datatypes of the columns. The datatypes must match the datatypes in the file(s). If there are extra columns that are not in the file(s), consider also enabling allow_missing_columns.
hive_schema (Object) (defaults to: nil) —
The column names and data types of the columns by which the data is partitioned. If set to nil (default), the schema of the Hive partitions is inferred.
try_parse_hive_dates (Boolean) (defaults to: true) —
Whether to try parsing hive values as date/datetime types.
rechunk (Boolean) (defaults to: false) —
In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks.
low_memory (Boolean) (defaults to: false) —
Reduce memory pressure at the expense of performance.
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
credential_provider (Object) (defaults to: nil) —
Provide a function that can be called to provide cloud storage credentials. The function is expected to return a hash of credential keys along with an optional credential expiry time.
retries (Integer) (defaults to: 2) —
Number of retries if accessing a cloud instance fails.
include_file_paths (String) (defaults to: nil) —
Include the path of the source file(s) as a column with this name.

Returns:

(DataFrame)

permalink .read_parquet_schema(source) ⇒ `Hash` Originally defined in module IO

Get a schema of the Parquet file without reading data.

Parameters:

source (Object) —
Path to a file or a file-like object.

Returns:

(Hash)

permalink .repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ `Object` Originally defined in module Functions

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting `eager: true`.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

value (Object) —
Value to repeat.
n (Integer) —
Repeat n times.
eager (Boolean) (defaults to: false) —
Run eagerly and collect into a Series.
name (String) (defaults to: nil) —
Only used in eager mode. As expression, use alias.

Returns:

(Object)

permalink .scan_csv(source, has_header: true, sep: ",", comment_char: nil, quote_char: '"', skip_rows: 0, dtypes: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, cache: true, with_column_names: nil, infer_schema_length: N_INFER_DEFAULT, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: true, skip_rows_after_header: 0, row_count_name: nil, row_count_offset: 0, parse_dates: false, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true) ⇒ `LazyFrame` Originally defined in module IO

Lazily read from a CSV file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

Parameters:

source (Object) —
Path to a file.
has_header (Boolean) (defaults to: true) —
Indicate if the first row of dataset is a header or not. If set to false, column names will be autogenerated in the following format: column_x, with x being an enumeration over every column in the dataset starting at 1.
sep (String) (defaults to: ",") —
Single byte character to use as delimiter in the file.
comment_char (String) (defaults to: nil) —
Single byte character that indicates the start of a comment line, for instance #.
quote_char (String) (defaults to: '"') —
Single byte character used for csv quoting. Set to nil to turn off special handling and escaping of quotes.
skip_rows (Integer) (defaults to: 0) —
Start reading after skip_rows lines. The header will be parsed at this offset.
dtypes (Object) (defaults to: nil) —
Overwrite dtypes during inference.
null_values (Object) (defaults to: nil) —
Values to interpret as null values. You can provide a:
- String: All values equal to this string will be null.
- Array: All values equal to any string in this array will be null.
- Hash: A hash that maps column name to a null value string.
ignore_errors (Boolean) (defaults to: false) —
Try to keep reading lines if some lines yield errors. First try infer_schema_length: 0 to read all columns as :str to check which values might cause an issue.
cache (Boolean) (defaults to: true) —
Cache the result after reading.
with_column_names (Object) (defaults to: nil) —
Apply a function over the column names. This can be used to update a schema just in time, thus before scanning.
infer_schema_length (Integer) (defaults to: N_INFER_DEFAULT) —
Maximum number of lines to read to infer schema. If set to 0, all columns will be read as :str. If set to nil, a full table scan will be done (slow).
n_rows (Integer) (defaults to: nil) —
Stop reading from CSV file after reading n_rows.
encoding ("utf8", "utf8-lossy") (defaults to: "utf8") —
Lossy means that invalid utf8 values are replaced with � characters.
low_memory (Boolean) (defaults to: false) —
Reduce memory usage at the expense of performance.
rechunk (Boolean) (defaults to: true) —
Reallocate to contiguous memory when all chunks/ files are parsed.
skip_rows_after_header (Integer) (defaults to: 0) —
Skip this number of rows when the header is parsed.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
parse_dates (Boolean) (defaults to: false) —
Try to automatically parse dates. If this does not succeed, the column remains of data type :str.
eol_char (String) (defaults to: "\n") —
Single byte end of line character.
truncate_ragged_lines (Boolean) (defaults to: false) —
Truncate lines that are longer than the schema.

Returns:

(LazyFrame)

permalink .scan_delta(source, version: nil, storage_options: nil, delta_table_options: nil) ⇒ `LazyFrame` Originally defined in module IO

Lazily read from a Delta lake table.

Parameters:

source (Object) —
DeltaTable or a Path or URI to the root of the Delta lake table.
version (Object) (defaults to: nil) —
Numerical version or timestamp version of the Delta lake table.
storage_options (Hash) (defaults to: nil) —
Extra options for the storage backends supported by deltalake-rb.
delta_table_options (Hash) (defaults to: nil) —
Additional keyword arguments while reading a Delta lake Table.

Returns:

(LazyFrame)

permalink .scan_ipc(source, n_rows: nil, cache: true, rechunk: true, row_count_name: nil, row_count_offset: 0, storage_options: nil, hive_partitioning: nil, hive_schema: nil, try_parse_hive_dates: true, include_file_paths: nil) ⇒ `LazyFrame` Originally defined in module IO

Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

Parameters:

source (String) —
Path to an IPC file.
n_rows (Integer) (defaults to: nil) —
Stop reading from IPC file after reading n_rows.
cache (Boolean) (defaults to: true) —
Cache the result after reading.
rechunk (Boolean) (defaults to: true) —
Reallocate to contiguous memory when all chunks/ files are parsed.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
hive_partitioning (Boolean) (defaults to: nil) —
Infer statistics and schema from Hive partitioned URL and use them to prune reads. This is unset by default (i.e. nil), meaning it is automatically enabled when a single directory is passed, and otherwise disabled.
hive_schema (Hash) (defaults to: nil) —
The column names and data types of the columns by which the data is partitioned. If set to nil (default), the schema of the Hive partitions is inferred.
try_parse_hive_dates (Boolean) (defaults to: true) —
Whether to try parsing hive values as date/datetime types.
include_file_paths (String) (defaults to: nil) —
Include the path of the source file(s) as a column with this name.

Returns:

(LazyFrame)

permalink .scan_ndjson(source, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: true, row_count_name: nil, row_count_offset: 0) ⇒ `LazyFrame` Originally defined in module IO

Lazily read from a newline delimited JSON file.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

Parameters:

source (String) —
Path to a file.
infer_schema_length (Integer) (defaults to: N_INFER_DEFAULT) —
Infer the schema from the first infer_schema_length rows.
batch_size (Integer) (defaults to: 1024) —
Number of rows to read in each batch.
n_rows (Integer) (defaults to: nil) —
Stop reading from JSON file after reading n_rows.
low_memory (Boolean) (defaults to: false) —
Reduce memory pressure at the expense of performance.
rechunk (Boolean) (defaults to: true) —
Reallocate to contiguous memory when all chunks/ files are parsed.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).

Returns:

(LazyFrame)

permalink .scan_parquet(source, n_rows: nil, row_count_name: nil, row_count_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, cache: true, storage_options: nil, credential_provider: nil, retries: 2, include_file_paths: nil, allow_missing_columns: false) ⇒ `LazyFrame` Originally defined in module IO

Lazily read from a parquet file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.

Parameters:

source (Object) —
Path to a file or a file-like object.
n_rows (Integer) (defaults to: nil) —
Stop reading from parquet file after reading n_rows.
row_count_name (String) (defaults to: nil) —
If not nil, this will insert a row count column with the given name into the DataFrame.
row_count_offset (Integer) (defaults to: 0) —
Offset to start the row_count column (only used if the name is set).
parallel ("auto", "columns", "row_groups", "none") (defaults to: "auto") —
This determines the direction of parallelism. 'auto' will try to determine the optimal direction.
use_statistics (Boolean) (defaults to: true) —
Use statistics in the parquet to determine if pages can be skipped from reading.
hive_partitioning (Boolean) (defaults to: nil) —
Infer statistics and schema from hive partitioned URL and use them to prune reads.
glob (Boolean) (defaults to: true) —
Expand path given via globbing rules.
schema (Object) (defaults to: nil) —
Specify the datatypes of the columns. The datatypes must match the datatypes in the file(s). If there are extra columns that are not in the file(s), consider also enabling allow_missing_columns.
hive_schema (Object) (defaults to: nil) —
The column names and data types of the columns by which the data is partitioned. If set to nil (default), the schema of the Hive partitions is inferred.
try_parse_hive_dates (Boolean) (defaults to: true) —
Whether to try parsing hive values as date/datetime types.
rechunk (Boolean) (defaults to: false) —
In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks.
low_memory (Boolean) (defaults to: false) —
Reduce memory pressure at the expense of performance.
cache (Boolean) (defaults to: true) —
Cache the result after reading.
storage_options (Hash) (defaults to: nil) —
Extra options that make sense for a particular storage connection.
credential_provider (Object) (defaults to: nil) —
Provide a function that can be called to provide cloud storage credentials. The function is expected to return a hash of credential keys along with an optional credential expiry time.
retries (Integer) (defaults to: 2) —
Number of retries if accessing a cloud instance fails.
include_file_paths (String) (defaults to: nil) —
Include the path of the source file(s) as a column with this name.

Returns:

(LazyFrame)

permalink .select(*exprs, **named_exprs) ⇒ `DataFrame` Originally defined in module Functions

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

exprs (Array) —
Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
named_exprs (Hash) —
Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:

(DataFrame)

permalink .set_random_seed(seed) ⇒ `nil` Originally defined in module Functions

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

seed (Integer) —
A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

(nil)

permalink .sql_expr(sql) ⇒ `Expr` Originally defined in module Functions

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

sql (Object) —
One or more SQL expressions.

Returns:

(Expr)

permalink .std(column, ddof: 1) ⇒ `Expr` Originally defined in module Functions

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘

df["a"].std
# => 3.605551275463989

Parameters:

column (Object) —
Column name.
ddof (Integer) (defaults to: 1) —
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)

permalink .string_cache ⇒ `Object`

[View source]


31
32
33

# File 'lib/polars/string_cache.rb', line 31

def self.string_cache(...)
  StringCache.new(...)
end

permalink .struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ `Object` Originally defined in module Functions

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}

Parameters:

exprs (Array) —
Column(s) to collect into a struct column, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
schema (Hash) (defaults to: nil) —
Optional schema that explicitly defines the struct field dtypes. If no columns or expressions are provided, schema keys are used to define columns.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.
named_exprs (Hash) —
Additional columns to collect into the struct column, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:

(Object)

permalink .sum(*names) ⇒ `Expr` Originally defined in module Functions

Sum all values.

Syntactic sugar for col(name).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘

df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)

permalink .sum_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:

(Expr)

permalink .tail(column, n = 10) ⇒ `Expr` Originally defined in module Functions

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

column (Object) —
Column name.
n (Integer) (defaults to: 10) —
Number of rows to return.

Returns:

(Expr)

permalink .thread_pool_size ⇒ `Integer`

Return the number of threads in the Polars thread pool.

Returns:

(Integer)

[View source]


104
105
106

# File 'lib/polars.rb', line 104

def self.thread_pool_size
  Plr.thread_pool_size
end

permalink .time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

Parameters:

start (Object) (defaults to: nil) —
Lower bound of the time range.
stop (Object) (defaults to: nil) —
Upper bound of the time range.
interval (String) (defaults to: "1h") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

Parameters:

start (Object) (defaults to: nil) —
Lower bound of the time range.
stop (Object) (defaults to: nil) —
Upper bound of the time range.
interval (String) (defaults to: "1h") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

permalink .using_string_cache ⇒ `Boolean` Originally defined in module Functions

Check whether the global string cache is enabled.

Returns:

(Boolean)

permalink .var(column, ddof: 1) ⇒ `Expr` Originally defined in module Functions

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘

df["a"].var
# => 13.0

Parameters:

column (Object) —
Column name.
ddof (Integer) (defaults to: 1) —
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)

permalink .when(*predicates, **constraints) ⇒ `When` Originally defined in module Functions

Start a "when, then, otherwise" expression.

Examples:

Below we add a column that holds the value 1 where column "foo" is greater than 2, and the value -1 where it is not.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to true are set to `null`:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

Returns:

(When)

permalink .zeros(n, dtype: nil, eager: true) ⇒ `Object` Originally defined in module Functions

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

n (Integer) —
Length of the resulting column.
dtype (Object) (defaults to: nil) —
Data type of the resulting column. Defaults to Float64.
eager (Boolean) (defaults to: true) —
Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:

(Object)

Module: Polars

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

permalink .align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object Originally defined in module Functions

Examples:

permalink .all(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Examples:

Selecting all columns.

Evaluate bitwise AND for a column.

permalink .all_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Examples:

permalink .any(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions

Examples:

permalink .any_horizontal(*exprs) ⇒ Expr Originally defined in module Functions

Examples:

permalink .approx_n_unique(*columns) ⇒ Expr Originally defined in module Functions

Examples:

permalink .arctan2(y, x) ⇒ Expr Originally defined in module Functions

Examples:

permalink .arctan2d(y, x) ⇒ Expr Originally defined in module Functions

Examples:

permalink .arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr Also known as: argsort_by Originally defined in module Functions

Examples:

Pass a single column name to compute the arg sort by that column.

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

Use gather to apply the arg sort to other columns.

permalink .arg_where(condition, eager: false) ⇒ Expr, Series Originally defined in module Functions

Examples:

permalink .coalesce(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions

Examples:

permalink .col(name, *more_names) ⇒ Expr Originally defined in module Functions

permalink .concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object Originally defined in module Functions

Examples:

permalink .concat_list(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions

Examples:

Concatenate two existing list columns. Null values are propagated.

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

Create lagged columns and collect them into a list. This mimics a rolling window.

permalink .concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr Originally defined in module Functions

Examples:

permalink .config ⇒ Object

permalink .corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ Expr Originally defined in module Functions

Examples:

Pearson's correlation:

Spearman rank correlation:

permalink .count(*columns) ⇒ Expr Originally defined in module Functions

Examples:

Return the number of non-null values in multiple columns.

permalink .cov(a, b, ddof: 1) ⇒ Expr Originally defined in module Functions

Examples:

permalink .cs ⇒ Object

permalink .cum_count(*columns, reverse: false) ⇒ Expr Originally defined in module Functions

Examples:

permalink .cum_fold(acc, f, exprs, include_init: false) ⇒ Object Also known as: cumfold Originally defined in module Functions

Examples:

permalink .cum_sum(*names) ⇒ Expr Also known as: cumsum Originally defined in module Functions

Examples:

permalink .cum_sum_horizontal(*exprs) ⇒ Expr Also known as: cumsum_horizontal Originally defined in module Functions

Examples:

permalink .date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Examples:

Using a Polars duration string to specify the interval:

permalink .date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions

Examples:

permalink .datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions

Examples:

Using Polars duration string to specify the interval:

Specifying a time zone:

permalink .datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions

Examples:

permalink .disable_string_cache ⇒ nil Originally defined in module Functions

Examples:

Construct two Series using the same global string cache.

As both Series are constructed under the same global string cache, they can be concatenated.

permalink .duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr Originally defined in module Functions

Examples:

permalink .element ⇒ Expr Originally defined in module Functions

Examples:

permalink .align_frames(*frames, on:, select: nil, reverse: false) ⇒ `Object` Originally defined in module Functions

permalink .all(*names, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

permalink .all_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

permalink .any(*names, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

permalink .any_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

permalink .approx_n_unique(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .arctan2(y, x) ⇒ `Expr` Originally defined in module Functions

permalink .arctan2d(y, x) ⇒ `Expr` Originally defined in module Functions

permalink .arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ `Expr` Also known as: argsort_by Originally defined in module Functions

permalink .arg_where(condition, eager: false) ⇒ `Expr`, `Series` Originally defined in module Functions

permalink .coalesce(exprs, *more_exprs) ⇒ `Expr` Originally defined in module Functions

permalink .col(name, *more_names) ⇒ `Expr` Originally defined in module Functions

permalink .concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ `Object` Originally defined in module Functions

permalink .concat_list(exprs, *more_exprs) ⇒ `Expr` Originally defined in module Functions

permalink .concat_str(exprs, sep: "", ignore_nulls: false) ⇒ `Expr` Originally defined in module Functions

permalink .config ⇒ `Object`

permalink .corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ `Expr` Originally defined in module Functions

permalink .count(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .cov(a, b, ddof: 1) ⇒ `Expr` Originally defined in module Functions

permalink .cs ⇒ `Object`

permalink .cum_count(*columns, reverse: false) ⇒ `Expr` Originally defined in module Functions

permalink .cum_fold(acc, f, exprs, include_init: false) ⇒ `Object` Also known as: cumfold Originally defined in module Functions

permalink .cum_sum(*names) ⇒ `Expr` Also known as: cumsum Originally defined in module Functions

permalink .cum_sum_horizontal(*exprs) ⇒ `Expr` Also known as: cumsum_horizontal Originally defined in module Functions

permalink .date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

permalink .date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object` Originally defined in module Functions

permalink .datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object` Originally defined in module Functions

permalink .datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object` Originally defined in module Functions

permalink .disable_string_cache ⇒ `nil` Originally defined in module Functions

permalink .duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ `Expr` Originally defined in module Functions

permalink .element ⇒ `Expr` Originally defined in module Functions

permalink .enable_string_cache ⇒ `nil` Originally defined in module Functions

permalink .exclude(columns) ⇒ `Object` Originally defined in module Functions

permalink .first(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .fold(acc, f, exprs) ⇒ `Expr` Originally defined in module Functions

permalink .format(f_string, *args) ⇒ `Expr` Originally defined in module Functions

permalink .from_epoch(column, unit: "s", eager: false) ⇒ `Object` Originally defined in module Functions

permalink .from_hash(data, schema: nil, columns: nil) ⇒ `DataFrame` Originally defined in module Convert

permalink .groups(column) ⇒ `Object` Originally defined in module Functions

permalink .head(column, n = 10) ⇒ `Expr` Originally defined in module Functions

permalink .implode(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ `Expr`, `Series` Also known as: arange Originally defined in module Functions

permalink .last(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .len ⇒ `Expr` Also known as: length Originally defined in module Functions

Generate an index column by using `len` in conjunction with `int_range`.

permalink .lit(value, dtype: nil, allow_object: nil) ⇒ `Expr` Originally defined in module Functions

permalink .max(*names) ⇒ `Expr` Originally defined in module Functions

permalink .max_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

permalink .mean(*columns) ⇒ `Expr` Also known as: avg Originally defined in module Functions

permalink .mean_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr` Originally defined in module Functions

permalink .median(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .min(*names) ⇒ `Expr` Originally defined in module Functions

permalink .min_horizontal(*exprs) ⇒ `Expr` Originally defined in module Functions

permalink .n_unique(*columns) ⇒ `Expr` Originally defined in module Functions

permalink .nth(*indices) ⇒ `Expr` Originally defined in module Functions

permalink .ones(n, dtype: nil, eager: true) ⇒ `Object` Originally defined in module Functions

permalink .quantile(column, quantile, interpolation: "nearest") ⇒ `Expr` Originally defined in module Functions

permalink .read_avro(source, columns: nil, n_rows: nil) ⇒ `DataFrame` Originally defined in module IO

permalink .read_database(query, schema_overrides: nil) ⇒ `DataFrame` Also known as: read_sql Originally defined in module IO

permalink .read_delta(source, version: nil, columns: nil, rechunk: false, storage_options: nil, delta_table_options: nil) ⇒ `DataFrame` Originally defined in module IO

permalink .read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_count_name: nil, row_count_offset: 0, rechunk: true) ⇒ `DataFrame` Originally defined in module IO

permalink .read_ipc_schema(source) ⇒ `Hash` Originally defined in module IO

permalink .read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ `DataFrame` Originally defined in module IO

permalink .read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ `DataFrame` Originally defined in module IO

permalink .read_ndjson(source, schema: nil, schema_overrides: nil, ignore_errors: false) ⇒ `DataFrame` Originally defined in module IO

permalink .read_parquet_schema(source) ⇒ `Hash` Originally defined in module IO