Module: Polars::Functions

Included in:
Polars
Defined in:
lib/polars/string_cache.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/functions/lazy.rb,
lib/polars/functions/eager.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/functions/business.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/escape_regex.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/functions/range/linear_space.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb

Instance Method Summary collapse

Instance Method Details

#align_frames(*frames, on:, how: nil, select: nil, descending: false) ⇒ Object

Align an array of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", how: "left", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

Parameters:

  • frames (Array)

    Array of DataFrames or LazyFrames.

  • on (Object)

    One or more columns whose unique values will be used to align the frames.

  • select (Object) (defaults to: nil)

    Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.

  • descending (Object) (defaults to: false)

    Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:



464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/polars/functions/eager.rb', line 464

# Align an array of frames on the unique values of one or more key columns.
#
# Frames missing a given key value have null-filled rows injected, and each
# result is sorted by the key, so every returned frame has the same height.
#
# @param frames [Array] DataFrames or LazyFrames (must all be the same type).
# @param on [Object] column(s) whose unique values define the alignment key.
# @param how [String, nil] join strategy used for alignment; when nil a
#   deprecation warning is issued and "left" is used.
# @param select [Object, nil] optional post-alignment column selection.
# @param descending [Boolean, Array] sort direction(s) for the key column(s).
# @return [Array] aligned frames, same count and type as the input.
def align_frames(
  *frames,
  on:,
  how: nil,
  select: nil,
  descending: false
)
  # NOTE(review): the implicit default for `how` is slated to change from
  # "left" to "full"; warn callers who rely on it.
  if how.nil?
    warn "The default `how` for `align_frames` method will change from `left` to `full` in a future version"
    how = "left"
  end

  if frames.empty?
    return []
  elsif frames.map(&:class).uniq.length != 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  # establish the superset of all "on" column values, sort, and cache
  eager = frames[0].is_a?(DataFrame)
  alignment_frame = (
    concat(frames.map { |df| df.lazy.select(on) })
      .unique(maintain_order: false)
      .sort(on, descending: descending)
  )
  # materialize (eager) or cache (lazy) once so the key superset is not
  # recomputed for every join below
  alignment_frame = (
    eager ? alignment_frame.collect.lazy : alignment_frame.cache
  )
  # finally, align all frames by joining each against the key superset
  aligned_frames =
    frames.map do |df|
      alignment_frame.join(
        df.lazy,
        on: alignment_frame.columns,
        how: how
      ).select(df.columns) # restore the frame's original column order
    end
  if !select.nil?
    aligned_frames = aligned_frames.map { |df| df.select(select) }
  end

  eager ? aligned_frames.map(&:collect) : aligned_frames
end

#all(*names, ignore_nulls: true) ⇒ Expr

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



44
45
46
47
48
49
50
# File 'lib/polars/functions/aggregation/vertical.rb', line 44

# Return an expression for all columns, or evaluate a vertical bitwise AND.
#
# With no names this is sugar for `col("*")`; otherwise it is sugar for
# `col(names).all`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean] ignore null values (default).
# @return [Expr]
def all(*names, ignore_nulls: true)
  return col("*") if names.empty?

  col(*names).all(ignore_nulls: ignore_nulls)
end

#all_horizontal(*exprs) ⇒ Expr

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



34
35
36
37
# File 'lib/polars/functions/aggregation/horizontal.rb', line 34

# Compute the bitwise AND horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @return [Expr]
def all_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.all_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#any(*names, ignore_nulls: true) ⇒ Expr

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



80
81
82
# File 'lib/polars/functions/aggregation/vertical.rb', line 80

# Evaluate a vertical bitwise OR operation; sugar for `col(names).any`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean] ignore null values (default).
# @return [Expr]
def any(*names, ignore_nulls: true)
  columns_expr = col(*names)
  columns_expr.any(ignore_nulls: ignore_nulls)
end

#any_horizontal(*exprs) ⇒ Expr

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



70
71
72
73
# File 'lib/polars/functions/aggregation/horizontal.rb', line 70

# Compute the bitwise OR horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @return [Expr]
def any_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.any_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#approx_n_unique(*columns) ⇒ Expr

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



429
430
431
# File 'lib/polars/functions/lazy.rb', line 429

# Approximate count of unique values; sugar for `col(columns).approx_n_unique`.
#
# @param columns [Array] one or more column names.
# @return [Expr]
def approx_n_unique(*columns)
  columns_expr = col(*columns)
  columns_expr.approx_n_unique
end

#arctan2(y, x) ⇒ Expr

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

c = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [c, -c, c, -c],
    "x" => [c, c, -c, -c]
  }
)
df.with_columns(Polars.arctan2("y", "x").alias("atan2"))
# =>
# shape: (4, 3)
# ┌───────────┬───────────┬───────────┐
# │ y         ┆ x         ┆ atan2     │
# │ ---       ┆ ---       ┆ ---       │
# │ f64       ┆ f64       ┆ f64       │
# ╞═══════════╪═══════════╪═══════════╡
# │ 0.707107  ┆ 0.707107  ┆ 0.785398  │
# │ -0.707107 ┆ 0.707107  ┆ -0.785398 │
# │ 0.707107  ┆ -0.707107 ┆ 2.356194  │
# │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
# └───────────┴───────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



1340
1341
1342
1343
1344
1345
1346
1347
1348
# File 'lib/polars/functions/lazy.rb', line 1340

# Compute the two-argument arctangent in radians.
#
# Returns the angle between the positive x-axis and the ray from the
# origin to (x, y).
#
# @param y [Object] column name or Expression.
# @param x [Object] column name or Expression.
# @return [Expr]
def arctan2(y, x)
  # promote plain column names to column expressions
  y, x = [y, x].map { |v| Utils.strlike?(v) ? col(v) : v }
  Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
end

#arg_sort_by(exprs, *more_exprs, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], descending: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • exprs (Object)

    Columns use to determine the ordering.

  • more_exprs (Array)

    Additional columns to arg sort by, specified as positional arguments.

  • descending (Boolean) (defaults to: false)

    Default is ascending.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last.

  • multithreaded (Boolean) (defaults to: true)

    Sort using multiple threads.

  • maintain_order (Boolean) (defaults to: false)

    Whether the order should be maintained if elements are equal.

Returns:



1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
# File 'lib/polars/functions/lazy.rb', line 1505

# Find the row indices that would sort the given columns.
#
# Ties in the first column are broken by the second, and so on.
#
# @param exprs [Object] columns used to determine the ordering.
# @param more_exprs [Array] additional sort columns, as positional args.
# @param descending [Boolean, Array] sort direction(s); default ascending.
# @param nulls_last [Boolean, Array] place null values last.
# @param multithreaded [Boolean] sort using multiple threads.
# @param maintain_order [Boolean] keep order of equal elements.
# @return [Expr]
def arg_sort_by(
  exprs,
  *more_exprs,
  descending: false,
  nulls_last: false,
  multithreaded: true,
  maintain_order: false
)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  # broadcast scalar flags to one entry per sort column
  desc_flags = Utils.extend_bool(descending, rbexprs.length, "descending", "exprs")
  last_flags = Utils.extend_bool(nulls_last, rbexprs.length, "nulls_last", "exprs")
  Utils.wrap_expr(
    Plr.arg_sort_by(rbexprs, desc_flags, last_flags, multithreaded, maintain_order)
  )
end

#arg_where(condition, eager: false) ⇒ Expr, Series

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

  • condition (Expr)

    Boolean expression to evaluate

  • eager (Boolean) (defaults to: false)

    Whether to apply this function eagerly (as opposed to lazily).

Returns:



1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
# File 'lib/polars/functions/lazy.rb', line 1631

# Return indices where `condition` evaluates true.
#
# @param condition [Expr] boolean expression to evaluate.
# @param eager [Boolean] apply eagerly (requires a Series input) instead
#   of returning a lazy expression.
# @return [Expr, Series]
def arg_where(condition, eager: false)
  unless eager
    rbexpr = Utils.parse_into_expression(condition, str_as_lit: true)
    return Utils.wrap_expr(Plr.arg_where(rbexpr))
  end

  if !condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end
  # evaluate the lazy form against the series' own frame
  condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
end

#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Count the number of business days between start and end (not including end).

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
    "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
  }
)
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end")
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 6                  │
# └────────────┴────────────┴────────────────────┘

You can pass a custom weekend - for example, if you only take Sunday off:

week_mask = [true, true, true, true, true, true, false]
df.with_columns(
  business_day_count: Polars.business_day_count(
    "start", "end", week_mask: week_mask
  )
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 7                  │
# └────────────┴────────────┴────────────────────┘

You can also pass a list of holidays to exclude from the count:

holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 0                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 5                  │
# └────────────┴────────────┴────────────────────┘

Parameters:

  • start (Object)

    Start dates.

  • stop (Object)

    End dates.

  • week_mask (Array) (defaults to: [true, true, true, true, true, false, false])

    Which days of the week to count. The default is Monday to Friday. If you wanted to count only Monday to Thursday, you would pass [true, true, true, true, false, false, false].

  • holidays (Array) (defaults to: [])

    Holidays to exclude from the count.

Returns:



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/polars/functions/business.rb', line 76

# Count the business days between `start` and `stop` (end date exclusive).
#
# @note This functionality is considered unstable.
#
# @param start [Object] start dates.
# @param stop [Object] end dates.
# @param week_mask [Array] which days of the week count as business days;
#   default is Monday-Friday.
# @param holidays [Array] dates excluded from the count.
# @return [Expr]
def business_day_count(
  start,
  stop,
  week_mask: [true, true, true, true, true, false, false],
  holidays: []
)
  # the native layer expects holidays as day offsets from the Unix epoch
  epoch = ::Date.new(1970, 1, 1)
  holiday_offsets = holidays.map { |holiday| holiday - epoch }
  Utils.wrap_expr(
    Plr.business_day_count(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      week_mask,
      holiday_offsets
    )
  )
end

#coalesce(exprs, *more_exprs, eager: false) ⇒ Expr

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘
df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘
s1 = Polars::Series.new("a", [nil, 2, nil])
s2 = Polars::Series.new("b", [1, nil, 3])
Polars.coalesce(s1, s2, eager: true)
# =>
# shape: (3,)
# Series: 'a' [i64]
# [
#         1
#         2
#         3
# ]

Parameters:

  • exprs (Array)

    Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Array)

    Additional columns to coalesce, specified as positional arguments.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
# File 'lib/polars/functions/lazy.rb', line 1706

# Fold the columns left to right, keeping the first non-null value.
#
# @param exprs [Array] columns to coalesce; strings are parsed as column
#   names, other non-expression inputs as literals.
# @param more_exprs [Array] additional columns, as positional arguments.
# @param eager [Boolean] evaluate immediately and return a Series
#   (requires at least one Series argument); default returns an Expr.
# @return [Expr, Series]
def coalesce(exprs, *more_exprs, eager: false)
  unless eager
    rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
    return Utils.wrap_expr(Plr.coalesce(rbexprs))
  end

  inputs = [exprs] + more_exprs
  series_inputs = inputs.filter_map { |e| e if e.is_a?(Series) }
  if series_inputs.empty?
    msg = "expected at least one Series in 'coalesce' if 'eager: true'"
    raise ArgumentError, msg
  end

  # refer to Series by name so the lazy form can resolve them
  names_or_literals = inputs.map { |e| e.is_a?(Series) ? e.name : e }
  Polars::DataFrame.new(series_inputs).select(coalesce(names_or_literals, eager: false)).to_series
end

#col(name, *more_names) ⇒ Expr

Return an expression representing a column in a DataFrame.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/polars/functions/col.rb', line 6

# Return an expression representing one or more columns in a DataFrame.
#
# Accepts a single name, a dtype, several names, several dtypes, or an
# Array/Set of names or dtypes.
#
# @param name [Object] a column name, dtype, or an Array/Set of names or dtypes.
# @param more_names [Array] additional names or dtypes, as positional arguments.
# @raise [TypeError] if the input is not a string, dtype, or iterable thereof.
# @return [Expr]
def col(name, *more_names)
  if more_names.any?
    if Utils.strlike?(name)
      # multiple names: select by (pattern-expandable) column names
      names_str = [name]
      names_str.concat(more_names)
      return Selector._by_name(names_str.map(&:to_s), strict: true, expand_patterns: true).as_expr
    elsif Utils.is_polars_dtype(name)
      # multiple dtypes: select every column of any of these types.
      # Use _by_dtype for consistency with the single-dtype and
      # iterable-of-dtypes branches below (was `_by_type`).
      dtypes = [name]
      dtypes.concat(more_names)
      return Selector._by_dtype(dtypes).as_expr
    else
      msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
      raise TypeError, msg
    end
  end

  if Utils.strlike?(name)
    # single column name (or "*" wildcard)
    Utils.wrap_expr(Plr.col(name.to_s))
  elsif Utils.is_polars_dtype(name)
    dtypes = [name]
    Selector._by_dtype(dtypes).as_expr
  elsif name.is_a?(::Array) || name.is_a?(::Set)
    names = Array(name)
    if names.empty?
      return Utils.wrap_expr(Plr.cols(names))
    end

    # dispatch on the type of the first element
    item = names[0]
    if Utils.strlike?(item)
      Selector._by_name(names.map(&:to_s), strict: true, expand_patterns: true).as_expr
    elsif Utils.is_polars_dtype(item)
      Selector._by_dtype(names).as_expr
    else
      msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
      raise TypeError, msg
    end
  else
    msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
    raise TypeError, msg
  end
end

#collect_all(lazy_frames, optimizations: DEFAULT_QUERY_OPT_FLAGS, engine: "auto", lazy: false) ⇒ Array

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on Polars threadpool.

Parameters:

  • lazy_frames (Array)

    A list of LazyFrames to collect.

  • optimizations (defaults to: DEFAULT_QUERY_OPT_FLAGS)

    The optimization passes done during query optimization.

    This has no effect if lazy is set to true.

  • engine (String) (defaults to: "auto")

    Select the engine used to process the query, optional. At the moment, if set to "auto" (default), the query is run using the polars streaming engine. Polars will also attempt to use the engine set by the POLARS_ENGINE_AFFINITY environment variable. If it cannot run the query using the selected engine, the query is run using the polars streaming engine.

  • lazy (Boolean) (defaults to: false)

    Return as LazyFrame that can be collected later. This is only correct if all inputs sink to disk.

    This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Returns:



1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
# File 'lib/polars/functions/lazy.rb', line 1545

# Collect multiple LazyFrames at the same time.
#
# All computation graphs are run in parallel on the Polars thread pool.
#
# @param lazy_frames [Array] LazyFrames to collect.
# @param optimizations [Object] optimization passes applied during query
#   optimization; ignored when `lazy` is true.
# @param engine [String] engine used to process the query ("auto" by default).
# @param lazy [Boolean] return a LazyFrame to collect later (unstable).
# @return [Array, LazyFrame] collected DataFrames, or a LazyFrame when
#   `lazy` is true.
def collect_all(
  lazy_frames,
  optimizations: DEFAULT_QUERY_OPT_FLAGS,
  engine: "auto",
  lazy: false
)
  # unwrap each LazyFrame to its native handle
  lfs = lazy_frames.map { |lf| lf._ldf }

  if lazy
    msg = "the `lazy` parameter of `collect_all` is considered unstable."
    Utils.issue_unstable_warning(msg)

    ldf = Plr.collect_all_lazy(lfs, optimizations._rboptflags)
    # NOTE(review): `_from_pyldf` reads like a Python-port name; this library
    # elsewhere uses rb-prefixed handles — confirm this constructor exists.
    lf = LazyFrame._from_pyldf(ldf)
    return lf
  end

  engine = LazyFrame._select_engine(engine)
  out = Plr.collect_all(lfs, engine, optimizations._rboptflags)

  # wrap the native rbdataframes into Ruby DataFrame objects
  result = out.map { |rbdf| Utils.wrap_df(rbdf) }

  result
end

#concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object

Aggregate multiple Dataframes/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘
df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘
df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

Parameters:

  • items (Object)

    DataFrames/Series/LazyFrames to concatenate.

  • rechunk (Boolean) (defaults to: false)

    Make sure that all data is in contiguous memory.

  • how ("vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align") (defaults to: "vertical")
    • Vertical: applies multiple vstack operations.
    • Diagonal: finds a union between the column schemas and fills missing column values with null.
    • Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
    • Align: combines frames horizontally, aligning rows on the common columns (see the "align" example above).
  • parallel (Boolean) (defaults to: true)

    Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

  • strict (Boolean) (defaults to: false)

    When how=horizontal, require all DataFrames to be the same height, raising an error if not.

Returns:



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/polars/functions/eager.rb', line 98

# Aggregate multiple DataFrames/Series/LazyFrames into one.
#
# @param items [Object] frames/series to concatenate.
# @param rechunk [Boolean] make sure all data is in contiguous memory.
# @param how [String] "vertical", "vertical_relaxed", "diagonal",
#   "diagonal_relaxed", "horizontal", or "align".
# @param parallel [Boolean] LazyFrames only: allow parallel execution of
#   the concatenated lazy computations.
# @param strict [Boolean] when how is "horizontal", require all DataFrames
#   to have the same height.
# @raise [ArgumentError] on an empty list, an unsupported `how`, or an
#   unsupported input type.
# @return [Object] a DataFrame, LazyFrame, Series, or Expr matching the input.
def concat(items, rechunk: false, how: "vertical", parallel: true, strict: false)
  elems = items.to_a

  if elems.empty?
    raise ArgumentError, "cannot concat empty list"
  end

  if how == "align"
    if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
      msg = "'align' strategy is not supported for #{elems[0].class.name}"
      raise TypeError, msg
    end

    # establish common columns, maintaining the order in which they appear
    all_columns = elems.flat_map { |e| e.collect_schema.names }
    key = all_columns.uniq.map.with_index.to_h
    common_cols = elems.map { |e| e.collect_schema.names }
      .reduce { |x, y| Set.new(x) & Set.new(y) }
      .sort_by { |k| key[k] }
    # we require at least one key column for 'align'
    if common_cols.empty?
      msg = "'align' strategy requires at least one common column"
      raise InvalidOperationError, msg
    end

    # align the frame data using a full outer join with no suffix-resolution
    # (so we raise an error in case of column collision, like "horizontal")
    lf = elems.map { |df| df.lazy }.reduce do |x, y|
      x.join(
        y,
        how: "full",
        on: common_cols,
        suffix: "_PL_CONCAT_RIGHT",
        maintain_order: "right_left"
      )
      # Coalesce full outer join columns
      .with_columns(
        common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
      )
      .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
    end.sort(common_cols)

    eager = elems[0].is_a?(DataFrame)
    return eager ? lf.collect : lf
  end

  first = elems[0]

  if first.is_a?(DataFrame)
    if how == "vertical"
      out = Utils.wrap_df(Plr.concat_df(elems))
    elsif how == "vertical_relaxed"
      # relaxed variants route through the lazy engine so dtypes can coerce
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "diagonal"
      out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
    elsif how == "diagonal_relaxed"
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems, strict))
    else
      # fixed: message previously contained literal doubled braces ("{{...}}"),
      # an escape artifact from the Python f-string this was ported from
      raise ArgumentError, "how must be one of {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal'}, got #{how}"
    end
  elsif first.is_a?(LazyFrame)
    if how == "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
    elsif how == "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
    elsif how == "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
    elsif how == "diagonal_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
    elsif how == "horizontal"
      return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel, strict))
    else
      # fixed: message previously omitted 'horizontal' even though the
      # branch above supports it for LazyFrames
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', and 'horizontal' concat strategy."
    end
  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end
  elsif first.is_a?(Expr)
    # expressions are appended pairwise, left to right
    out = first
    elems[1..-1].each do |e|
      out = out.append(e)
    end
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  if rechunk
    out.rechunk
  else
    out
  end
end

#concat_arr(exprs, *more_exprs) ⇒ Expr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Horizontally concatenate columns into a single array column.

Non-array columns are reshaped to a unit-width array. All columns must have a dtype of either Polars::Array.new(<DataType>, width) or Polars::<DataType>.

Examples:

Concatenate 2 array columns:

Polars.select(
  a: Polars::Series.new([[1], [3], nil], dtype: Polars::Array.new(Polars::Int64, 1)),
  b: Polars::Series.new([[3], [nil], [5]], dtype: Polars::Array.new(Polars::Int64, 1))
).with_columns(
  Polars.concat_arr("a", "b").alias("concat_arr(a, b)"),
  Polars.concat_arr("a", Polars.first("b")).alias("concat_arr(a, first(b))")
)
# =>
# shape: (3, 4)
# ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
# │ a             ┆ b             ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
# │ ---           ┆ ---           ┆ ---              ┆ ---                     │
# │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2]    ┆ array[i64, 2]           │
# ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
# │ [1]           ┆ [3]           ┆ [1, 3]           ┆ [1, 3]                  │
# │ [3]           ┆ [null]        ┆ [3, null]        ┆ [3, 3]                  │
# │ null          ┆ [5]           ┆ null             ┆ null                    │
# └───────────────┴───────────────┴──────────────────┴─────────────────────────┘

Parameters:

  • exprs (Object)

    Columns to concatenate into a single array column. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Array)

    Additional columns to concatenate into a single array column, specified as positional arguments.

Returns:



417
418
419
420
# File 'lib/polars/functions/as_datatype.rb', line 417

# Horizontally concatenate the given columns into a single array column.
#
# Strings are parsed as column names; other non-expression inputs are
# parsed as literals.
def concat_arr(exprs, *more_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_arr(rbexprs))
end

#concat_list(exprs, *more_exprs) ⇒ Expr

Concat the arrays in a Series dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

Returns:



374
375
376
377
# File 'lib/polars/functions/as_datatype.rb', line 374

# Concatenate List-dtype columns element-wise in linear time.
#
# Accepts one or more expressions; strings are parsed as column names.
def concat_list(exprs, *more_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_list(rbexprs))
end

#concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false) ⇒ Expr

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      separator: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

  • exprs (Object)

    Columns to concat into a Utf8 Series.

  • more_exprs (Array)

    Additional columns to concatenate into a single string column, specified as positional arguments.

  • separator (String) (defaults to: "")

    String value that will be used to separate the values.

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values. When false (the default), a null value in any input propagates to the output.

Returns:



544
545
546
547
# File 'lib/polars/functions/as_datatype.rb', line 544

# Horizontally concatenate Utf8 columns; non-Utf8 columns are cast first.
#
# @param separator [String] value placed between concatenated values.
# @param ignore_nulls [Boolean] when false, a null in any input yields null.
def concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_str(rbexprs, separator, ignore_nulls))
end

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr

Compute the Pearson or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.corr(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.544705
# ]
Polars.corr(s1, s2, method: "spearman", eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.5
# ]

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • method ("pearson", "spearman") (defaults to: "pearson")

    Correlation method.

  • ddof (Integer) (defaults to: nil)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

  • propagate_nans (Boolean) (defaults to: false)

    If true any NaN encountered will lead to NaN in the output. Defaults to false where NaN are regarded as larger than any finite number and thus lead to the highest rank.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
# File 'lib/polars/functions/lazy.rb', line 774

# Compute the Pearson or Spearman rank correlation between two columns.
#
# @param a [Object] Column name or Expression.
# @param b [Object] Column name or Expression.
# @param method ["pearson", "spearman"] Correlation method.
# @param ddof [Integer] Deprecated; has no effect.
# @param propagate_nans [Boolean] If true, any NaN leads to NaN in the output;
#   otherwise NaN is regarded as larger than any finite number.
# @param eager [Boolean] Evaluate immediately and return a Series; requires
#   at least one of the given arguments to be a Series.
#
# @return [Expr] (or a Series when `eager: true`)
def corr(
  a,
  b,
  method: "pearson",
  ddof: nil,
  propagate_nans: false,
  eager: false
)
  unless ddof.nil?
    Utils.issue_deprecation_warning(
      "The `ddof` parameter has no effect. Do not use it."
    )
  end

  if eager
    if !(a.is_a?(Series) || b.is_a?(Series))
      msg = "expected at least one Series in 'corr' inputs if 'eager: true'"
      raise ArgumentError, msg
    end

    # Build a temporary frame from the Series inputs, then evaluate the
    # lazy expression against it.
    frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
    exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
    frame.select(
      corr(*exprs, eager: false, method: method, propagate_nans: propagate_nans)
    ).to_series
  else
    a = Utils.parse_into_expression(a)
    b = Utils.parse_into_expression(b)

    case method
    when "pearson"
      Utils.wrap_expr(Plr.pearson_corr(a, b))
    when "spearman"
      Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
    else
      # Previously "{{'pearson', 'spearman'}}" — doubled braces are a Python
      # format-string artifact and rendered literally in Ruby.
      msg = "method must be one of {'pearson', 'spearman'}, got #{method}"
      raise ArgumentError, msg
    end
  end
end

#count(*columns) ⇒ Expr

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



97
98
99
100
101
102
103
104
# File 'lib/polars/functions/lazy.rb', line 97

# Return the number of non-null values in the given column(s).
#
# Syntactic sugar for col(columns).count. Calling without arguments is
# deprecated and returns the row count of the context instead.
def count(*columns)
  return col(*columns).count unless columns.empty?

  warn "`Polars.count` is deprecated. Use `Polars.length` instead."
  Utils.wrap_expr(Plr.len.alias("count"))
end

#cov(a, b, ddof: 1, eager: false) ⇒ Expr

Compute the covariance between two columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.cov(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         3.0
# ]

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:



860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
# File 'lib/polars/functions/lazy.rb', line 860

# Compute the covariance between two columns/expressions.
#
# @param ddof [Integer] "Delta Degrees of Freedom": divisor is N - ddof.
# @param eager [Boolean] Evaluate immediately (requires a Series input).
def cov(a, b, ddof: 1, eager: false)
  unless eager
    lhs = Utils.parse_into_expression(a)
    rhs = Utils.parse_into_expression(b)
    return Utils.wrap_expr(Plr.cov(lhs, rhs, ddof))
  end

  if !(a.is_a?(Series) || b.is_a?(Series))
    msg = "expected at least one Series in 'cov' inputs if 'eager: true'"
    raise ArgumentError, msg
  end

  # Evaluate the lazy expression against a frame built from the Series inputs.
  frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
  exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
  frame.select(cov(*exprs, eager: false, ddof: ddof)).to_series
end

#cum_count(*columns, reverse: false) ⇒ Expr

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

  • columns (Array)

    Name(s) of the columns to use.

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



134
135
136
# File 'lib/polars/functions/lazy.rb', line 134

# Return the cumulative count of non-null values in the column(s).
#
# Syntactic sugar for col(columns).cum_count.
def cum_count(*columns, reverse: false)
  selection = col(*columns)
  selection.cum_count(reverse: reverse)
end

#cum_fold(acc, exprs, returns_scalar: false, return_dtype: nil, include_init: false, &function) ⇒ Object

Note:

If you simply want the first encountered expression as accumulator, consider using cum_reduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), Polars.all) { |acc, x| acc + x }
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

Parameters:

  • acc (Object)

    Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the accumulator.

  • include_init (Boolean) (defaults to: false)

    Include the initial accumulator state as struct field.

Returns:



1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
# File 'lib/polars/functions/lazy.rb', line 1214

# Cumulatively accumulate over multiple columns horizontally with a left fold.
#
# Each cumulative result becomes a separate field of a Struct column named
# "cum_fold".
def cum_fold(
  acc,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  include_init: false,
  &function
)
  acc_expr = Utils.parse_into_expression(acc, str_as_lit: true)
  # A single expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  result = Plr.cum_fold(
    acc_expr,
    _wrap_acc_lambda(function),
    rbexprs,
    returns_scalar,
    rt,
    include_init
  ).alias("cum_fold")
  Utils.wrap_expr(result)
end

#cum_reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Cumulatively reduce horizontally across columns with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(Polars.cum_reduce(Polars.all) { |acc, x| acc + x })
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬────────────┐
# │ a   ┆ b   ┆ c   ┆ cum_reduce │
# │ --- ┆ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3]  │
# ╞═════╪═════╪═════╪════════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {1,4,9}    │
# │ 2   ┆ 4   ┆ 6   ┆ {2,6,12}   │
# │ 3   ┆ 5   ┆ 7   ┆ {3,8,15}   │
# └─────┴─────┴─────┴────────────┘

Parameters:

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the input expressions.

Returns:



1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
# File 'lib/polars/functions/lazy.rb', line 1281

# Cumulatively reduce horizontally across columns with a left fold.
#
# Each cumulative result becomes a separate field of a Struct column named
# "cum_reduce".
def cum_reduce(
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  # A single expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  result = Plr.cum_reduce(
    _wrap_acc_lambda(function),
    rbexprs,
    returns_scalar,
    rt
  ).alias("cum_reduce")
  Utils.wrap_expr(result)
end

#cum_sum(*names) ⇒ Expr

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

  • names (Object)

    Name(s) of the columns to use in the aggregation.

Returns:



277
278
279
# File 'lib/polars/functions/aggregation/vertical.rb', line 277

# Cumulatively sum all values.
#
# Syntactic sugar for col(names).cum_sum.
def cum_sum(*names)
  selection = col(*names)
  selection.cum_sum
end

#cum_sum_horizontal(*exprs) ⇒ Expr

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



241
242
243
244
245
246
247
248
249
# File 'lib/polars/functions/aggregation/horizontal.rb', line 241

# Cumulatively sum all values horizontally across columns.
#
# Returns a Struct column named "cum_sum" with one field per running total.
def cum_sum_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  wrapped = parsed.map { |rbexpr| Utils.wrap_expr(rbexpr) }

  # (Expr): use u32 as that will not cast to float as eagerly
  folded = Polars.cum_fold(Polars.lit(0).cast(UInt32), wrapped) { |a, b| a + b }
  folded.alias("cum_sum")
end

#date(year, month, day) ⇒ Expr

Create a Polars literal expression of type Date.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6]
  }
)
df.with_columns(Polars.date(2024, Polars.col("month"), Polars.col("day")))
# =>
# shape: (3, 3)
# ┌───────┬─────┬────────────┐
# │ month ┆ day ┆ date       │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ date       │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 4   ┆ 2024-01-04 │
# │ 2     ┆ 5   ┆ 2024-02-05 │
# │ 3     ┆ 6   ┆ 2024-03-06 │
# └───────┴─────┴────────────┘

We can also use Polars.date for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2024, 1, 1), Date.new(2024, 1, 1), Date.new(2024, 1, 1)],
    "end" => [Date.new(2024, 5, 1), Date.new(2024, 7, 1), Date.new(2024, 9, 1)]
  }
)
df.filter(Polars.col("end") > Polars.date(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌────────────┬────────────┐
# │ start      ┆ end        │
# │ ---        ┆ ---        │
# │ date       ┆ date       │
# ╞════════════╪════════════╡
# │ 2024-01-01 ┆ 2024-07-01 │
# │ 2024-01-01 ┆ 2024-09-01 │
# └────────────┴────────────┘

Parameters:

  • year (Object)

    column or literal.

  • month (Object)

    column or literal, ranging from 1-12.

  • day (Object)

    column or literal, ranging from 1-31.

Returns:



179
180
181
182
183
184
185
# File 'lib/polars/functions/as_datatype.rb', line 179

# Create a Polars literal expression of type Date.
#
# @param year [Object] column or literal.
# @param month [Object] column or literal, ranging from 1-12.
# @param day [Object] column or literal, ranging from 1-31.
def date(
  year,
  month,
  day
)
  # Build a datetime at midnight, then truncate it to a Date named "date".
  dt = datetime(year, month, day)
  dt.cast(Date).alias("date")
end

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define whether the temporal window interval is closed or not.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/polars/functions/range/date_range.rb', line 37

# Create a range of type Datetime (or Date).
#
# @param interval [Object] a Polars duration string, e.g. "3d12h4m25s".
# @param closed ["both", "left", "right", "none"] window closedness.
# @param eager [Boolean] evaluate immediately and return a Series.
def date_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(Plr.date_range(lower, upper, interval, closed))

  eager ? F.select(expr).to_series : expr
end

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

interval is created according to the following string language:

  • 1ns (1 nanosecond)
  • 1us (1 microsecond)
  • 1ms (1 millisecond)
  • 1s (1 second)
  • 1m (1 minute)
  • 1h (1 hour)
  • 1d (1 calendar day)
  • 1w (1 calendar week)
  • 1mo (1 calendar month)
  • 1q (1 calendar quarter)
  • 1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/polars/functions/range/date_range.rb', line 116

# Create a column of date ranges (one list per row).
#
# @param interval [Object] a Polars duration string, e.g. "1d".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param eager [Boolean] evaluate immediately and return a Series.
def date_ranges(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(Plr.date_ranges(lower, upper, interval, closed))

  eager ? F.select(expr).to_series : expr
end

#datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil, time_unit: "us", time_zone: nil, ambiguous: "raise") ⇒ Expr

Create a Polars literal expression of type Datetime.

Examples:

df = Polars::DataFrame.new(
  {
    "month" => [1, 2, 3],
    "day" => [4, 5, 6],
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(
  Polars.datetime(
    2024,
    Polars.col("month"),
    Polars.col("day"),
    Polars.col("hour"),
    Polars.col("minute"),
    time_zone: "Australia/Sydney"
  )
)
# =>
# shape: (3, 5)
# ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
# │ month ┆ day ┆ hour ┆ minute ┆ datetime                       │
# │ ---   ┆ --- ┆ ---  ┆ ---    ┆ ---                            │
# │ i64   ┆ i64 ┆ i64  ┆ i64    ┆ datetime[μs, Australia/Sydney] │
# ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
# │ 1     ┆ 4   ┆ 12   ┆ 15     ┆ 2024-01-04 12:15:00 AEDT       │
# │ 2     ┆ 5   ┆ 13   ┆ 30     ┆ 2024-02-05 13:30:00 AEDT       │
# │ 3     ┆ 6   ┆ 14   ┆ 45     ┆ 2024-03-06 14:45:00 AEDT       │
# └───────┴─────┴──────┴────────┴────────────────────────────────┘

We can also use Polars.datetime for filtering:

df = Polars::DataFrame.new(
  {
    "start" => [
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0),
      DateTime.new(2024, 1, 1, 0, 0, 0)
    ],
    "end" => [
      DateTime.new(2024, 5, 1, 20, 15, 10),
      DateTime.new(2024, 7, 1, 21, 25, 20),
      DateTime.new(2024, 9, 1, 22, 35, 30)
    ]
  }
)
df.filter(Polars.col("end") > Polars.datetime(2024, 6, 1))
# =>
# shape: (2, 2)
# ┌─────────────────────┬─────────────────────┐
# │ start               ┆ end                 │
# │ ---                 ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╡
# │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
# │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
# └─────────────────────┴─────────────────────┘

Parameters:

  • year (Object)

    Column or literal.

  • month (Object)

    Column or literal, ranging from 1-12.

  • day (Object)

    Column or literal, ranging from 1-31.

  • hour (Object) (defaults to: nil)

    Column or literal, ranging from 0-23.

  • minute (Object) (defaults to: nil)

    Column or literal, ranging from 0-59.

  • second (Object) (defaults to: nil)

    Column or literal, ranging from 0-59.

  • microsecond (Object) (defaults to: nil)

    Column or literal, ranging from 0-999999.

  • time_unit ('us', 'ms', 'ns') (defaults to: "us")

    Time unit of the resulting expression.

  • time_zone (Object) (defaults to: nil)

    Time zone of the resulting expression.

  • ambiguous ('raise', 'earliest', 'latest', 'null') (defaults to: "raise")

    Determine how to deal with ambiguous datetimes:

    • 'raise' (default): raise
    • 'earliest': use the earliest datetime
    • 'latest': use the latest datetime
    • 'null': set to null

Returns:



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/polars/functions/as_datatype.rb', line 90

# Create a Polars literal expression of type Datetime.
#
# The time components (hour/minute/second/microsecond) are optional and
# passed through as nil when omitted.
#
# @param time_unit ["us", "ms", "ns"] time unit of the resulting expression.
# @param time_zone [Object] time zone of the resulting expression.
# @param ambiguous ["raise", "earliest", "latest", "null"] how to resolve
#   ambiguous datetimes.
def datetime(
  year,
  month,
  day,
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil,
  time_unit: "us",
  time_zone: nil,
  ambiguous: "raise"
)
  # Parse a component only when it was supplied; otherwise keep nil.
  parse_opt = ->(v) { Utils.parse_into_expression(v) unless v.nil? }

  ambiguous_expr = Utils.parse_into_expression(ambiguous, str_as_lit: true)

  components = [
    Utils.parse_into_expression(year),
    Utils.parse_into_expression(month),
    Utils.parse_into_expression(day),
    parse_opt.(hour),
    parse_opt.(minute),
    parse_opt.(second),
    parse_opt.(microsecond)
  ]

  Utils.wrap_expr(
    Plr.datetime(*components, time_unit, time_zone, ambiguous_expr)
  )
end

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/polars/functions/range/datetime_range.rb', line 52

# Generate a datetime range.
#
# @param interval [String] a Polars duration string, e.g. "1mo".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param time_unit [nil, "ns", "us", "ms"] time unit of the result.
# @param time_zone [String] time zone of the result.
# @param eager [Boolean] evaluate immediately and return a Series.
def datetime_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # A nanosecond-granularity interval forces the "ns" time unit.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  expr = Utils.wrap_expr(
    Plr.datetime_range(lower, upper, interval, closed, time_unit, time_zone)
  )

  eager ? Polars.select(expr).to_series : expr
end

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/polars/functions/range/datetime_range.rb', line 119

# Create a column of datetime ranges (one list per row).
#
# @param interval [String] a Polars duration string, e.g. "1d".
# @param closed ["both", "left", "right", "none"] which sides are inclusive.
# @param time_unit [nil, "ns", "us", "ms"] time unit of the result.
# @param time_zone [String] time zone of the result.
# @param eager [Boolean] evaluate immediately and return a Series.
def datetime_ranges(
  start,
  stop,
  interval: "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # A nanosecond-granularity interval forces the "ns" time unit.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  expr = Utils.wrap_expr(
    Plr.datetime_ranges(lower, upper, interval, closed, time_unit, time_zone)
  )

  eager ? Polars.select(expr).to_series : expr
end

#disable_string_cachenil

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


90
91
92
# File 'lib/polars/string_cache.rb', line 90

# Disable and clear the global string cache.
#
# Thin delegation to the native extension.
#
# @return [nil]
def disable_string_cache
  Plr.disable_string_cache
end

#dtype_of(col_or_expr) ⇒ DataTypeExpr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get a lazily evaluated DataType of a column or expression.

Returns:



10
11
12
13
14
15
16
17
18
19
# File 'lib/polars/functions/datatype.rb', line 10

# Get a lazily evaluated DataType of a column or expression.
#
# @note This functionality is considered unstable and may change at any
#   point without being considered a breaking change.
#
# @param col_or_expr [String, Expr] Column name (resolved via `F.col`) or
#   an expression.
#
# @return [DataTypeExpr]
def dtype_of(col_or_expr)
  expr = col_or_expr.is_a?(::String) ? F.col(col_or_expr) : col_or_expr
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.of_expr(expr._rbexpr))
end

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: nil) ⇒ Expr

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/polars/functions/as_datatype.rb', line 263

# Create a Polars Duration expression from distinct time components.
#
# Each component may be nil (omitted), a column name, an expression, or a
# literal. When nanoseconds are given and no time unit is specified, the
# time unit becomes "ns"; otherwise it defaults to "us".
#
# @return [Expr]
def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: nil
)
  # Nanosecond input implies nanosecond precision unless told otherwise.
  time_unit = "ns" if time_unit.nil? && !nanoseconds.nil?

  # Convert each supplied component into a backend expression; nil stays nil.
  to_expr = ->(v) { v.nil? ? nil : Utils.parse_into_expression(v, str_as_lit: false) }

  weeks = to_expr.(weeks)
  days = to_expr.(days)
  hours = to_expr.(hours)
  minutes = to_expr.(minutes)
  seconds = to_expr.(seconds)
  milliseconds = to_expr.(milliseconds)
  microseconds = to_expr.(microseconds)
  nanoseconds = to_expr.(nanoseconds)

  time_unit = "us" if time_unit.nil?

  Utils.wrap_expr(
    Plr.duration(
      weeks,
      days,
      hours,
      minutes,
      seconds,
      milliseconds,
      microseconds,
      nanoseconds,
      time_unit
    )
  )
end

#elementExpr

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_columns(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:



50
51
52
# File 'lib/polars/functions/lazy.rb', line 50

# Alias for an element being evaluated in an `eval` expression
# (e.g. inside `Expr.list.eval`).
#
# @return [Expr]
def element
  Utils.wrap_expr(Plr.element)
end

#enable_string_cachenil

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


63
64
65
# File 'lib/polars/string_cache.rb', line 63

# Enable the global string cache.
#
# Categorical columns created under the same global string cache share
# physical values for equal strings, allowing concatenation and joins.
# Thin wrapper delegating to the native implementation.
#
# @return [nil]
def enable_string_cache
  Plr.enable_string_cache
end

#escape_regex(s) ⇒ String

Escapes string regex meta characters.

Parameters:

  • s (String)

    The string whose meta characters will be escaped.

Returns:



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/polars/functions/escape_regex.rb', line 9

# Escape regex meta characters in a plain string.
#
# @param s [String] The string whose meta characters will be escaped.
#
# @raise [TypeError] if given an Expr (use `Expr.str.escape_regex`) or any
#   non-String value.
# @return [String]
def escape_regex(s)
  case s
  when Expr
    raise TypeError, "escape_regex function is unsupported for `Expr`, you may want use `Expr.str.escape_regex` instead"
  when ::String
    Plr.escape_regex(s)
  else
    raise TypeError, "escape_regex function supports only `String` type, got `#{s.class.name}`"
  end
end

#exclude(columns, *more_columns) ⇒ Object

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    The name or datatype of the column(s) to exclude. Accepts regular expression input. Regular expressions should start with ^ and end with $.

  • more_columns (Array)

    Additional names or datatypes of columns to exclude, specified as positional arguments.

Returns:



1408
1409
1410
# File 'lib/polars/functions/lazy.rb', line 1408

# Exclude certain columns from a wildcard/regex selection.
#
# Syntactic sugar for `col("*").exclude(...)`. Regular expressions must
# start with `^` and end with `$`.
#
# @param columns [Object] Name or datatype of the column(s) to exclude.
# @param more_columns [Array] Additional names or datatypes to exclude.
#
# @return [Expr]
def exclude(columns, *more_columns)
  col("*").exclude(columns, *more_columns)
end

#field(name) ⇒ Expr

Select a field in the current struct.with_fields scope.

Examples:

df = Polars::DataFrame.new({"a" => [{"x" => 5, "y" => 2}, {"x" => 3, "y" => 4}]})
df.select(Polars.col("a").struct.with_fields(Polars.field("x") ** 2))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {25,2}    │
# │ {9,4}     │
# └───────────┘

Parameters:

  • name (Object)

    Name of the field(s) to select.

Returns:



23
24
25
26
27
28
# File 'lib/polars/functions/lazy.rb', line 23

# Select a field in the current `struct.with_fields` scope.
#
# @param name [String, Array] Name of the field(s) to select; a single
#   string is wrapped into an array before dispatch.
#
# @return [Expr]
def field(name)
  names = name.is_a?(::String) ? [name] : name
  Utils.wrap_expr(Plr.field(names))
end

#first(*columns) ⇒ Expr

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘
df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:



485
486
487
488
489
490
491
# File 'lib/polars/functions/lazy.rb', line 485

# Get the first value.
#
# With no arguments, returns an expression taking the first column of the
# context; with column names, takes the first value of each named column.
#
# @param columns [Array] Zero or more column names.
#
# @return [Expr]
def first(*columns)
  return cs.first.as_expr if columns.empty?

  col(*columns).first
end

#fold(acc, exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Accumulate over multiple columns horizontally/row wise with a left fold.

Examples:

Horizontally sum over all columns and add 1.

df = Polars::DataFrame.new(
 {
   "a" => [1, 2, 3],
   "b" => [3, 4, 5],
   "c" => [5, 6, 7]
 }
)
df.select(
  Polars.fold(Polars.lit(1), Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i32 │
# ╞═════╡
# │ 10  │
# │ 13  │
# │ 16  │
# └─────┘

You can also apply a condition/predicate on all columns:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.filter(
  Polars.fold(Polars.lit(true), Polars.col("*") > 1) { |acc, x| acc & x }
)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:



1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
# File 'lib/polars/functions/lazy.rb', line 1078

# Accumulate over multiple columns horizontally/row-wise with a left fold.
#
# @param acc [Object] Initial accumulator (strings become literals).
# @param exprs [Object] Expression(s) folded over; a bare Expr is wrapped
#   into a one-element array.
# @param returns_scalar [Boolean] Whether the fold yields a scalar.
# @param return_dtype [Object] Optional dtype of the result.
# @param function [Proc] Two-argument block `(acc, x)` applied per column.
#
# @return [Expr]
def fold(
  acc,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  acc_expr = Utils.parse_into_expression(acc, str_as_lit: true)
  exprs = [exprs] if exprs.is_a?(Expr)

  dtype_expr =
    return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(
    Plr.fold(acc_expr, _wrap_acc_lambda(function), rbexprs, returns_scalar, dtype_expr)
  )
end

#format(f_string, *args) ⇒ Expr

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

  • f_string (String)

    A string with placeholders. For example: "hello_{}" or "{}_world".

  • args (Object)

    Expression(s) that fill the placeholders

Returns:



582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'lib/polars/functions/as_datatype.rb', line 582

# Format expressions as a string.
#
# Each "{}" placeholder in `f_string` is filled, in order, by the
# corresponding argument; literal segments become string literals, and the
# pieces are joined with `concat_str`.
#
# @param f_string [String] Template string with "{}" placeholders.
# @param args [Object] Expression(s) that fill the placeholders.
#
# @raise [ArgumentError] if the number of placeholders differs from the
#   number of arguments.
# @return [Expr]
def format(f_string, *args)
  if f_string.scan("{}").length != args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  remaining = args.dup
  # Split keeps the "{}" tokens via the capture group; empty segments
  # (e.g. leading/adjacent placeholders) are dropped.
  pieces = f_string.split(/(\{\})/).filter_map do |segment|
    if segment == "{}"
      Utils.wrap_expr(Utils.parse_into_expression(remaining.shift))
    elsif !segment.empty?
      lit(segment)
    end
  end

  concat_str(pieces, separator: "")
end

#from_epoch(column, time_unit: "s") ⇒ Object

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • time_unit: "d" returns pl.Date
  • time_unit: "s" returns pl.Datetime["us"]
  • time_unit: "ms" returns pl.Datetime["ms"]
  • time_unit: "us" returns pl.Datetime["us"]
  • time_unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), time_unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

  • column (Object)

    Series or expression to parse integers to pl.Datetime.

  • time_unit (String) (defaults to: "s")

    The unit of the timesteps since epoch time.

Returns:



1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
# File 'lib/polars/functions/lazy.rb', line 1752

# Utility function that parses an epoch timestamp (or Unix time) to a
# Polars Date(time).
#
# The resulting dtype depends on `time_unit`: "d" gives Date; "s", "ms",
# "us", "ns" give Datetime of the corresponding precision (seconds are
# scaled up to microseconds).
#
# @param column [Object] Column name, Series, or expression of integers;
#   other values are wrapped into a Series.
# @param time_unit [String] Unit of the timesteps since epoch time.
#
# @raise [ArgumentError] if `time_unit` is not one of 'ns', 'us', 'ms',
#   's', 'd'.
# @return [Expr, Series]
def from_epoch(column, time_unit: "s")
  if Utils.strlike?(column)
    column = F.col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  case time_unit
  when "d"
    column.cast(Date)
  when "s"
    # Seconds have no native Datetime unit; scale to microseconds.
    (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
  when *Utils::DTYPE_TEMPORAL_UNITS
    column.cast(Datetime.new(time_unit))
  else
    # BUGFIX: message previously rendered literal doubled braces
    # ("{{'ns', ...}}"), an artifact of porting a Python f-string.
    raise ArgumentError, "`time_unit` must be one of {'ns', 'us', 'ms', 's', 'd'}, got #{time_unit.inspect}."
  end
end

#groups(column) ⇒ Object

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:



1415
1416
1417
# File 'lib/polars/functions/lazy.rb', line 1415

# Syntactic sugar for `Polars.col(column).agg_groups`.
#
# @param column [Object] Column name.
#
# @return [Expr]
def groups(column)
  col(column).agg_groups
end

#head(column, n = 10) ⇒ Expr

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



642
643
644
# File 'lib/polars/functions/lazy.rb', line 642

# Get the first `n` rows.
#
# Syntactic sugar for `col(column).head(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return (default 10).
#
# @return [Expr]
def head(column, n = 10)
  col(column).head(n)
end

#implode(*columns) ⇒ Expr

Aggregate all column values into a list.

This function is syntactic sugar for col(name).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘
df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



177
178
179
# File 'lib/polars/functions/lazy.rb', line 177

# Aggregate all column values into a list.
#
# Syntactic sugar for `col(name).implode`.
#
# @param columns [Array] One or more column names.
#
# @return [Expr]
def implode(*columns)
  col(*columns).implode
end

#int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64) ⇒ Expr, Series Also known as: arange

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

  • start (Integer, Expr, Series) (defaults to: 0)

    Lower bound of range.

  • stop (Integer, Expr, Series) (defaults to: nil)

    Upper bound of range.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • eager (Boolean) (defaults to: false)

    If eager evaluation is true, a Series is returned instead of an Expr.

  • dtype (Symbol) (defaults to: Int64)

    Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/polars/functions/range/int_range.rb', line 31

# Create a range expression (or Series), aliased "arange".
#
# With a single argument, counts from 0 up to that value.
#
# @param start [Integer, Expr, Series] Lower bound of range.
# @param stop [Integer, Expr, Series] Upper bound of range.
# @param step [Integer] Step size of the range.
# @param eager [Boolean] Return a Series instead of an Expr.
# @param dtype [Object] Integer dtype of the result (default Int64).
#
# @return [Expr, Series]
def int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64)
  # Single-argument form: int_range(stop) counts from 0.
  if stop.nil?
    stop = start
    start = 0
  end

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  # An explicit nil dtype falls back to Int64; symbols become strings.
  dtype ||= Int64
  dtype = dtype.to_s if dtype.is_a?(Symbol)
  expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, dtype)).alias("arange")

  eager ? select(expr).to_series : expr
end

#int_ranges(start = 0, stop = nil, step: 1, dtype: Int64, eager: false) ⇒ Expr, Series

Generate a range of integers for each row of the input columns.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2]})
df.with_columns(int_range: Polars.int_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌───────┬─────┬────────────┐
# │ start ┆ end ┆ int_range  │
# │ ---   ┆ --- ┆ ---        │
# │ i64   ┆ i64 ┆ list[i64]  │
# ╞═══════╪═════╪════════════╡
# │ 1     ┆ 3   ┆ [1, 2]     │
# │ -1    ┆ 2   ┆ [-1, 0, 1] │
# └───────┴─────┴────────────┘

end can be omitted for a shorter syntax.

df.select("end", int_range: Polars.int_ranges("end"))
# =>
# shape: (2, 2)
# ┌─────┬───────────┐
# │ end ┆ int_range │
# │ --- ┆ ---       │
# │ i64 ┆ list[i64] │
# ╞═════╪═══════════╡
# │ 3   ┆ [0, 1, 2] │
# │ 2   ┆ [0, 1]    │
# └─────┴───────────┘

Parameters:

  • start (Integer, Expr, Series) (defaults to: 0)

    Start of the range (inclusive). Defaults to 0.

  • stop (Integer, Expr, Series) (defaults to: nil)

    End of the range (exclusive). If set to nil (default), the value of start is used and start is set to 0.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • dtype (Object) (defaults to: Int64)

    Integer data type of the ranges. Defaults to Int64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/polars/functions/range/int_range.rb', line 94

# Generate a range of integers for each row of the input columns.
#
# With a single argument, each range counts from 0.
#
# @param start [Integer, Expr, Series] Start of the range (inclusive).
# @param stop [Integer, Expr, Series] End of the range (exclusive).
# @param step [Integer] Step size of the range.
# @param dtype [Object] Integer dtype of the ranges (default Int64).
# @param eager [Boolean] Return a Series instead of an Expr.
#
# @return [Expr, Series]
def int_ranges(
  start = 0,
  stop = nil,
  step: 1,
  dtype: Int64,
  eager: false
)
  # Single-argument form: int_ranges(stop) counts from 0.
  if stop.nil?
    stop = start
    start = 0
  end

  dtype_expr = Utils.parse_into_datatype_expr(dtype)
  expr = Utils.wrap_expr(
    Plr.int_ranges(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      Utils.parse_into_expression(step),
      dtype_expr._rbdatatype_expr
    )
  )

  eager ? F.select(expr).to_series : expr
end

#last(*columns) ⇒ Expr

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘
df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘
df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the last column of the context instead.

Returns:



545
546
547
548
549
550
551
# File 'lib/polars/functions/lazy.rb', line 545

# Get the last value.
#
# With no arguments, returns an expression taking the last column of the
# context; with column names, takes the last value of each named column.
#
# @param columns [Array] Zero or more column names.
#
# @return [Expr]
def last(*columns)
  return cs.last.as_expr if columns.empty?

  col(*columns).last
end

#lenExpr Also known as: length

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using len in conjunction with int_range.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:



44
45
46
# File 'lib/polars/functions/len.rb', line 44

# Return the number of rows in the context (similar to COUNT(*) in SQL).
#
# @return [Expr]
def len
  Utils.wrap_expr(Plr.len)
end

#linear_spaces(start, stop, num_samples, closed: "both", as_array: false, eager: false) ⇒ Expr, Series

Note:

This functionality is experimental. It may be changed at any point without it being considered a breaking change.

Generate a sequence of evenly-spaced values for each row between start and end.

The number of values in each sequence is determined by num_samples.

Examples:

df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2], "num_samples" => [4, 5]})
df.with_columns(ls: Polars.linear_spaces("start", "end", "num_samples"))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬────────────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls                     │
# │ ---   ┆ --- ┆ ---         ┆ ---                    │
# │ i64   ┆ i64 ┆ i64         ┆ list[f64]              │
# ╞═══════╪═════╪═════════════╪════════════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 1.666667, … 3.0] │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, -0.25, … 2.0]   │
# └───────┴─────┴─────────────┴────────────────────────┘
df.with_columns(ls: Polars.linear_spaces("start", "end", 3, as_array: true))
# =>
# shape: (2, 4)
# ┌───────┬─────┬─────────────┬──────────────────┐
# │ start ┆ end ┆ num_samples ┆ ls               │
# │ ---   ┆ --- ┆ ---         ┆ ---              │
# │ i64   ┆ i64 ┆ i64         ┆ array[f64, 3]    │
# ╞═══════╪═════╪═════════════╪══════════════════╡
# │ 1     ┆ 3   ┆ 4           ┆ [1.0, 2.0, 3.0]  │
# │ -1    ┆ 2   ┆ 5           ┆ [-1.0, 0.5, 2.0] │
# └───────┴─────┴─────────────┴──────────────────┘

Parameters:

  • start (Object)

    Lower bound of the range.

  • stop (Object)

    Upper bound of the range.

  • num_samples (Integer)

    Number of samples in the output sequence.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the interval are closed (inclusive).

  • as_array (Boolean) (defaults to: false)

    Return result as a fixed-length Array. num_samples must be a constant.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/polars/functions/range/linear_space.rb', line 53

# Generate a sequence of evenly-spaced values for each row between
# `start` and `stop`, with `num_samples` values per sequence.
#
# @note Experimental; may change without being considered breaking.
#
# @param start [Object] Lower bound of the range.
# @param stop [Object] Upper bound of the range.
# @param num_samples [Integer] Number of samples in each output sequence.
# @param closed ['both', 'left', 'right', 'none'] Which interval sides
#   are inclusive.
# @param as_array [Boolean] Return fixed-length Arrays (requires constant
#   `num_samples`).
# @param eager [Boolean] Return a Series instead of an Expr.
#
# @return [Expr, Series]
def linear_spaces(
  start,
  stop,
  num_samples,
  closed: "both",
  as_array: false,
  eager: false
)
  expr = Utils.wrap_expr(
    Plr.linear_spaces(
      Utils.parse_into_expression(start),
      Utils.parse_into_expression(stop),
      Utils.parse_into_expression(num_samples),
      closed,
      as_array
    )
  )

  eager ? F.select(expr).to_series : expr
end

#lit(value, dtype: nil, allow_object: false) ⇒ Expr

Return an expression representing a literal value.

Examples:

Literal scalar values:

Polars.lit(1)
Polars.lit(5.5)
Polars.lit(nil)
Polars.lit("foo_bar")
Polars.lit(Date.new(2021, 1, 20))
Polars.lit(DateTime.new(2023, 3, 31, 10, 30, 45))

Literal list/Series data (1D):

Polars.lit([1, 2, 3])
Polars.lit(Polars::Series.new("x", [1, 2, 3]))

Literal list/Series data (2D):

Polars.lit([[1, 2], [3, 4]])
Polars.lit(Polars::Series.new("y", [[1, 2], [3, 4]]))

Returns:



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/polars/functions/lit.rb', line 22

# Return an expression representing a literal value.
#
# @param value [Object] Scalar, Time/DateTime, Date, Array, Numo::NArray,
#   or Series to turn into a literal expression.
# @param dtype [Object] Optional dtype to cast the literal to.
# @param allow_object [Boolean] Passed through to the native literal
#   constructor.
#
# @return [Expr]
def lit(value, dtype: nil, allow_object: false)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    time_unit = dtype&.time_unit || "ns"
    # BUGFIX: was `dtype.&time_zone`, which invokes a method named `&` on
    # dtype and fails at runtime; safe navigation (as on the line above)
    # was intended.
    time_zone = dtype&.time_zone
    e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
    if time_zone
      return e.dt.replace_time_zone(time_zone.to_s)
    else
      return e
    end
  elsif value.is_a?(::Date)
    # Dates go through a UTC Time literal, then cast down to Date.
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    value = value._s
    return Utils.wrap_expr(Plr.lit(value, allow_object, false))
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    # Array-likes are wrapped in a one-row list Series so the literal is a
    # single (list) value.
    return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object, true))
end

#map_batches(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Note:

A UDF passed to map_batches must be pure, meaning that it cannot modify or depend on state other than its arguments. We may call the function with arbitrary input data.

Map a custom function over multiple columns/expressions.

Produces a single Series result.

Examples:

test_func = lambda do |a, b, c|
  a + b + c
end
df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3, 4],
    "b" => [4, 5, 6, 7]
  }
)

df.with_columns(
  (
    Polars.struct(["a", "b"]).map_batches { |x| test_func.(x.struct.field("a"), x.struct.field("b"), 1) }
  ).alias("a+b+c")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ b   ┆ a+b+c │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ i64   │
# ╞═════╪═════╪═══════╡
# │ 1   ┆ 4   ┆ 6     │
# │ 2   ┆ 5   ┆ 8     │
# │ 3   ┆ 6   ┆ 10    │
# │ 4   ┆ 7   ┆ 12    │
# └─────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Expression(s) representing the input Series to the function.

  • return_dtype (Object) (defaults to: nil)

    Datatype of the output Series.

    It is recommended to set this whenever possible. If this is nil, it tries to infer the datatype by calling the function with dummy data and looking at the output.

  • is_elementwise (Boolean) (defaults to: false)

    Set to true if the operations is elementwise for better performance and optimization.

    An elementwise operations has unit or equal length for all inputs and can be ran sequentially on slices without results being affected.

  • returns_scalar (Boolean) (defaults to: false)

    If the function returns a scalar, by default it will be wrapped in a list in the output, since the assumption is that the function always returns something Series-like. If you want to keep the result as a scalar, set this argument to True.

Returns:



940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
# File 'lib/polars/functions/lazy.rb', line 940

# Map a custom function over multiple columns/expressions, producing a
# single Series result.
#
# @note Much slower than native expressions; the UDF must be pure.
#
# @param exprs [Array] Expression(s) representing the input Series.
# @param return_dtype [Object] Dtype of the output Series; inferred from
#   dummy data when nil.
# @param is_elementwise [Boolean] Mark the operation elementwise for
#   better optimization.
# @param returns_scalar [Boolean] Keep a scalar result as a scalar rather
#   than wrapping it in a list.
#
# @return [Expr]
def map_batches(
  exprs,
  return_dtype: nil,
  is_elementwise: false,
  returns_scalar: false,
  &function
)
  inputs = Utils.parse_into_list_of_expressions(exprs)
  dtype_expr =
    return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  Utils.wrap_expr(
    Plr.map_expr(
      inputs,
      _map_batches_wrapper(function, returns_scalar: returns_scalar),
      dtype_expr,
      is_elementwise,
      returns_scalar
    )
  )
end

#map_groups(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr

Note:

This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.

Apply a custom/user-defined function (UDF) in a GroupBy context.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [1, 1, 2],
    "a" => [1, 3, 3],
    "b" => [5, 6, 7]
  }
)
(
  df.group_by("group").agg(
    Polars.map_groups(["a", "b"], return_dtype: Polars::Float64) { |list_of_series| list_of_series[0] / list_of_series[0].sum + list_of_series[1] }
    .alias("my_custom_aggregation")
  )
).sort("group")
# =>
# shape: (2, 2)
# ┌───────┬───────────────────────┐
# │ group ┆ my_custom_aggregation │
# │ ---   ┆ ---                   │
# │ i64   ┆ list[f64]             │
# ╞═══════╪═══════════════════════╡
# │ 1     ┆ [5.25, 6.75]          │
# │ 2     ┆ [8.0]                 │
# └───────┴───────────────────────┘

Parameters:

  • exprs (Object)

    Expression(s) representing the input Series to the function.

  • return_dtype (Object) (defaults to: nil)

    Datatype of the output Series.

    It is recommended to set this whenever possible. If this is nil, it tries to infer the datatype by calling the function with dummy data and looking at the output.

  • is_elementwise (Boolean) (defaults to: false)

    Set to true if the operations is elementwise for better performance and optimization.

    An elementwise operations has unit or equal length for all inputs and can be ran sequentially on slices without results being affected.

  • returns_scalar (Boolean) (defaults to: false)

    If the function returns a single scalar as output.

Returns:



1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
# File 'lib/polars/functions/lazy.rb', line 1016

# Apply a custom/user-defined function (UDF) in a GroupBy context.
#
# Delegates directly to `map_batches` with identical arguments; in a
# group-by context each batch corresponds to one group.
#
# @note Much slower than native expressions; only use when the logic
#   cannot be expressed otherwise.
#
# @param exprs [Object] Expression(s) representing the input Series.
# @param return_dtype [Object] Dtype of the output Series; inferred from
#   dummy data when nil.
# @param is_elementwise [Boolean] Mark the operation elementwise for
#   better optimization.
# @param returns_scalar [Boolean] Whether the function returns a single
#   scalar as output.
#
# @return [Expr]
def map_groups(
  exprs,
  return_dtype: nil,
  is_elementwise: false,
  returns_scalar: false,
  &function
)
  map_batches(
    exprs,
    return_dtype: return_dtype,
    is_elementwise: is_elementwise,
    returns_scalar: returns_scalar,
    &function
  )
end

#max(*names) ⇒ Expr

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘
df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



135
136
137
# File 'lib/polars/functions/aggregation/vertical.rb', line 135

# Get the maximum value.
#
# Syntactic sugar for `col(names).max`.
#
# @param names [Array] Name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def max(*names)
  col(*names).max
end

#max_horizontal(*exprs) ⇒ Expr

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



103
104
105
106
# File 'lib/polars/functions/aggregation/horizontal.rb', line 103

# Get the maximum value horizontally across columns.
#
# @param exprs [Array] Column(s) to aggregate: strings are parsed as
#   column names, other non-expression inputs as literals.
#
# @return [Expr]
def max_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.max_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#mean(*columns) ⇒ Expr

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘
df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



299
300
301
# File 'lib/polars/functions/lazy.rb', line 299

# Get the mean value of the given column(s).
#
# Syntactic sugar for `col(columns).mean`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def mean(*columns)
  selection = col(*columns)
  selection.mean
end

#mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



208
209
210
211
# File 'lib/polars/functions/aggregation/horizontal.rb', line 208

# Compute the mean of all values horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @param ignore_nulls [Boolean] ignore null values (default); if `false`,
#   any null in the input leads to a null output.
#
# @return [Expr]
def mean_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.mean_horizontal(
      Utils.parse_into_list_of_expressions(*exprs),
      ignore_nulls
    )
  )
end

#median(*columns) ⇒ Expr

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘
df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



342
343
344
# File 'lib/polars/functions/lazy.rb', line 342

# Get the median value of the given column(s).
#
# Syntactic sugar for `col(columns).median`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def median(*columns)
  selection = col(*columns)
  selection.median
end

#min(*names) ⇒ Expr

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘
df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



190
191
192
# File 'lib/polars/functions/aggregation/vertical.rb', line 190

# Get the minimum value of the given column(s).
#
# Syntactic sugar for `col(names).min`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def min(*names)
  selection = col(*names)
  selection.min
end

#min_horizontal(*exprs) ⇒ Expr

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



136
137
138
139
# File 'lib/polars/functions/aggregation/horizontal.rb', line 136

# Get the minimum value horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
#
# @return [Expr]
def min_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.min_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#n_unique(*columns) ⇒ Expr

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



385
386
387
# File 'lib/polars/functions/lazy.rb', line 385

# Count unique values in the given column(s).
#
# Syntactic sugar for `col(columns).n_unique`.
#
# @param columns [Array] one or more column names.
#
# @return [Expr]
def n_unique(*columns)
  selection = col(*columns)
  selection.n_unique
end

#nth(*indices, strict: true) ⇒ Expr

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘
df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

Parameters:

  • indices (Array)

    One or more indices representing the columns to retrieve.

Returns:



594
595
596
# File 'lib/polars/functions/lazy.rb', line 594

# Get the nth column(s) of the context.
#
# @param indices [Array] one or more indices representing the columns to
#   retrieve.
# @param strict [Boolean] require every index to resolve to a column.
#
# @return [Expr]
def nth(*indices, strict: true)
  selector = cs.by_index(*indices, require_all: strict)
  selector.as_expr
end

#ones(n, dtype: Float64, eager: false) ⇒ Object

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: Float64)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



76
77
78
79
80
81
82
83
# File 'lib/polars/functions/repeat.rb', line 76

# Construct a column of length `n` filled with ones.
#
# This is syntactic sugar for the `repeat` function.
#
# @param n [Integer] length of the resulting column.
# @param dtype [Object] data type of the resulting column (defaults to
#   `Float64`).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false`, return an expression instead.
#
# @return [Object]
#
# @raise [TypeError] if `dtype` cannot represent the fill value.
def ones(n, dtype: Float64, eager: false)
  # Fixed misleading local name: this holds the *one* fill value for the
  # dtype (the original called it `zero`, copy-pasted from `zeros`).
  if (one = _one_or_zero_by_dtype(1, dtype)).nil?
    msg = "invalid dtype for `ones`; found #{dtype}"
    raise TypeError, msg
  end

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end

#quantile(column, quantile, interpolation: "nearest") ⇒ Expr

Syntactic sugar for Polars.col("foo").quantile(...).

Parameters:

  • column (String)

    Column name.

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



1429
1430
1431
# File 'lib/polars/functions/lazy.rb', line 1429

# Get a quantile of the given column.
#
# Syntactic sugar for `col(column).quantile(...)`.
#
# @param column [String] column name.
# @param quantile [Float] quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   interpolation method.
#
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  selection = col(column)
  selection.quantile(quantile, interpolation: interpolation)
end

#reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr

Accumulate over multiple columns horizontally (row-wise) with a left fold.

Examples:

Horizontally sum over all columns.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.select(
  Polars.reduce(Polars.col("*")) { |acc, x| acc + x }.alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 5   │
# └─────┘

Parameters:

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the input expressions.

Returns:



1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
# File 'lib/polars/functions/lazy.rb', line 1142

# Accumulate over multiple columns horizontally (row-wise) with a left fold.
#
# @param exprs [Object] expressions to aggregate over; may also be a
#   wildcard expression.
# @param returns_scalar [Boolean] whether the applied function returns a
#   scalar; must be set correctly by the caller.
# @param return_dtype [Object] output datatype; inferred from the input
#   expressions when `nil`.
#
# @return [Expr]
def reduce(
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  &function
)
  # A bare expression is treated as a one-element list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rt =
    if return_dtype.nil?
      nil
    else
      Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
    end

  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(
    Plr.reduce(_wrap_acc_lambda(function), rbexprs, returns_scalar, rt)
  )
end

#repeat(value, n, dtype: nil, eager: false) ⇒ Object

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting eager: true.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

  • value (Object)

    Value to repeat.

  • n (Integer)

    Repeat n times.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. If set to nil (default), data type is inferred from the given value. Defaults to Int32 for integer values, unless Int64 is required to fit the given value. Defaults to Float64 for float values.

  • eager (Boolean) (defaults to: false)

    Run eagerly and collect into a Series.

Returns:



39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/polars/functions/repeat.rb', line 39

# Repeat a single value `n` times.
#
# @param value [Object] value to repeat.
# @param n [Integer] repeat `n` times.
# @param dtype [Object] data type of the resulting column; inferred from
#   `value` when `nil`.
# @param eager [Boolean] run eagerly and collect into a `Series`.
#
# @return [Object]
def repeat(value, n, dtype: nil, eager: false)
  # A plain integer length is lifted into a literal expression.
  n = lit(n) if n.is_a?(Integer)

  rb_value = Utils.parse_into_expression(value, str_as_lit: true)
  expr = Utils.wrap_expr(Plr.repeat(rb_value, n._rbexpr, dtype))
  eager ? select(expr).to_series : expr
end

#rolling_corr(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr

Compute the rolling correlation between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • window_size (Integer)

    The length of the window.

  • min_samples (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements.

Returns:



1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
# File 'lib/polars/functions/lazy.rb', line 1829

# Compute the rolling correlation between two columns/expressions.
#
# The window at a given row includes the row itself and the
# `window_size - 1` elements before it.
#
# @param a [Object] column name or expression.
# @param b [Object] column name or expression.
# @param window_size [Integer] the length of the window.
# @param min_samples [Integer] number of non-null values required in the
#   window before a result is computed; defaults to `window_size`.
# @param ddof [Integer] delta degrees of freedom; the divisor used is
#   `N - ddof`.
#
# @return [Expr]
def rolling_corr(
  a,
  b,
  window_size:,
  min_samples: nil,
  ddof: 1
)
  min_samples = window_size if min_samples.nil?
  a = F.col(a) if Utils.strlike?(a)
  b = F.col(b) if Utils.strlike?(b)
  Utils.wrap_expr(
    Plr.rolling_corr(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
  )
end

#rolling_cov(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr

Compute the rolling covariance between two columns/expressions.

The window at a given row includes the row itself and the window_size - 1 elements before it.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • window_size (Integer)

    The length of the window.

  • min_samples (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements.

Returns:



1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
# File 'lib/polars/functions/lazy.rb', line 1789

# Compute the rolling covariance between two columns/expressions.
#
# The window at a given row includes the row itself and the
# `window_size - 1` elements before it.
#
# @param a [Object] column name or expression.
# @param b [Object] column name or expression.
# @param window_size [Integer] the length of the window.
# @param min_samples [Integer] number of non-null values required in the
#   window before a result is computed; defaults to `window_size`.
# @param ddof [Integer] delta degrees of freedom; the divisor used is
#   `N - ddof`.
#
# @return [Expr]
def rolling_cov(
  a,
  b,
  window_size:,
  min_samples: nil,
  ddof: 1
)
  min_samples = window_size if min_samples.nil?
  a = F.col(a) if Utils.strlike?(a)
  b = F.col(b) if Utils.strlike?(b)
  Utils.wrap_expr(
    Plr.rolling_cov(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
  )
end

#select(*exprs, eager: true, **named_exprs) ⇒ DataFrame

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

  • exprs (Array)

    Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a DataFrame (default); if set to false, return a LazyFrame instead.

  • named_exprs (Hash)

    Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



1603
1604
1605
1606
# File 'lib/polars/functions/lazy.rb', line 1603

# Run polars expressions without a context.
#
# Syntactic sugar for running `df.select` on an empty `DataFrame`.
#
# @param exprs [Array] column(s) to select; strings are parsed as column
#   names, other non-expression inputs as literals.
# @param eager [Boolean] evaluate immediately and return a `DataFrame`
#   (default); if `false`, return a `LazyFrame` instead.
# @param named_exprs [Hash] additional columns to select, renamed to the
#   keyword used.
#
# @return [DataFrame]
def select(*exprs, eager: true, **named_exprs)
  frame =
    if eager
      Polars::DataFrame.new
    else
      Polars::LazyFrame.new
    end
  frame.select(*exprs, **named_exprs)
end

#self_dtypeDataTypeExpr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get the dtype of self in map_elements and map_batches.

Returns:



28
29
30
# File 'lib/polars/functions/datatype.rb', line 28

# Get the dtype of `self` in `map_elements` and `map_batches`.
#
# @note This functionality is considered unstable and may change at any
#   point without being considered a breaking change.
#
# @return [DataTypeExpr]
def self_dtype
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.self_dtype)
end

#set_random_seed(seed) ⇒ nil

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

  • seed (Integer)

    A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

  • (nil)


12
13
14
# File 'lib/polars/functions/random.rb', line 12

# Set the global random seed for Polars.
#
# This seed is used to determine things such as shuffle ordering.
#
# @param seed [Integer] a non-negative integer < 2**64 used to seed the
#   internal global random number generator.
#
# @return [nil]
def set_random_seed(seed)
  Plr.set_random_seed(seed)
end

#sql_expr(sql) ⇒ Expr

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

  • sql (Object)

    One or more SQL expressions.

Returns:



1885
1886
1887
1888
1889
1890
1891
# File 'lib/polars/functions/lazy.rb', line 1885

# Parse one or more SQL expressions to polars expression(s).
#
# @param sql [Object] one or more SQL expressions.
#
# @return [Expr] a single expression for a `String` input; an array of
#   expressions for a collection input.
def sql_expr(sql)
  case sql
  when ::String
    Utils.wrap_expr(Plr.sql_expr(sql))
  else
    sql.map { |query| Utils.wrap_expr(Plr.sql_expr(query)) }
  end
end

#std(column, ddof: 1) ⇒ Expr

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘
df["a"].std
# => 3.605551275463989

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



216
217
218
# File 'lib/polars/functions/lazy.rb', line 216

# Get the standard deviation of the given column.
#
# Syntactic sugar for `col(column).std(ddof: ddof)`.
#
# @param column [Object] column name.
# @param ddof [Integer] "Delta Degrees of Freedom": the divisor used in the
#   calculation is `N - ddof` (default 1).
#
# @return [Expr]
def std(column, ddof: 1)
  selection = col(column)
  selection.std(ddof: ddof)
end

#struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => Polars::Schema({"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})})

Parameters:

  • exprs (Array)

    Column(s) to collect into a struct column, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • schema (Hash) (defaults to: nil)

    Optional schema that explicitly defines the struct field dtypes. If no columns or expressions are provided, schema keys are used to define columns.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

  • named_exprs (Hash)

    Additional columns to collect into the struct column, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'lib/polars/functions/as_datatype.rb', line 477

# Collect several columns into a Series of dtype `Struct`.
#
# @param exprs [Array] column(s) to collect into a struct column; strings
#   are parsed as column names, other non-expression inputs as literals.
# @param schema [Hash] optional schema that explicitly defines the struct
#   field dtypes; if no columns or expressions are provided, the schema
#   keys are used to define the columns.
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
# @param named_exprs [Hash] additional columns to collect into the struct
#   column, renamed to the keyword used.
#
# @return [Object]
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
  expr = Utils.wrap_expr(Plr.as_struct(parsed))

  schema_given = !schema.nil? && !schema.empty?
  if schema_given && exprs.none?
    # No positional columns/expressions provided: build the struct from the
    # schema keys and cast the fields to the requested dtypes.
    key_exprs = Utils.parse_into_list_of_expressions(schema.keys)
    expr = Utils.wrap_expr(Plr.as_struct(key_exprs))
    expr = expr.cast(Struct.new(schema), strict: false)
  end

  eager ? Polars.select(expr).to_series : expr
end

#sum(*names) ⇒ Expr

Sum all values.

Syntactic sugar for col(names).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘
df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



245
246
247
# File 'lib/polars/functions/aggregation/vertical.rb', line 245

# Sum all values in the given column(s).
#
# Syntactic sugar for `col(names).sum`.
#
# @param names [Array] name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def sum(*names)
  selection = col(*names)
  selection.sum
end

#sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



172
173
174
175
# File 'lib/polars/functions/aggregation/horizontal.rb', line 172

# Sum all values horizontally across columns.
#
# @param exprs [Array] column(s) to use in the aggregation; strings are
#   parsed as column names, other non-expression inputs as literals.
# @param ignore_nulls [Boolean] ignore null values (default); if `false`,
#   any null in the input leads to a null output.
#
# @return [Expr]
def sum_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.sum_horizontal(
      Utils.parse_into_list_of_expressions(*exprs),
      ignore_nulls
    )
  )
end

#tail(column, n = 10) ⇒ Expr

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



690
691
692
# File 'lib/polars/functions/lazy.rb', line 690

# Get the last `n` rows of the given column.
#
# Syntactic sugar for `col(column).tail(n)`.
#
# @param column [Object] column name.
# @param n [Integer] number of rows to return (default 10).
#
# @return [Expr]
def tail(column, n = 10)
  selection = col(column)
  selection.tail(n)
end

#time(hour = nil, minute = nil, second = nil, microsecond = nil) ⇒ Expr

Create a Polars literal expression of type Time.

Examples:

df = Polars::DataFrame.new(
  {
    "hour" => [12, 13, 14],
    "minute" => [15, 30, 45]
  }
)
df.with_columns(Polars.time(Polars.col("hour"), Polars.col("minute")))
# =>
# shape: (3, 3)
# ┌──────┬────────┬──────────┐
# │ hour ┆ minute ┆ time     │
# │ ---  ┆ ---    ┆ ---      │
# │ i64  ┆ i64    ┆ time     │
# ╞══════╪════════╪══════════╡
# │ 12   ┆ 15     ┆ 12:15:00 │
# │ 13   ┆ 30     ┆ 13:30:00 │
# │ 14   ┆ 45     ┆ 14:45:00 │
# └──────┴────────┴──────────┘

Parameters:

  • hour (Object) (defaults to: nil)

    column or literal, ranging from 0-23.

  • minute (Object) (defaults to: nil)

    column or literal, ranging from 0-59.

  • second (Object) (defaults to: nil)

    column or literal, ranging from 0-59.

  • microsecond (Object) (defaults to: nil)

    column or literal, ranging from 0-999999.

Returns:



219
220
221
222
223
224
225
226
227
228
229
# File 'lib/polars/functions/as_datatype.rb', line 219

# Create a Polars literal expression of type `Time`.
#
# @param hour [Object] column or literal, ranging from 0-23.
# @param minute [Object] column or literal, ranging from 0-59.
# @param second [Object] column or literal, ranging from 0-59.
# @param microsecond [Object] column or literal, ranging from 0-999999.
#
# @return [Expr]
def time(
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil
)
  # Build a datetime at the Unix epoch date (1970-01-01) and discard the
  # date part via the cast to Time.
  datetime(1970, 1, 1, hour, minute, second, microsecond)
    .cast(Time)
    .alias("time")
end

#time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (String) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/polars/functions/range/time_range.rb', line 35

# Generate a time range.
#
# @param start [Object] lower bound of the time range; defaults to the
#   start of the day.
# @param stop [Object] upper bound of the time range; defaults to the end
#   of the day.
# @param interval [String] interval of the range periods, specified using
#   the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none'] define which sides of the
#   range are closed (inclusive).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError] if the interval contains a calendar-based unit.
def time_range(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # Calendar-based units cannot be expressed as a pure time-of-day duration.
  bad_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  unless bad_unit.nil?
    raise ArgumentError, "invalid interval unit for time_range: found #{bad_unit.inspect}"
  end

  # Default bounds cover a whole day; the date part is ignored.
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  result =
    Utils.wrap_expr(
      Plr.time_range(
        Utils.parse_into_expression(start),
        Utils.parse_into_expression(stop),
        interval,
        closed
      )
    )

  eager ? Polars.select(result).to_series : result
end

#time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (Integer) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/polars/functions/range/time_range.rb', line 105

# Create a column of time ranges.
#
# @param start [Object] lower bound of the time range; defaults to the
#   start of the day.
# @param stop [Object] upper bound of the time range; defaults to the end
#   of the day.
# @param interval [String] interval of the range periods, specified using
#   the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none'] define which sides of the
#   range are closed (inclusive).
# @param eager [Boolean] evaluate immediately and return a `Series`; if
#   `false` (default), return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError] if the interval contains a calendar-based unit.
def time_ranges(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  ["y", "mo", "w", "d"].each do |unit|
    if interval.include?(unit)
      # Fixed: the error previously reported "time_range", the wrong
      # function name for this method.
      msg = "invalid interval unit for time_ranges: found #{unit.inspect}"
      raise ArgumentError, msg
    end
  end

  if start.nil?
    # date part is ignored
    start = ::Time.utc(2000, 1, 1, 0, 0, 0)
  end
  if stop.nil?
    # date part is ignored
    stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
  end

  start_rbexpr = Utils.parse_into_expression(start)
  end_rbexpr = Utils.parse_into_expression(stop)

  result = Utils.wrap_expr(Plr.time_ranges(start_rbexpr, end_rbexpr, interval, closed))

  if eager
    return Polars.select(result).to_series
  end

  result
end

#union(items, how: "vertical", strict: false) ⇒ Object

Note:

This function does not guarantee any specific ordering of rows in the result. If you need predictable row ordering, use Polars.concat instead.

Combine multiple DataFrames, LazyFrames, or Series into a single object.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.union([df1, df2])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.union([df1, df2], how: "vertical_relaxed")
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.union([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘

The "diagonal" strategy allows for some frames to have missing columns, the values for which are filled with null:

df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.union([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘

Parameters:

  • items (Array)

    DataFrames, LazyFrames, or Series to concatenate.

  • how ('vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align', 'align_full', 'align_inner', 'align_left', 'align_right') (defaults to: "vertical")

    Note that Series only support the vertical strategy.

    • vertical: Applies multiple vstack operations.
    • vertical_relaxed: Same as vertical, but additionally coerces columns to their common supertype if they are mismatched (eg: Int32 → Int64).
    • diagonal: Finds a union between the column schemas and fills missing column values with null.
    • diagonal_relaxed: Same as diagonal, but additionally coerces columns to their common supertype if they are mismatched (eg: Int32 → Int64).
    • horizontal: Stacks Series from DataFrames horizontally and fills with null if the lengths don't match.
    • align, align_full, align_left, align_right: Combines frames horizontally, auto-determining the common key columns and aligning rows using the same logic as align_frames (note that "align" is an alias for "align_full"). The "align" strategy determines the type of join used to align the frames, equivalent to the "how" parameter on align_frames. Note that the common join columns are automatically coalesced, but other column collisions will raise an error (if you need more control over this you should use a suitable join method directly).
  • strict (Boolean) (defaults to: false)

    When how=horizontal, require all DataFrames to be the same height, raising an error if not.

Returns:



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/polars/functions/eager.rb', line 303

# Concatenate DataFrames, LazyFrames, Series, or Exprs.
#
# Dispatches on the class of the first element; all elements are assumed to
# be of the same kind. Series only support the "vertical" strategy.
#
# @param items [Array] frames/series/expressions to concatenate.
# @param how [String] concatenation strategy (see docs above for the list).
# @param strict [Boolean] when `how == "horizontal"`, require equal heights.
#
# @return [Object] a DataFrame, LazyFrame, Series, or Expr matching the input kind.
# @raise [ArgumentError] if `items` is empty or `how` is invalid for the input kind.
# @raise [TypeError] if the first element is of an unsupported class.
def union(
  items,
  how: "vertical",
  strict: false
)
  elems = items.to_a

  if elems.empty?
    msg = "cannot concat empty list"
    raise ArgumentError, msg
  elsif elems.length == 1 && (elems[0].is_a?(DataFrame) || elems[0].is_a?(Series) || elems[0].is_a?(LazyFrame))
    # Single-frame input: nothing to concatenate, return it unchanged.
    return elems[0]
  end

  if how.start_with?("align")
    # Align strategies are not implemented yet in the Ruby bindings.
    raise Todo
  end

  out = nil
  first = elems[0]

  if first.is_a?(DataFrame)
    if ["vertical", "vertical_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems, strict))
    else
      # FIX: previously `raise Todo` followed by unreachable code that
      # referenced an undefined local `allowed`. Raise a proper error instead.
      allowed = ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal"].join(", ")
      msg = "DataFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(LazyFrame)
    if ["vertical", "vertical_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif how == "horizontal"
      return Utils.wrap_ldf(
        Plr.concat_lf_horizontal(
          elems,
          true,
          strict
        )
      )
    else
      # FIX: same dead-code/undefined-`allowed` defect as the DataFrame branch.
      allowed = ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal"].join(", ")
      msg = "LazyFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Expr)
    return Utils.wrap_expr(Plr.concat_expr(elems.map { |e| e._rbexpr }, false))
  else
    msg = "did not expect type: #{first.class.name.inspect} in `concat`"
    raise TypeError, msg
  end

  out
end

#using_string_cache ⇒ Boolean

Check whether the global string cache is enabled.

Returns:



97
98
99
# File 'lib/polars/string_cache.rb', line 97

# Check whether the global string cache is enabled.
#
# @return [Boolean] the current state reported by the native backend.
def using_string_cache
  # Thin delegation to the native (Rust) Polars binding.
  Plr.using_string_cache
end

#var(column, ddof: 1) ⇒ Expr

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘
df["a"].var
# => 13.0

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



255
256
257
# File 'lib/polars/functions/lazy.rb', line 255

# Get the variance of the given column.
#
# Syntactic sugar for `col(column).var(ddof: ddof)`.
#
# @param column [Object] column name.
# @param ddof [Integer] "Delta Degrees of Freedom": the divisor used in the
#   calculation is N - ddof, where N is the number of elements (default 1).
#
# @return [Expr]
def var(column, ddof: 1)
  column_expr = col(column)
  column_expr.var(ddof: ddof)
end

#when(*predicates, **constraints) ⇒ When

Start a "when, then, otherwise" expression.

Examples:

Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The otherwise at the end is optional. If left out, any rows where none of the when expressions evaluate to true are set to null:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

Returns:

  • (When)


91
92
93
94
# File 'lib/polars/functions/whenthen.rb', line 91

# Start a "when, then, otherwise" expression.
#
# All positional predicates and keyword constraints are combined into a
# single condition expression that must be fully satisfied.
#
# @param predicates [Array] condition expressions.
# @param constraints [Hash] column-name/value equality constraints.
#
# @return [When]
def when(*predicates, **constraints)
  When.new(
    Plr.when(
      Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
    )
  )
end

#zeros(n, dtype: Float64, eager: false) ⇒ Object

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: Float64)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



109
110
111
112
113
114
115
116
# File 'lib/polars/functions/repeat.rb', line 109

# Construct a column of length `n` filled with zeros.
#
# Syntactic sugar for the `repeat` function.
#
# @param n [Integer] length of the resulting column.
# @param dtype [Object] data type of the resulting column (default Float64).
# @param eager [Boolean] evaluate immediately and return a Series; when
#   false, return an expression instead.
#
# @return [Object]
# @raise [TypeError] if `dtype` has no zero representation.
def zeros(n, dtype: Float64, eager: false)
  zero = _one_or_zero_by_dtype(0, dtype)
  if zero.nil?
    raise TypeError, "invalid dtype for `zeros`; found #{dtype}"
  end

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end