Module: Polars
- Defined in:
- lib/polars.rb,
lib/polars/expr.rb,
lib/polars/slice.rb,
lib/polars/utils.rb,
lib/polars/config.rb,
lib/polars/io/csv.rb,
lib/polars/io/ipc.rb,
lib/polars/schema.rb,
lib/polars/series.rb,
lib/polars/catalog.rb,
lib/polars/convert.rb,
lib/polars/io/avro.rb,
lib/polars/io/json.rb,
lib/polars/testing.rb,
lib/polars/version.rb,
lib/polars/cat_expr.rb,
lib/polars/group_by.rb,
lib/polars/io/cloud.rb,
lib/polars/io/delta.rb,
lib/polars/io/utils.rb,
lib/polars/selector.rb,
lib/polars/whenthen.rb,
lib/polars/io/ndjson.rb,
lib/polars/list_expr.rb,
lib/polars/meta_expr.rb,
lib/polars/name_expr.rb,
lib/polars/selectors.rb,
lib/polars/array_expr.rb,
lib/polars/data_frame.rb,
lib/polars/data_types.rb,
lib/polars/exceptions.rb,
lib/polars/io/iceberg.rb,
lib/polars/io/parquet.rb,
lib/polars/lazy_frame.rb,
lib/polars/utils/wrap.rb,
lib/polars/binary_expr.rb,
lib/polars/io/database.rb,
lib/polars/series_plot.rb,
lib/polars/sql_context.rb,
lib/polars/string_expr.rb,
lib/polars/struct_expr.rb,
lib/polars/utils/parse.rb,
lib/polars/utils/serde.rb,
lib/polars/string_cache.rb,
lib/polars/expr_dispatch.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/lazy_group_by.rb,
lib/polars/utils/convert.rb,
lib/polars/utils/various.rb,
lib/polars/cat_name_space.rb,
lib/polars/data_type_expr.rb,
lib/polars/date_time_expr.rb,
lib/polars/extension_expr.rb,
lib/polars/functions/lazy.rb,
lib/polars/utils/unstable.rb,
lib/polars/collect_batches.rb,
lib/polars/data_frame_plot.rb,
lib/polars/data_type_group.rb,
lib/polars/functions/eager.rb,
lib/polars/iceberg_dataset.rb,
lib/polars/io/scan_options.rb,
lib/polars/io/sink_options.rb,
lib/polars/list_name_space.rb,
lib/polars/query_opt_flags.rb,
lib/polars/utils/constants.rb,
lib/polars/array_name_space.rb,
lib/polars/dynamic_group_by.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/in_process_query.rb,
lib/polars/rolling_group_by.rb,
lib/polars/binary_name_space.rb,
lib/polars/scan_cast_options.rb,
lib/polars/string_name_space.rb,
lib/polars/struct_name_space.rb,
lib/polars/utils/deprecation.rb,
lib/polars/batched_csv_reader.rb,
lib/polars/functions/business.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/date_time_name_space.rb,
lib/polars/extension_name_space.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/escape_regex.rb,
lib/polars/catalog/unity/table_info.rb,
lib/polars/utils/construction/utils.rb,
lib/polars/catalog/unity/column_info.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/utils/construction/series.rb,
lib/polars/catalog/unity/catalog_info.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/catalog/unity/namespace_info.rb,
lib/polars/functions/range/linear_space.rb,
lib/polars/utils/construction/data_frame.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb
Defined Under Namespace
Modules: Convert, Functions, IO, Selectors, Testing Classes: Array, ArrayExpr, ArrayNameSpace, Binary, BinaryExpr, BinaryNameSpace, Boolean, CatExpr, CatNameSpace, Catalog, Categorical, Categories, Config, DataFrame, DataFramePlot, DataType, DataTypeExpr, Date, DateTimeExpr, DateTimeNameSpace, Datetime, Decimal, Duration, DynamicGroupBy, Enum, Expr, ExtensionExpr, ExtensionNameSpace, Field, Float16, Float32, Float64, FloatType, GroupBy, InProcessQuery, Int128, Int16, Int32, Int64, Int8, IntegerType, LazyFrame, LazyGroupBy, List, ListExpr, ListNameSpace, MetaExpr, NameExpr, NestedType, Null, NumericType, Object, QueryOptFlags, RollingGroupBy, SQLContext, ScanCastOptions, Schema, Selector, Series, SeriesPlot, SignedIntegerType, String, StringCache, StringExpr, StringNameSpace, Struct, StructExpr, StructNameSpace, TemporalType, Time, UInt128, UInt16, UInt32, UInt64, UInt8, Unknown, UnsignedIntegerType
Constant Summary collapse
- SIGNED_INTEGER_DTYPES =
DataTypeGroup.new([Int8, Int16, Int32, Int64])
- UNSIGNED_INTEGER_DTYPES =
DataTypeGroup.new([UInt8, UInt16, UInt32, UInt64])
- INTEGER_DTYPES =
(SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES)
- FLOAT_DTYPES =
DataTypeGroup.new([Float32, Float64])
- NUMERIC_DTYPES =
DataTypeGroup.new(FLOAT_DTYPES + INTEGER_DTYPES | [Decimal])
- SinkOptions =
IO::SinkOptions
- DEFAULT_QUERY_OPT_FLAGS =
QueryOptFlags.new
Class Method Summary collapse
-
.align_frames(*frames, on:, how: nil, select: nil, descending: false) ⇒ Object
extended
from Functions
Align an array of frames using the unique values from one or more columns as a key.
-
.all(*names, ignore_nulls: true) ⇒ Expr
extended
from Functions
Either return an expression representing all columns, or evaluate a bitwise AND operation.
-
.all_horizontal(*exprs) ⇒ Expr
extended
from Functions
Compute the bitwise AND horizontally across columns.
-
.any(*names, ignore_nulls: true) ⇒ Expr
extended
from Functions
Evaluate a bitwise OR operation.
-
.any_horizontal(*exprs) ⇒ Expr
extended
from Functions
Compute the bitwise OR horizontally across columns.
-
.approx_n_unique(*columns) ⇒ Expr
extended
from Functions
Approximate count of unique values.
-
.arctan2(y, x) ⇒ Expr
extended
from Functions
Compute two argument arctan in radians.
-
.arg_sort_by(exprs, *more_exprs, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr
extended
from Functions
Find the indexes that would sort the columns.
-
.arg_where(condition, eager: false) ⇒ Expr, Series
extended
from Functions
Return indices where
condition evaluates true. -
.build_info ⇒ Hash
Return detailed Polars build information.
-
.business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr
extended
from Functions
Count the number of business days between
start and end (not including end). -
.coalesce(exprs, *more_exprs, eager: false) ⇒ Expr
extended
from Functions
Folds the columns from left to right, keeping the first non-null value.
-
.col(name, *more_names) ⇒ Expr
extended
from Functions
Return an expression representing a column in a DataFrame.
-
.collect_all(lazy_frames, optimizations: DEFAULT_QUERY_OPT_FLAGS, engine: "auto", lazy: false) ⇒ Array
extended
from Functions
Collect multiple LazyFrames at the same time.
-
.concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object
extended
from Functions
Aggregate multiple Dataframes/Series to a single DataFrame/Series.
-
.concat_arr(exprs, *more_exprs) ⇒ Expr
extended
from Functions
Horizontally concatenate columns into a single array column.
-
.concat_list(exprs, *more_exprs) ⇒ Expr
extended
from Functions
Concat the arrays in a Series dtype List in linear time.
-
.concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false) ⇒ Expr
extended
from Functions
Horizontally concat Utf8 Series in linear time.
- .config ⇒ Object
-
.corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr
extended
from Functions
Compute the Pearson's or Spearman rank correlation between two columns.
-
.count(*columns) ⇒ Expr
extended
from Functions
Return the number of non-null values in the column.
-
.cov(a, b, ddof: 1, eager: false) ⇒ Expr
extended
from Functions
Compute the covariance between two columns/ expressions.
- .cs ⇒ Object
-
.cum_count(*columns, reverse: false) ⇒ Expr
extended
from Functions
Return the cumulative count of the non-null values in the column.
-
.cum_fold(acc, exprs, returns_scalar: false, return_dtype: nil, include_init: false, &function) ⇒ Object
extended
from Functions
Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
-
.cum_reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr
extended
from Functions
Cumulatively reduce horizontally across columns with a left fold.
-
.cum_sum(*names) ⇒ Expr
extended
from Functions
Cumulatively sum all values.
-
.cum_sum_horizontal(*exprs) ⇒ Expr
extended
from Functions
Cumulatively sum all values horizontally across columns.
-
.date(year, month, day) ⇒ Expr
extended
from Functions
Create a Polars literal expression of type Date.
-
.date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object
extended
from Functions
Create a range of type
Datetime (or Date). -
.date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object
extended
from Functions
Create a column of date ranges.
-
.datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil, time_unit: "us", time_zone: nil, ambiguous: "raise") ⇒ Expr
extended
from Functions
Create a Polars literal expression of type Datetime.
-
.datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object
extended
from Functions
Generate a datetime range.
-
.datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object
extended
from Functions
Create a column of datetime ranges.
-
.disable_string_cache ⇒ nil
extended
from Functions
Disable and clear the global string cache.
-
.dtype_of(col_or_expr) ⇒ DataTypeExpr
extended
from Functions
Get a lazily evaluated
DataType of a column or expression. -
.duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: nil) ⇒ Expr
extended
from Functions
Create polars
Duration from distinct time components. -
.element ⇒ Expr
extended
from Functions
Alias for an element being evaluated in an
eval expression. -
.enable_string_cache ⇒ nil
extended
from Functions
Enable the global string cache.
-
.escape_regex(s) ⇒ String
extended
from Functions
Escapes string regex meta characters.
-
.exclude(columns, *more_columns) ⇒ Object
extended
from Functions
Exclude certain columns from a wildcard/regex selection.
-
.field(name) ⇒ Expr
extended
from Functions
Select a field in the current
struct.with_fields scope. -
.first(*columns) ⇒ Expr
extended
from Functions
Get the first value.
-
.fold(acc, exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr
extended
from Functions
Accumulate over multiple columns horizontally/row wise with a left fold.
-
.format(f_string, *args) ⇒ Expr
extended
from Functions
Format expressions as a string.
-
.from_epoch(column, time_unit: "s") ⇒ Object
extended
from Functions
Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
-
.from_hash(data, schema: nil, schema_overrides: nil, strict: true) ⇒ DataFrame
extended
from Convert
Construct a DataFrame from a hash of arrays.
-
.from_hashes(data, schema: nil, schema_overrides: nil, strict: true, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame
extended
from Convert
Construct a DataFrame from an array of hashes.
-
.from_numo(data, schema: nil, schema_overrides: nil, orient: nil) ⇒ DataFrame
extended
from Convert
Construct a DataFrame from a NumPy ndarray.
-
.from_records(data, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame
extended
from Convert
Construct a DataFrame from an array of arrays.
-
.get_index_type ⇒ Object
Return the data type used for Polars indexing.
-
.groups(column) ⇒ Object
extended
from Functions
Syntactic sugar for
Polars.col("foo").agg_groups. -
.head(column, n = 10) ⇒ Expr
extended
from Functions
Get the first
n rows. -
.implode(*columns) ⇒ Expr
extended
from Functions
Aggregate all column values into a list.
-
.int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64) ⇒ Expr, Series
(also: #arange)
extended
from Functions
Create a range expression (or Series).
-
.int_ranges(start = 0, stop = nil, step: 1, dtype: Int64, eager: false) ⇒ Expr, Series
extended
from Functions
Generate a range of integers for each row of the input columns.
-
.last(*columns) ⇒ Expr
extended
from Functions
Get the last value.
-
.len ⇒ Expr
(also: #length)
extended
from Functions
Return the number of rows in the context.
-
.linear_spaces(start, stop, num_samples, closed: "both", as_array: false, eager: false) ⇒ Expr, Series
extended
from Functions
Generate a sequence of evenly-spaced values for each row between
start and end. -
.lit(value, dtype: nil, allow_object: false) ⇒ Expr
extended
from Functions
Return an expression representing a literal value.
-
.map_batches(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr
extended
from Functions
Map a custom function over multiple columns/expressions.
-
.map_groups(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr
extended
from Functions
Apply a custom/user-defined function (UDF) in a GroupBy context.
-
.max(*names) ⇒ Expr
extended
from Functions
Get the maximum value.
-
.max_horizontal(*exprs) ⇒ Expr
extended
from Functions
Get the maximum value horizontally across columns.
-
.mean(*columns) ⇒ Expr
extended
from Functions
Get the mean value.
-
.mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr
extended
from Functions
Compute the mean of all values horizontally across columns.
-
.median(*columns) ⇒ Expr
extended
from Functions
Get the median value.
-
.min(*names) ⇒ Expr
extended
from Functions
Get the minimum value.
-
.min_horizontal(*exprs) ⇒ Expr
extended
from Functions
Get the minimum value horizontally across columns.
-
.n_unique(*columns) ⇒ Expr
extended
from Functions
Count unique values.
-
.nth(*indices, strict: true) ⇒ Expr
extended
from Functions
Get the nth column(s) of the context.
-
.ones(n, dtype: Float64, eager: false) ⇒ Object
extended
from Functions
Construct a column of length
n filled with ones. -
.quantile(column, quantile, interpolation: "nearest") ⇒ Expr
extended
from Functions
Syntactic sugar for
Polars.col("foo").quantile(...). -
.read_avro(source, columns: nil, n_rows: nil) ⇒ DataFrame
extended
from IO
Read into a DataFrame from Apache Avro format.
-
.read_csv(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, batch_size: 8192, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, storage_options: nil, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true) ⇒ DataFrame
extended
from IO
Read a CSV file into a DataFrame.
-
.read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ BatchedCsvReader
extended
from IO
deprecated
Deprecated.
Use
scan_csv().collect_batches instead. -
.read_database(query, schema_overrides: nil) ⇒ DataFrame
extended
from IO
Read a SQL query into a DataFrame.
-
.read_delta(source, version: nil, columns: nil, rechunk: nil, storage_options: nil, delta_table_options: nil) ⇒ DataFrame
extended
from IO
Reads into a DataFrame from a Delta lake table.
-
.read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame
extended
from IO
Read into a DataFrame from Arrow IPC (Feather v2) file.
-
.read_ipc_schema(source) ⇒ Hash
extended
from IO
Get a schema of the IPC file without reading data.
-
.read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame
extended
from IO
Read into a DataFrame from Arrow IPC record batch stream.
-
.read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame
extended
from IO
Read into a DataFrame from a JSON file.
-
.read_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ DataFrame
extended
from IO
Read into a DataFrame from a newline delimited JSON file.
-
.read_parquet(source, columns: nil, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil) ⇒ DataFrame
extended
from IO
Read into a DataFrame from a parquet file.
-
.read_parquet_metadata(source, storage_options: nil, credential_provider: "auto", retries: nil) ⇒ Hash
extended
from IO
Get file-level custom metadata of a Parquet file without reading data.
-
.read_parquet_schema(source) ⇒ Schema
extended
from IO
Get a schema of the Parquet file without reading data.
-
.reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr
extended
from Functions
Accumulate over multiple columns horizontally/ row wise with a left fold.
-
.repeat(value, n, dtype: nil, eager: false) ⇒ Object
extended
from Functions
Repeat a single value n times.
-
.rolling_corr(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr
extended
from Functions
Compute the rolling correlation between two columns/ expressions.
-
.rolling_cov(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr
extended
from Functions
Compute the rolling covariance between two columns/ expressions.
-
.scan_csv(source, has_header: true, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, cache: true, with_column_names: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, try_parse_dates: false, eol_char: "\n", new_columns: nil, raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame
extended
from IO
Lazily read from a CSV file or multiple files via glob patterns.
-
.scan_delta(source, version: nil, storage_options: nil, delta_table_options: nil, rechunk: nil) ⇒ LazyFrame
extended
from IO
Lazily read from a Delta lake table.
-
.scan_iceberg(source, snapshot_id: nil, storage_options: nil) ⇒ LazyFrame
extended
from IO
Lazily read from an Apache Iceberg table.
-
.scan_ipc(source, n_rows: nil, cache: true, rechunk: false, row_index_name: nil, row_index_offset: 0, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, hive_partitioning: nil, hive_schema: nil, try_parse_hive_dates: true, include_file_paths: nil, _record_batch_statistics: false) ⇒ LazyFrame
extended
from IO
Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.
-
.scan_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame
extended
from IO
Lazily read from a newline delimited JSON file.
-
.scan_parquet(source, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, hidden_file_prefix: nil, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, cache: true, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil, extra_columns: "raise", cast_options: nil, _column_mapping: nil, _default_values: nil, _deletion_files: nil, _table_statistics: nil, _row_count: nil) ⇒ LazyFrame
extended
from IO
Lazily read from a parquet file or multiple files via glob patterns.
-
.select(*exprs, eager: true, **named_exprs) ⇒ DataFrame
extended
from Functions
Run polars expressions without a context.
-
.self_dtype ⇒ DataTypeExpr
extended
from Functions
Get the dtype of
self in map_elements and map_batches. -
.set_random_seed(seed) ⇒ nil
extended
from Functions
Set the global random seed for Polars.
-
.show_versions ⇒ nil
Print out the version of Polars and its optional dependencies.
-
.sql_expr(sql) ⇒ Expr
extended
from Functions
Parse one or more SQL expressions to polars expression(s).
-
.std(column, ddof: 1) ⇒ Expr
extended
from Functions
Get the standard deviation.
- .string_cache ⇒ Object
-
.struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object
extended
from Functions
Collect several columns into a Series of dtype Struct.
-
.sum(*names) ⇒ Expr
extended
from Functions
Sum all values.
-
.sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr
extended
from Functions
Sum all values horizontally across columns.
-
.tail(column, n = 10) ⇒ Expr
extended
from Functions
Get the last
n rows. -
.thread_pool_size ⇒ Integer
Return the number of threads in the Polars thread pool.
-
.time(hour = nil, minute = nil, second = nil, microsecond = nil) ⇒ Expr
extended
from Functions
Create a Polars literal expression of type Time.
-
.time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object
extended
from Functions
Generate a time range.
-
.time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object
extended
from Functions
Create a column of time ranges.
-
.union(items, how: "vertical", strict: false) ⇒ Object
extended
from Functions
Combine multiple DataFrames, LazyFrames, or Series into a single object.
-
.using_string_cache ⇒ Boolean
extended
from Functions
Check whether the global string cache is enabled.
-
.var(column, ddof: 1) ⇒ Expr
extended
from Functions
Get the variance.
-
.when(*predicates, **constraints) ⇒ When
extended
from Functions
Start a "when, then, otherwise" expression.
-
.zeros(n, dtype: Float64, eager: false) ⇒ Object
extended
from Functions
Construct a column of length
n filled with zeros.
Class Method Details
.align_frames(*frames, on:, how: nil, select: nil, descending: false) ⇒ Object Originally defined in module Functions
Align an array of frames using the unique values from one or more columns as a key.
Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.
The original column order of input frames is not changed unless select is
specified (in which case the final column order is determined from that).
Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.
.all(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions
Either return an expression representing all columns, or evaluate a bitwise AND operation.
If no arguments are passed, this function is syntactic sugar for col("*").
Otherwise, this function is syntactic sugar for col(names).all.
.all_horizontal(*exprs) ⇒ Expr Originally defined in module Functions
Compute the bitwise AND horizontally across columns.
.any(*names, ignore_nulls: true) ⇒ Expr Originally defined in module Functions
Evaluate a bitwise OR operation.
Syntactic sugar for col(names).any.
.any_horizontal(*exprs) ⇒ Expr Originally defined in module Functions
Compute the bitwise OR horizontally across columns.
.approx_n_unique(*columns) ⇒ Expr Originally defined in module Functions
Approximate count of unique values.
This function is syntactic sugar for col(columns).approx_n_unique, and
uses the HyperLogLog++ algorithm for cardinality estimation.
.arctan2(y, x) ⇒ Expr Originally defined in module Functions
Compute two argument arctan in radians.
Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).
.arg_sort_by(exprs, *more_exprs, descending: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr Originally defined in module Functions
Find the indexes that would sort the columns.
Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.
.arg_where(condition, eager: false) ⇒ Expr, Series Originally defined in module Functions
Return indices where condition evaluates true.
.build_info ⇒ Hash
Return detailed Polars build information.
# File 'lib/polars.rb', line 159

def self.build_info
  {"version" => VERSION}
end
.business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr Originally defined in module Functions
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Count the number of business days between start and end (not including end).
.coalesce(exprs, *more_exprs, eager: false) ⇒ Expr Originally defined in module Functions
Folds the columns from left to right, keeping the first non-null value.
.col(name, *more_names) ⇒ Expr Originally defined in module Functions
Return an expression representing a column in a DataFrame.
.collect_all(lazy_frames, optimizations: DEFAULT_QUERY_OPT_FLAGS, engine: "auto", lazy: false) ⇒ Array Originally defined in module Functions
Collect multiple LazyFrames at the same time.
This runs all the computation graphs in parallel on Polars threadpool.
.concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object Originally defined in module Functions
Aggregate multiple Dataframes/Series to a single DataFrame/Series.
.concat_arr(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Horizontally concatenate columns into a single array column.
Non-array columns are reshaped to a unit-width array. All columns must have
a dtype of either Polars::Array.new(<DataType>, width) or Polars::<DataType>.
.concat_list(exprs, *more_exprs) ⇒ Expr Originally defined in module Functions
Concat the arrays in a Series dtype List in linear time.
.concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false) ⇒ Expr Originally defined in module Functions
Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
.config ⇒ Object
# File 'lib/polars/config.rb', line 531

def self.config(...)
  Config.new(...)
end
.corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr Originally defined in module Functions
Compute the Pearson's or Spearman rank correlation correlation between two columns.
.count(*columns) ⇒ Expr Originally defined in module Functions
Return the number of non-null values in the column.
This function is syntactic sugar for col(columns).count.
Calling this function without any arguments returns the number of rows in the
context. This way of using the function is deprecated. Please use len
instead.
.cov(a, b, ddof: 1, eager: false) ⇒ Expr Originally defined in module Functions
Compute the covariance between two columns/ expressions.
.cs ⇒ Object
# File 'lib/polars/selectors.rb', line 1988

def self.cs
  Selectors
end
.cum_count(*columns, reverse: false) ⇒ Expr Originally defined in module Functions
Return the cumulative count of the non-null values in the column.
This function is syntactic sugar for col(columns).cum_count.
If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.
.cum_fold(acc, exprs, returns_scalar: false, return_dtype: nil, include_init: false, &function) ⇒ Object Originally defined in module Functions
If you simply want the first encountered expression as accumulator,
consider using cum_reduce.
Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
Every cumulative result is added as a separate field in a Struct column.
.cum_reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions
Cumulatively reduce horizontally across columns with a left fold.
Every cumulative result is added as a separate field in a Struct column.
.cum_sum(*names) ⇒ Expr Originally defined in module Functions
Cumulatively sum all values.
Syntactic sugar for col(names).cum_sum.
.cum_sum_horizontal(*exprs) ⇒ Expr Originally defined in module Functions
Cumulatively sum all values horizontally across columns.
.date(year, month, day) ⇒ Expr Originally defined in module Functions
Create a Polars literal expression of type Date.
.date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions
If both low and high are passed as date types (not datetime), and the
interval granularity is no finer than 1d, the returned range is also of
type date. All other permutations return a datetime Series.
Create a range of type Datetime (or Date).
.date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object Originally defined in module Functions
interval is created according to the following string language:
- 1ns (1 nanosecond)
- 1us (1 microsecond)
- 1ms (1 millisecond)
- 1s (1 second)
- 1m (1 minute)
- 1h (1 hour)
- 1d (1 calendar day)
- 1w (1 calendar week)
- 1mo (1 calendar month)
- 1q (1 calendar quarter)
- 1y (1 calendar year)
Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".
Create a column of date ranges.
.datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil, time_unit: "us", time_zone: nil, ambiguous: "raise") ⇒ Expr Originally defined in module Functions
Create a Polars literal expression of type Datetime.
.datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions
Generate a datetime range.
.datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object Originally defined in module Functions
Create a column of datetime ranges.
.disable_string_cache ⇒ nil Originally defined in module Functions
Disable and clear the global string cache.
.dtype_of(col_or_expr) ⇒ DataTypeExpr Originally defined in module Functions
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Get a lazily evaluated DataType of a column or expression.
.duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: nil) ⇒ Expr Originally defined in module Functions
Create polars Duration from distinct time components.
.element ⇒ Expr Originally defined in module Functions
Alias for an element being evaluated in an eval expression.
.enable_string_cache ⇒ nil Originally defined in module Functions
Enable the global string cache.
Categorical columns created under the same global string cache have
the same underlying physical value when string values are equal. This allows the
columns to be concatenated or used in a join operation, for example.
.escape_regex(s) ⇒ String Originally defined in module Functions
Escapes string regex meta characters.
.exclude(columns, *more_columns) ⇒ Object Originally defined in module Functions
Exclude certain columns from a wildcard/regex selection.
.field(name) ⇒ Expr Originally defined in module Functions
Select a field in the current struct.with_fields scope.
.fold(acc, exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions
Accumulate over multiple columns horizontally/row wise with a left fold.
.format(f_string, *args) ⇒ Expr Originally defined in module Functions
Format expressions as a string.
.from_epoch(column, time_unit: "s") ⇒ Object Originally defined in module Functions
Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
Depending on the unit provided, this function will return a different dtype:
- time_unit: "d" returns pl.Date
- time_unit: "s" returns pl.Datetime["us"]
- time_unit: "ms" returns pl.Datetime["ms"]
- time_unit: "us" returns pl.Datetime["us"]
- time_unit: "ns" returns pl.Datetime["ns"]
.from_hash(data, schema: nil, schema_overrides: nil, strict: true) ⇒ DataFrame Originally defined in module Convert
Construct a DataFrame from a hash of arrays.
This operation clones data, unless you pass in a Hash<String, Series>.
.from_hashes(data, schema: nil, schema_overrides: nil, strict: true, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module Convert
Construct a DataFrame from an array of hashes. This operation clones data.
.from_numo(data, schema: nil, schema_overrides: nil, orient: nil) ⇒ DataFrame Originally defined in module Convert
Construct a DataFrame from a Numo ndarray. This operation clones data.
Note that this is slower than creating from columnar memory.
.from_records(data, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module Convert
Construct a DataFrame from an array of arrays. This operation clones data.
Note that this is slower than creating from columnar memory.
.get_index_type ⇒ Object
Return the data type used for Polars indexing.
149 150 151 |
# File 'lib/polars.rb', line 149 def self.get_index_type Plr.get_index_type end |
.groups(column) ⇒ Object Originally defined in module Functions
Syntactic sugar for Polars.col("foo").agg_groups.
.head(column, n = 10) ⇒ Expr Originally defined in module Functions
Get the first n rows.
This function is syntactic sugar for col(column).head(n).
.implode(*columns) ⇒ Expr Originally defined in module Functions
Aggregate all column values into a list.
This function is syntactic sugar for col(name).implode.
.int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64) ⇒ Expr, Series Also known as: arange Originally defined in module Functions
Create a range expression (or Series).
This can be used in a select, with_column, etc. Be sure that the resulting
range size is equal to the length of the DataFrame you are collecting.
.int_ranges(start = 0, stop = nil, step: 1, dtype: Int64, eager: false) ⇒ Expr, Series Originally defined in module Functions
Generate a range of integers for each row of the input columns.
.len ⇒ Expr Also known as: length Originally defined in module Functions
Return the number of rows in the context.
This is similar to COUNT(*) in SQL.
.linear_spaces(start, stop, num_samples, closed: "both", as_array: false, eager: false) ⇒ Expr, Series Originally defined in module Functions
This functionality is experimental. It may be changed at any point without it being considered a breaking change.
Generate a sequence of evenly-spaced values for each row between start and stop.
The number of values in each sequence is determined by num_samples.
.lit(value, dtype: nil, allow_object: false) ⇒ Expr Originally defined in module Functions
Return an expression representing a literal value.
.map_batches(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr Originally defined in module Functions
This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.
A UDF passed to map_batches must be pure, meaning that it cannot modify
or depend on state other than its arguments. We may call the function
with arbitrary input data.
Map a custom function over multiple columns/expressions.
Produces a single Series result.
.map_groups(exprs, return_dtype: nil, is_elementwise: false, returns_scalar: false, &function) ⇒ Expr Originally defined in module Functions
This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise.
Apply a custom/user-defined function (UDF) in a GroupBy context.
.max(*names) ⇒ Expr Originally defined in module Functions
Get the maximum value.
Syntactic sugar for col(names).max.
.max_horizontal(*exprs) ⇒ Expr Originally defined in module Functions
Get the maximum value horizontally across columns.
.mean(*columns) ⇒ Expr Originally defined in module Functions
Get the mean value.
This function is syntactic sugar for col(columns).mean.
.mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr Originally defined in module Functions
Compute the mean of all values horizontally across columns.
.median(*columns) ⇒ Expr Originally defined in module Functions
Get the median value.
This function is syntactic sugar for pl.col(columns).median.
.min(*names) ⇒ Expr Originally defined in module Functions
Get the minimum value.
Syntactic sugar for col(names).min.
.min_horizontal(*exprs) ⇒ Expr Originally defined in module Functions
Get the minimum value horizontally across columns.
.n_unique(*columns) ⇒ Expr Originally defined in module Functions
Count unique values.
This function is syntactic sugar for col(columns).n_unique.
.nth(*indices, strict: true) ⇒ Expr Originally defined in module Functions
Get the nth column(s) of the context.
.ones(n, dtype: Float64, eager: false) ⇒ Object Originally defined in module Functions
Construct a column of length n filled with ones.
This is syntactic sugar for the repeat function.
.quantile(column, quantile, interpolation: "nearest") ⇒ Expr Originally defined in module Functions
Syntactic sugar for Polars.col("foo").quantile(...).
.read_avro(source, columns: nil, n_rows: nil) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from Apache Avro format.
.read_csv(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, batch_size: 8192, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, storage_options: nil, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true) ⇒ DataFrame Originally defined in module IO
This operation defaults to a rechunk operation at the end, meaning that
all data will be stored continuously in memory.
Set rechunk: false if you are benchmarking the csv-reader. A rechunk is
an expensive operation.
Read a CSV file into a DataFrame.
.read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ BatchedCsvReader Originally defined in module IO
Use scan_csv().collect_batches instead.
Read a CSV file in batches.
Upon creation of the BatchedCsvReader,
polars will gather statistics and determine the
file chunks. After that work will only be done
if next_batches is called.
.read_database(query, schema_overrides: nil) ⇒ DataFrame Originally defined in module IO
Read a SQL query into a DataFrame.
.read_delta(source, version: nil, columns: nil, rechunk: nil, storage_options: nil, delta_table_options: nil) ⇒ DataFrame Originally defined in module IO
Reads into a DataFrame from a Delta lake table.
.read_ipc(source, columns: nil, n_rows: nil, memory_map: true, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from Arrow IPC (Feather v2) file.
.read_ipc_schema(source) ⇒ Hash Originally defined in module IO
Get a schema of the IPC file without reading data.
.read_ipc_stream(source, columns: nil, n_rows: nil, storage_options: nil, row_index_name: nil, row_index_offset: 0, rechunk: true) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from Arrow IPC record batch stream.
See "Streaming format" on https://arrow.apache.org/docs/python/ipc.html.
.read_json(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from a JSON file.
.read_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from a newline delimited JSON file.
.read_parquet(source, columns: nil, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil) ⇒ DataFrame Originally defined in module IO
Read into a DataFrame from a parquet file.
.read_parquet_metadata(source, storage_options: nil, credential_provider: "auto", retries: nil) ⇒ Hash Originally defined in module IO
This functionality is considered experimental. It may be removed or changed at any point without it being considered a breaking change.
Get file-level custom metadata of a Parquet file without reading data.
.read_parquet_schema(source) ⇒ Schema Originally defined in module IO
Get a schema of the Parquet file without reading data.
.reduce(exprs, returns_scalar: false, return_dtype: nil, &function) ⇒ Expr Originally defined in module Functions
Accumulate over multiple columns horizontally/row wise with a left fold.
.repeat(value, n, dtype: nil, eager: false) ⇒ Object Originally defined in module Functions
Repeat a single value n times.
.rolling_corr(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr Originally defined in module Functions
Compute the rolling correlation between two columns/expressions.
The window at a given row includes the row itself and the
window_size - 1 elements before it.
.rolling_cov(a, b, window_size:, min_samples: nil, ddof: 1) ⇒ Expr Originally defined in module Functions
Compute the rolling covariance between two columns/expressions.
The window at a given row includes the row itself and the
window_size - 1 elements before it.
.scan_csv(source, has_header: true, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema: nil, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, cache: true, with_column_names: nil, infer_schema: true, infer_schema_length: N_INFER_DEFAULT, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, try_parse_dates: false, eol_char: "\n", new_columns: nil, raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame Originally defined in module IO
Lazily read from a CSV file or multiple files via glob patterns.
This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.
.scan_delta(source, version: nil, storage_options: nil, delta_table_options: nil, rechunk: nil) ⇒ LazyFrame Originally defined in module IO
Lazily read from a Delta lake table.
.scan_iceberg(source, snapshot_id: nil, storage_options: nil) ⇒ LazyFrame Originally defined in module IO
Lazily read from an Apache Iceberg table.
.scan_ipc(source, n_rows: nil, cache: true, rechunk: false, row_index_name: nil, row_index_offset: 0, glob: true, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, hive_partitioning: nil, hive_schema: nil, try_parse_hive_dates: true, include_file_paths: nil, _record_batch_statistics: false) ⇒ LazyFrame Originally defined in module IO
Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.
This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.
.scan_ndjson(source, schema: nil, schema_overrides: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 1024, n_rows: nil, low_memory: false, rechunk: false, row_index_name: nil, row_index_offset: 0, ignore_errors: false, storage_options: nil, credential_provider: "auto", retries: nil, file_cache_ttl: nil, include_file_paths: nil) ⇒ LazyFrame Originally defined in module IO
Lazily read from a newline delimited JSON file.
This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.
.scan_parquet(source, n_rows: nil, row_index_name: nil, row_index_offset: 0, parallel: "auto", use_statistics: true, hive_partitioning: nil, glob: true, hidden_file_prefix: nil, schema: nil, hive_schema: nil, try_parse_hive_dates: true, rechunk: false, low_memory: false, cache: true, storage_options: nil, credential_provider: "auto", retries: nil, include_file_paths: nil, missing_columns: "raise", allow_missing_columns: nil, extra_columns: "raise", cast_options: nil, _column_mapping: nil, _default_values: nil, _deletion_files: nil, _table_statistics: nil, _row_count: nil) ⇒ LazyFrame Originally defined in module IO
Lazily read from a parquet file or multiple files via glob patterns.
This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead.
.select(*exprs, eager: true, **named_exprs) ⇒ DataFrame Originally defined in module Functions
Run polars expressions without a context.
This is syntactic sugar for running df.select on an empty DataFrame.
.self_dtype ⇒ DataTypeExpr Originally defined in module Functions
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Get the dtype of self in map_elements and map_batches.
.set_random_seed(seed) ⇒ nil Originally defined in module Functions
Set the global random seed for Polars.
This random seed is used to determine things such as shuffle ordering.
.show_versions ⇒ nil
Print out the version of Polars and its optional dependencies.
166 167 168 169 170 171 172 173 |
# File 'lib/polars.rb', line 166 def self.show_versions puts "--------Version info---------" puts "Polars: #{VERSION}" puts "Index type: #{get_index_type}" puts "Platform: #{RUBY_PLATFORM}" puts "Ruby: #{RUBY_VERSION}" nil end |
.sql_expr(sql) ⇒ Expr Originally defined in module Functions
Parse one or more SQL expressions to polars expression(s).
.std(column, ddof: 1) ⇒ Expr Originally defined in module Functions
Get the standard deviation.
This function is syntactic sugar for col(column).std(ddof: ddof).
.string_cache ⇒ Object
31 32 33 |
# File 'lib/polars/string_cache.rb', line 31 def self.string_cache(...) StringCache.new(...) end |
.struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object Originally defined in module Functions
Collect several columns into a Series of dtype Struct.
.sum(*names) ⇒ Expr Originally defined in module Functions
Sum all values.
Syntactic sugar for col(name).sum.
.sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr Originally defined in module Functions
Sum all values horizontally across columns.
.tail(column, n = 10) ⇒ Expr Originally defined in module Functions
Get the last n rows.
This function is syntactic sugar for col(column).tail(n).
.thread_pool_size ⇒ Integer
Return the number of threads in the Polars thread pool.
138 139 140 |
# File 'lib/polars.rb', line 138 def self.thread_pool_size Plr.thread_pool_size end |
.time(hour = nil, minute = nil, second = nil, microsecond = nil) ⇒ Expr Originally defined in module Functions
Create a Polars literal expression of type Time.
.time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object Originally defined in module Functions
Generate a time range.
.time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object Originally defined in module Functions
Create a column of time ranges.
.union(items, how: "vertical", strict: false) ⇒ Object Originally defined in module Functions
This function does not guarantee any specific ordering of rows in the result.
If you need predictable row ordering, use Polars.concat instead.
Combine multiple DataFrames, LazyFrames, or Series into a single object.
.using_string_cache ⇒ Boolean Originally defined in module Functions
Check whether the global string cache is enabled.
.var(column, ddof: 1) ⇒ Expr Originally defined in module Functions
Get the variance.
This function is syntactic sugar for col(column).var(ddof: ddof).
.when(*predicates, **constraints) ⇒ When Originally defined in module Functions
Start a "when, then, otherwise" expression.