Class: Polars::ArrayExpr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/array_expr.rb

Overview

Namespace for array related expressions.

Instance Method Summary collapse

Instance Method Details

#all ⇒ Expr

Evaluate whether all boolean values are true for every subarray.

Examples:

df = Polars::DataFrame.new(
  {
    "a": [
      [true, true],
      [false, true],
      [false, false],
      [nil, nil],
      nil
    ]
  },
  schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
)
df.with_columns(all: Polars.col("a").arr.all)
# =>
# shape: (5, 2)
# ┌────────────────┬───────┐
# │ a              ┆ all   │
# │ ---            ┆ ---   │
# │ array[bool, 2] ┆ bool  │
# ╞════════════════╪═══════╡
# │ [true, true]   ┆ true  │
# │ [false, true]  ┆ false │
# │ [false, false] ┆ false │
# │ [null, null]   ┆ true  │
# │ null           ┆ null  │
# └────────────────┴───────┘

Returns:



202
203
204
# File 'lib/polars/array_expr.rb', line 202

# Evaluate whether all boolean values are true for every subarray.
#
# Calls `arr_all` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def all
  all_rbexpr = _rbexpr.arr_all
  Utils.wrap_expr(all_rbexpr)
end

#any ⇒ Expr

Evaluate whether any boolean value is true for every subarray.

Examples:

df = Polars::DataFrame.new(
  {
    "a": [
      [true, true],
      [false, true],
      [false, false],
      [nil, nil],
      nil
    ]
  },
  schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
)
df.with_columns(any: Polars.col("a").arr.any)
# =>
# shape: (5, 2)
# ┌────────────────┬───────┐
# │ a              ┆ any   │
# │ ---            ┆ ---   │
# │ array[bool, 2] ┆ bool  │
# ╞════════════════╪═══════╡
# │ [true, true]   ┆ true  │
# │ [false, true]  ┆ true  │
# │ [false, false] ┆ false │
# │ [null, null]   ┆ false │
# │ null           ┆ null  │
# └────────────────┴───────┘

Returns:



167
168
169
# File 'lib/polars/array_expr.rb', line 167

# Evaluate whether any boolean value is true for every subarray.
#
# Calls `arr_any` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def any
  any_rbexpr = _rbexpr.arr_any
  Utils.wrap_expr(any_rbexpr)
end

#arg_max ⇒ Expr

Retrieve the index of the maximum value in every sub-array.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  },
  schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.with_columns(arg_max: Polars.col("a").arr.arg_max)
# =>
# shape: (2, 2)
# ┌───────────────┬─────────┐
# │ a             ┆ arg_max │
# │ ---           ┆ ---     │
# │ array[i64, 2] ┆ u32     │
# ╞═══════════════╪═════════╡
# │ [1, 2]        ┆ 1       │
# │ [2, 1]        ┆ 0       │
# └───────────────┴─────────┘

Returns:



324
325
326
# File 'lib/polars/array_expr.rb', line 324

# Retrieve the index of the maximum value in every sub-array.
#
# Calls `arr_arg_max` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def arg_max
  max_index_rbexpr = _rbexpr.arr_arg_max
  Utils.wrap_expr(max_index_rbexpr)
end

#arg_min ⇒ Expr

Retrieve the index of the minimal value in every sub-array.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  },
  schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.with_columns(arg_min: Polars.col("a").arr.arg_min)
# =>
# shape: (2, 2)
# ┌───────────────┬─────────┐
# │ a             ┆ arg_min │
# │ ---           ┆ ---     │
# │ array[i64, 2] ┆ u32     │
# ╞═══════════════╪═════════╡
# │ [1, 2]        ┆ 0       │
# │ [2, 1]        ┆ 1       │
# └───────────────┴─────────┘

Returns:



298
299
300
# File 'lib/polars/array_expr.rb', line 298

# Retrieve the index of the minimal value in every sub-array.
#
# Calls `arr_arg_min` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def arg_min
  min_index_rbexpr = _rbexpr.arr_arg_min
  Utils.wrap_expr(min_index_rbexpr)
end

#contains(item) ⇒ Expr

Check if sub-arrays contain the given item.

Examples:

df = Polars::DataFrame.new(
  {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
  schema: {"a" => Polars::Array.new(Polars::String, 2)}
)
df.with_columns(contains: Polars.col("a").arr.contains("a"))
# =>
# shape: (3, 2)
# ┌───────────────┬──────────┐
# │ a             ┆ contains │
# │ ---           ┆ ---      │
# │ array[str, 2] ┆ bool     │
# ╞═══════════════╪══════════╡
# │ ["a", "b"]    ┆ true     │
# │ ["x", "y"]    ┆ false    │
# │ ["a", "c"]    ┆ true     │
# └───────────────┴──────────┘

Parameters:

  • item (Object)

    Item that will be checked for membership

Returns:



504
505
506
507
# File 'lib/polars/array_expr.rb', line 504

# Check if sub-arrays contain the given item.
#
# The item is converted with `Utils.parse_into_expression` using
# `str_as_lit: true` before being passed to `arr_contains`.
#
# @param item [Object] Item that will be checked for membership
# @return [Expr]
def contains(item)
  item_expr = Utils.parse_into_expression(item, str_as_lit: true)
  membership = _rbexpr.arr_contains(item_expr)
  Utils.wrap_expr(membership)
end

#count_matches(element) ⇒ Expr

Count how often the value produced by element occurs.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
# =>
# shape: (3, 2)
# ┌───────────────┬────────────────┐
# │ a             ┆ number_of_twos │
# │ ---           ┆ ---            │
# │ array[i64, 2] ┆ u32            │
# ╞═══════════════╪════════════════╡
# │ [1, 2]        ┆ 1              │
# │ [1, 1]        ┆ 0              │
# │ [2, 2]        ┆ 2              │
# └───────────────┴────────────────┘

Parameters:

  • element (Object)

    An expression that produces a single value

Returns:



532
533
534
535
# File 'lib/polars/array_expr.rb', line 532

# Count how often the value produced by element occurs.
#
# The element is converted with `Utils.parse_into_expression` using
# `str_as_lit: true` before being passed to `arr_count_matches`.
#
# @param element [Object] An expression that produces a single value
# @return [Expr]
def count_matches(element)
  element_expr = Utils.parse_into_expression(element, str_as_lit: true)
  match_count = _rbexpr.arr_count_matches(element_expr)
  Utils.wrap_expr(match_count)
end

#explode ⇒ Expr

Returns a column with a separate row for every array element.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
)
df.select(Polars.col("a").arr.explode)
# =>
# shape: (6, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# └─────┘

Returns:



476
477
478
# File 'lib/polars/array_expr.rb', line 476

# Returns a column with a separate row for every array element.
#
# Calls `explode` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def explode
  exploded = _rbexpr.explode
  Utils.wrap_expr(exploded)
end

#first ⇒ Expr

Get the first value of the sub-arrays.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
  schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
)
df.with_columns(first: Polars.col("a").arr.first)
# =>
# shape: (3, 2)
# ┌───────────────┬───────┐
# │ a             ┆ first │
# │ ---           ┆ ---   │
# │ array[i32, 3] ┆ i32   │
# ╞═══════════════╪═══════╡
# │ [1, 2, 3]     ┆ 1     │
# │ [4, 5, 6]     ┆ 4     │
# │ [7, 8, 9]     ┆ 7     │
# └───────────────┴───────┘

Returns:



386
387
388
# File 'lib/polars/array_expr.rb', line 386

# Get the first value of the sub-arrays.
#
# Shorthand for `get` with index 0.
#
# @return [Expr]
def first
  head_index = 0
  get(head_index)
end

#get(index, null_on_oob: true) ⇒ Expr

Get the value by index in the sub-arrays.

So index 0 would return the first item of every sub-array and index -1 would return the last item of every sub-array. If an index is out of bounds, it will return nil (or raise an error when null_on_oob is false).

Examples:

df = Polars::DataFrame.new(
  {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
  schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
)
df.with_columns(get: Polars.col("arr").arr.get("idx"))
# =>
# shape: (3, 3)
# ┌───────────────┬─────┬──────┐
# │ arr           ┆ idx ┆ get  │
# │ ---           ┆ --- ┆ ---  │
# │ array[i32, 3] ┆ i32 ┆ i32  │
# ╞═══════════════╪═════╪══════╡
# │ [1, 2, 3]     ┆ 1   ┆ 2    │
# │ [4, 5, 6]     ┆ -2  ┆ 5    │
# │ [7, 8, 9]     ┆ 4   ┆ null │
# └───────────────┴─────┴──────┘

Parameters:

  • index (Integer)

    Index to return per sub-array

  • null_on_oob (Boolean) (defaults to: true)

    Behavior if an index is out of bounds: true -> set as null; false -> raise an error.

Returns:



360
361
362
363
# File 'lib/polars/array_expr.rb', line 360

# Get the value by index in the sub-arrays.
#
# The index is converted with `Utils.parse_into_expression` (default
# options) and forwarded to `arr_get` together with `null_on_oob`.
#
# @param index [Integer] Index to return per sub-array (the documented
#   example also passes a column name)
# @param null_on_oob [Boolean] Behavior if an index is out of bounds:
#   true -> set as null; false -> raise an error
# @return [Expr]
def get(index, null_on_oob: true)
  index_expr = Utils.parse_into_expression(index)
  picked = _rbexpr.arr_get(index_expr, null_on_oob)
  Utils.wrap_expr(picked)
end

#join(separator, ignore_nulls: true) ⇒ Expr

Join all string items in a sub-array and place a separator between them.

This errors if inner type of array != String.

Examples:

df = Polars::DataFrame.new(
  {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
  schema: {
    "s" => Polars::Array.new(Polars::String, 2),
    "separator" => Polars::String
  }
)
df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
# =>
# shape: (2, 3)
# ┌───────────────┬───────────┬──────┐
# │ s             ┆ separator ┆ join │
# │ ---           ┆ ---       ┆ ---  │
# │ array[str, 2] ┆ str       ┆ str  │
# ╞═══════════════╪═══════════╪══════╡
# │ ["a", "b"]    ┆ *         ┆ a*b  │
# │ ["x", "y"]    ┆ _         ┆ x_y  │
# └───────────────┴───────────┴──────┘

Parameters:

  • separator (String)

    string to separate the items with

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

    If set to false, null values will be propagated. If the sub-list contains any null values, the output is nil.

Returns:



448
449
450
451
# File 'lib/polars/array_expr.rb', line 448

# Join all string items in a sub-array and place a separator between them.
#
# The separator is converted with `Utils.parse_into_expression` using
# `str_as_lit: true` and forwarded to `arr_join` together with
# `ignore_nulls`.
#
# @param separator [String] string to separate the items with
# @param ignore_nulls [Boolean] Ignore null values (default); if false,
#   null values will be propagated
# @return [Expr]
def join(separator, ignore_nulls: true)
  separator_expr = Utils.parse_into_expression(separator, str_as_lit: true)
  joined = _rbexpr.arr_join(separator_expr, ignore_nulls)
  Utils.wrap_expr(joined)
end

#last ⇒ Expr

Get the last value of the sub-arrays.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
  schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
)
df.with_columns(last: Polars.col("a").arr.last)
# =>
# shape: (3, 2)
# ┌───────────────┬──────┐
# │ a             ┆ last │
# │ ---           ┆ ---  │
# │ array[i32, 3] ┆ i32  │
# ╞═══════════════╪══════╡
# │ [1, 2, 3]     ┆ 3    │
# │ [4, 5, 6]     ┆ 6    │
# │ [7, 8, 9]     ┆ 9    │
# └───────────────┴──────┘

Returns:



411
412
413
# File 'lib/polars/array_expr.rb', line 411

# Get the last value of the sub-arrays.
#
# Shorthand for `get` with index -1.
#
# @return [Expr]
def last
  tail_index = -1
  get(tail_index)
end

#max ⇒ Expr

Compute the max values of the sub-arrays.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2], [4, 3]]},
  schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.select(Polars.col("a").arr.max)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# │ 4   │
# └─────┘

Returns:



56
57
58
# File 'lib/polars/array_expr.rb', line 56

# Compute the max values of the sub-arrays.
#
# Calls `array_max` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def max
  max_rbexpr = _rbexpr.array_max
  Utils.wrap_expr(max_rbexpr)
end

#min ⇒ Expr

Compute the min values of the sub-arrays.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2], [4, 3]]},
  schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.select(Polars.col("a").arr.min)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# └─────┘

Returns:



32
33
34
# File 'lib/polars/array_expr.rb', line 32

# Compute the min values of the sub-arrays.
#
# Calls `array_min` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def min
  min_rbexpr = _rbexpr.array_min
  Utils.wrap_expr(min_rbexpr)
end

#reverse ⇒ Expr

Reverse the arrays in this column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]]
  },
  schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
)
df.with_columns(reverse: Polars.col("a").arr.reverse)
# =>
# shape: (2, 2)
# ┌───────────────┬───────────────┐
# │ a             ┆ reverse       │
# │ ---           ┆ ---           │
# │ array[i64, 3] ┆ array[i64, 3] │
# ╞═══════════════╪═══════════════╡
# │ [3, 2, 1]     ┆ [1, 2, 3]     │
# │ [9, 1, 2]     ┆ [2, 1, 9]     │
# └───────────────┴───────────────┘

Returns:



272
273
274
# File 'lib/polars/array_expr.rb', line 272

# Reverse the arrays in this column.
#
# Calls `arr_reverse` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def reverse
  reversed = _rbexpr.arr_reverse
  Utils.wrap_expr(reversed)
end

#sort(descending: false, nulls_last: false) ⇒ Expr

Sort the arrays in this column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]],
  },
  schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
)
df.with_columns(sort: Polars.col("a").arr.sort)
# =>
# shape: (2, 2)
# ┌───────────────┬───────────────┐
# │ a             ┆ sort          │
# │ ---           ┆ ---           │
# │ array[i64, 3] ┆ array[i64, 3] │
# ╞═══════════════╪═══════════════╡
# │ [3, 2, 1]     ┆ [1, 2, 3]     │
# │ [9, 1, 2]     ┆ [1, 2, 9]     │
# └───────────────┴───────────────┘
df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
# =>
# shape: (2, 2)
# ┌───────────────┬───────────────┐
# │ a             ┆ sort          │
# │ ---           ┆ ---           │
# │ array[i64, 3] ┆ array[i64, 3] │
# ╞═══════════════╪═══════════════╡
# │ [3, 2, 1]     ┆ [3, 2, 1]     │
# │ [9, 1, 2]     ┆ [9, 2, 1]     │
# └───────────────┴───────────────┘

Parameters:

  • descending (Boolean) (defaults to: false)

    Sort in descending order.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last.

Returns:



246
247
248
# File 'lib/polars/array_expr.rb', line 246

# Sort the arrays in this column.
#
# Forwards both flags positionally to `arr_sort` on the underlying
# `_rbexpr` and hands the result to `Utils.wrap_expr`.
#
# @param descending [Boolean] Sort in descending order.
# @param nulls_last [Boolean] Place null values last.
# @return [Expr]
def sort(descending: false, nulls_last: false)
  sorted = _rbexpr.arr_sort(descending, nulls_last)
  Utils.wrap_expr(sorted)
end

#sum ⇒ Expr

Compute the sum values of the sub-arrays.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2], [4, 3]]},
  schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
)
df.select(Polars.col("a").arr.sum)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# │ 7   │
# └─────┘

Returns:



80
81
82
# File 'lib/polars/array_expr.rb', line 80

# Compute the sum values of the sub-arrays.
#
# Calls `array_sum` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def sum
  sum_rbexpr = _rbexpr.array_sum
  Utils.wrap_expr(sum_rbexpr)
end

#to_list ⇒ Expr

Convert an Array column into a List column with the same inner data type.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[1, 2], [3, 4]]},
  schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
)
df.select(Polars.col("a").arr.to_list)
# =>
# shape: (2, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ list[i8] │
# ╞══════════╡
# │ [1, 2]   │
# │ [3, 4]   │
# └──────────┘

Returns:



132
133
134
# File 'lib/polars/array_expr.rb', line 132

# Convert an Array column into a List column with the same inner data type.
#
# Calls `arr_to_list` on the underlying `_rbexpr` and hands the result to
# `Utils.wrap_expr`.
#
# @return [Expr]
def to_list
  as_list = _rbexpr.arr_to_list
  Utils.wrap_expr(as_list)
end

#unique(maintain_order: false) ⇒ Expr

Get the unique/distinct values in the array.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 1, 2]]
  },
  schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
)
df.select(Polars.col("a").arr.unique)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2]    │
# └───────────┘

Parameters:

  • maintain_order (Boolean) (defaults to: false)

    Maintain order of data. This requires more work.

Returns:



108
109
110
# File 'lib/polars/array_expr.rb', line 108

# Get the unique/distinct values in the array.
#
# Forwards `maintain_order` to `arr_unique` on the underlying `_rbexpr`
# and hands the result to `Utils.wrap_expr`.
#
# @param maintain_order [Boolean] Maintain order of data. This requires
#   more work.
# @return [Expr]
def unique(maintain_order: false)
  distinct = _rbexpr.arr_unique(maintain_order)
  Utils.wrap_expr(distinct)
end