Class: Polars::ListExpr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/list_expr.rb

Overview

Namespace for list related expressions.

Instance Method Summary collapse

Instance Method Details

#[](item) ⇒ Expr

Get the value by index in the sublists.

Returns:



397
398
399
# File 'lib/polars/list_expr.rb', line 397

# Get the value by index in the sublists.
#
# Bracket shorthand that forwards to `get` with its default options.
#
# @param item [Object] index handed to `get` unchanged
# @return [Expr]
def [](item)
  index = item
  get(index)
end

#all ⇒ Expr

Evaluate whether all boolean values in a list are true.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
)
df.with_columns(all: Polars.col("a").list.all)
# =>
# shape: (6, 2)
# ┌────────────────┬───────┐
# │ a              ┆ all   │
# │ ---            ┆ ---   │
# │ list[bool]     ┆ bool  │
# ╞════════════════╪═══════╡
# │ [true, true]   ┆ true  │
# │ [false, true]  ┆ false │
# │ [false, false] ┆ false │
# │ [null]         ┆ true  │
# │ []             ┆ true  │
# │ null           ┆ null  │
# └────────────────┴───────┘

Returns:



35
36
37
# File 'lib/polars/list_expr.rb', line 35

# Evaluate whether all boolean values in a list are true.
#
# @return [Expr]
def all
  all_true = _rbexpr.list_all
  Utils.wrap_expr(all_true)
end

#any ⇒ Expr

Evaluate whether any boolean value in a list is true.

Examples:

df = Polars::DataFrame.new(
  {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
)
df.with_columns(any: Polars.col("a").list.any)
# =>
# shape: (6, 2)
# ┌────────────────┬───────┐
# │ a              ┆ any   │
# │ ---            ┆ ---   │
# │ list[bool]     ┆ bool  │
# ╞════════════════╪═══════╡
# │ [true, true]   ┆ true  │
# │ [false, true]  ┆ true  │
# │ [false, false] ┆ false │
# │ [null]         ┆ false │
# │ []             ┆ false │
# │ null           ┆ null  │
# └────────────────┴───────┘

Returns:



62
63
64
# File 'lib/polars/list_expr.rb', line 62

# Evaluate whether any boolean value in a list is true.
#
# @return [Expr]
def any
  any_true = _rbexpr.list_any
  Utils.wrap_expr(any_true)
end

#arg_max ⇒ Expr

Retrieve the index of the maximum value in every sublist.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  }
)
df.select(Polars.col("a").list.arg_max)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# └─────┘

Returns:



583
584
585
# File 'lib/polars/list_expr.rb', line 583

# Retrieve the index of the maximum value in every sublist.
#
# @return [Expr]
def arg_max
  max_index = _rbexpr.list_arg_max
  Utils.wrap_expr(max_index)
end

#arg_min ⇒ Expr

Retrieve the index of the minimal value in every sublist.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  }
)
df.select(Polars.col("a").list.arg_min)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Returns:



558
559
560
# File 'lib/polars/list_expr.rb', line 558

# Retrieve the index of the minimal value in every sublist.
#
# @return [Expr]
def arg_min
  min_index = _rbexpr.list_arg_min
  Utils.wrap_expr(min_index)
end

#concat(other) ⇒ Expr

Concatenate the arrays in a Series of dtype List in linear time.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [["a"], ["x"]],
    "b" => [["b", "c"], ["y", "z"]]
  }
)
df.select(Polars.col("a").list.concat("b"))
# =>
# shape: (2, 1)
# ┌─────────────────┐
# │ a               │
# │ ---             │
# │ list[str]       │
# ╞═════════════════╡
# │ ["a", "b", "c"] │
# │ ["x", "y", "z"] │
# └─────────────────┘

Parameters:

  • other (Object)

    Columns to concat into a List Series

Returns:



345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/polars/list_expr.rb', line 345

# Concatenate this list expression with other columns/values.
#
# @param other [Object]
#   A single value, or an Array of values, to concatenate after this
#   expression. An Array whose first element is not an Expr, String or
#   Series is wrapped in a one-row Series and concatenated as a whole.
# @return [Expr] the result of `Polars.concat_list`
def concat(other)
  if other.is_a?(::Array)
    first_item = other[0]
    plain_values = [Expr, String, Series].none? { |klass| first_item.is_a?(klass) }
    # Re-enter with the whole array wrapped as a single Series.
    return concat(Series.new([other])) if plain_values

    # Copy so the caller's array is not modified below.
    operands = other.dup
  else
    operands = [other]
  end

  # This expression always comes first in the concatenation.
  operands.unshift(Utils.wrap_expr(_rbexpr))
  Polars.concat_list(operands)
end

#contains(item) ⇒ Expr

Check if sublists contain the given item.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.contains(1))
# =>
# shape: (3, 1)
# ┌───────┐
# │ foo   │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ false │
# │ true  │
# └───────┘

Parameters:

  • item (Object)

    Item that will be checked for membership

Returns:



504
505
506
# File 'lib/polars/list_expr.rb', line 504

# Check if sublists contain the given item.
#
# @param item [Object] item that will be checked for membership
# @return [Expr]
def contains(item)
  needle = Utils.parse_into_expression(item)
  membership = _rbexpr.list_contains(needle)
  Utils.wrap_expr(membership)
end

#count_matches(element) ⇒ Expr Also known as: count_match

Count how often the value produced by element occurs.

Examples:

df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
# =>
# shape: (5, 1)
# ┌────────────────┐
# │ number_of_twos │
# │ ---            │
# │ u32            │
# ╞════════════════╡
# │ 0              │
# │ 0              │
# │ 2              │
# │ 1              │
# │ 0              │
# └────────────────┘

Parameters:

  • element (Expr)

    An expression that produces a single value

Returns:



724
725
726
# File 'lib/polars/list_expr.rb', line 724

# Count how often the value produced by `element` occurs.
#
# @param element [Expr] an expression that produces a single value
# @return [Expr]
def count_matches(element)
  target = Utils.parse_into_expression(element)
  occurrences = _rbexpr.list_count_matches(target)
  Utils.wrap_expr(occurrences)
end

#diff(n: 1, null_behavior: "ignore") ⇒ Expr

Calculate the n-th discrete difference of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.diff
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [null, 1, … 1]
#         [null, -8, -1]
# ]

Parameters:

  • n (Integer) (defaults to: 1)

    Number of slots to shift.

  • null_behavior ("ignore", "drop") (defaults to: "ignore")

    How to handle null values.

Returns:



606
607
608
# File 'lib/polars/list_expr.rb', line 606

# Calculate the n-th discrete difference of every sublist.
#
# @param n [Integer] number of slots to shift
# @param null_behavior ["ignore", "drop"] how to handle null values
# @return [Expr]
def diff(n: 1, null_behavior: "ignore")
  differences = _rbexpr.list_diff(n, null_behavior)
  Utils.wrap_expr(differences)
end

#drop_nulls ⇒ Expr

Drop all null values in the list.

The original order of the remaining elements is preserved.

Examples:

df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
# =>
# shape: (3, 2)
# ┌────────────────┬────────────┐
# │ values         ┆ drop_nulls │
# │ ---            ┆ ---        │
# │ list[i64]      ┆ list[i64]  │
# ╞════════════════╪════════════╡
# │ [null, 1, … 2] ┆ [1, 2]     │
# │ [null]         ┆ []         │
# │ [3, 4]         ┆ [3, 4]     │
# └────────────────┴────────────┘

Returns:



108
109
110
# File 'lib/polars/list_expr.rb', line 108

# Drop all null values in the list.
#
# @return [Expr]
def drop_nulls
  without_nulls = _rbexpr.list_drop_nulls
  Utils.wrap_expr(without_nulls)
end

#eval(expr, parallel: false) ⇒ Expr

Run any polars expression against the lists' elements.

Examples:

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Parameters:

  • expr (Expr)

    Expression to run. Note that you can select an element with Polars.first, or Polars.col

  • parallel (Boolean) (defaults to: false)

    Run all expressions in parallel. Don't activate this blindly. Parallelism is only worth it if there is enough work to do per thread.

    This likely should not be used in the group by context, because execution is already parallelized per group.

Returns:



787
788
789
# File 'lib/polars/list_expr.rb', line 787

# Run any polars expression against the lists' elements.
#
# @param expr [Expr] expression to run; its `_rbexpr` is handed to the backend
# @param parallel [Boolean] run all expressions in parallel
# @return [Expr]
def eval(expr, parallel: false)
  inner = expr._rbexpr
  evaluated = _rbexpr.list_eval(inner, parallel)
  Utils.wrap_expr(evaluated)
end

#first ⇒ Expr

Get the first value of the sublists.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.first)
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3    │
# │ null │
# │ 1    │
# └──────┘

Returns:



457
458
459
# File 'lib/polars/list_expr.rb', line 457

# Get the first value of the sublists.
#
# @return [Expr]
def first
  first_index = 0
  get(first_index)
end

#gather(index, null_on_oob: false) ⇒ Expr Also known as: take

Take sublists by multiple indices.

The indices may be defined in a single column, or by sublists in another column of dtype List.

Examples:

df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
# =>
# shape: (3, 2)
# ┌─────────────┬──────────────┐
# │ a           ┆ gather       │
# │ ---         ┆ ---          │
# │ list[i64]   ┆ list[i64]    │
# ╞═════════════╪══════════════╡
# │ [3, 2, 1]   ┆ [3, null]    │
# │ []          ┆ [null, null] │
# │ [1, 2, … 5] ┆ [1, 5]       │
# └─────────────┴──────────────┘

Parameters:

  • index (Object)

    Indices to return per sublist

  • null_on_oob (Boolean) (defaults to: false)

    Behavior if an index is out of bounds: true -> set as null; false -> raise an error. Note that defaulting to raising an error is much cheaper.

Returns:



430
431
432
433
434
435
436
# File 'lib/polars/list_expr.rb', line 430

# Take sublists by multiple indices.
#
# @param index [Object]
#   Indices to return per sublist. A plain Array is first wrapped in a Series.
# @param null_on_oob [Boolean]
#   true -> out-of-bounds indices become null; false -> raise an error
# @return [Expr]
def gather(index, null_on_oob: false)
  indices = index.is_a?(::Array) ? Series.new(index) : index
  # str_as_lit: false is passed explicitly, matching the original call.
  indices_expr = Utils.parse_into_expression(indices, str_as_lit: false)
  gathered = _rbexpr.list_gather(indices_expr, null_on_oob)
  Utils.wrap_expr(gathered)
end

#get(index, null_on_oob: true) ⇒ Expr

Get the value by index in the sublists.

So index 0 returns the first item of every sublist and index -1 returns the last item of every sublist. If an index is out of bounds, null is returned by default.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.get(0))
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3    │
# │ null │
# │ 1    │
# └──────┘

Parameters:

  • index (Integer)

    Index to return per sublist

  • null_on_oob (Boolean) (defaults to: true)

    Behavior if an index is out of bounds: true -> set as null; false -> raise an error.

Returns:



389
390
391
392
# File 'lib/polars/list_expr.rb', line 389

# Get the value by index in the sublists.
#
# @param index [Integer] index to return per sublist
# @param null_on_oob [Boolean]
#   true -> out-of-bounds indices become null; false -> raise an error
# @return [Expr]
def get(index, null_on_oob: true)
  index_expr = Utils.parse_into_expression(index)
  picked = _rbexpr.list_get(index_expr, null_on_oob)
  Utils.wrap_expr(picked)
end

#head(n = 5) ⇒ Expr

Slice the first n values of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.head(2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [1, 2]
#         [10, 2]
# ]

Parameters:

  • n (Integer) (defaults to: 5)

    Number of values to return for each sublist.

Returns:



675
676
677
# File 'lib/polars/list_expr.rb', line 675

# Slice the first `n` values of every sublist.
#
# @param n [Integer] number of values to return for each sublist
# @return [Expr]
def head(n = 5)
  start = 0
  slice(start, n)
end

#join(separator, ignore_nulls: true) ⇒ Expr

Join all string items in a sublist and place a separator between them.

This errors if inner type of list != :str.

Examples:

df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
df.select(Polars.col("s").list.join(" "))
# =>
# shape: (2, 1)
# ┌───────┐
# │ s     │
# │ ---   │
# │ str   │
# ╞═══════╡
# │ a b c │
# │ x y   │
# └───────┘

Parameters:

  • separator (String)

    string to separate the items with

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



532
533
534
535
# File 'lib/polars/list_expr.rb', line 532

# Join all string items in a sublist and place a separator between them.
#
# @param separator [String] string to separate the items with
# @param ignore_nulls [Boolean] ignore null values (default)
# @return [Expr]
def join(separator, ignore_nulls: true)
  # Parsed with str_as_lit: true, unlike the index-style arguments elsewhere.
  separator_expr = Utils.parse_into_expression(separator, str_as_lit: true)
  joined = _rbexpr.list_join(separator_expr, ignore_nulls)
  Utils.wrap_expr(joined)
end

#last ⇒ Expr

Get the last value of the sublists.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.last)
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 1    │
# │ null │
# │ 2    │
# └──────┘

Returns:



479
480
481
# File 'lib/polars/list_expr.rb', line 479

# Get the last value of the sublists.
#
# @return [Expr]
def last
  last_index = -1
  get(last_index)
end

#lengths ⇒ Expr Also known as: len

Get the length of the arrays as :u32.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
df.select(Polars.col("bar").list.lengths)
# =>
# shape: (2, 1)
# ┌─────┐
# │ bar │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# └─────┘

Returns:



83
84
85
# File 'lib/polars/list_expr.rb', line 83

# Get the length of the arrays as :u32.
#
# @return [Expr]
def lengths
  sizes = _rbexpr.list_len
  Utils.wrap_expr(sizes)
end

#max ⇒ Expr

Compute the max value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.max)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 3      │
# └────────┘

Returns:



200
201
202
# File 'lib/polars/list_expr.rb', line 200

# Compute the max value of the lists in the array.
#
# @return [Expr]
def max
  maxima = _rbexpr.list_max
  Utils.wrap_expr(maxima)
end

#mean ⇒ Expr

Compute the mean value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.mean)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ 1.0    │
# │ 2.5    │
# └────────┘

Returns:



242
243
244
# File 'lib/polars/list_expr.rb', line 242

# Compute the mean value of the lists in the array.
#
# @return [Expr]
def mean
  averages = _rbexpr.list_mean
  Utils.wrap_expr(averages)
end

#min ⇒ Expr

Compute the min value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.min)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 2      │
# └────────┘

Returns:



221
222
223
# File 'lib/polars/list_expr.rb', line 221

# Compute the min value of the lists in the array.
#
# @return [Expr]
def min
  minima = _rbexpr.list_min
  Utils.wrap_expr(minima)
end

#reverse ⇒ Expr

Reverse the arrays in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]]
  }
)
df.select(Polars.col("a").list.reverse)
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# │ [2, 1, 9] │
# └───────────┘

Returns:



292
293
294
# File 'lib/polars/list_expr.rb', line 292

# Reverse the arrays in the list.
#
# @return [Expr]
def reverse
  reversed = _rbexpr.list_reverse
  Utils.wrap_expr(reversed)
end

#sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil) ⇒ Expr

Sample from this list.

Examples:

df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
# =>
# shape: (2, 3)
# ┌───────────┬─────┬───────────┐
# │ values    ┆ n   ┆ sample    │
# │ ---       ┆ --- ┆ ---       │
# │ list[i64] ┆ i64 ┆ list[i64] │
# ╞═══════════╪═════╪═══════════╡
# │ [1, 2, 3] ┆ 2   ┆ [2, 1]    │
# │ [4, 5]    ┆ 1   ┆ [5]       │
# └───────────┴─────┴───────────┘

Parameters:

  • n (Integer) (defaults to: nil)

    Number of items to return. Cannot be used with fraction. Defaults to 1 if fraction is nil.

  • fraction (Float) (defaults to: nil)

    Fraction of items to return. Cannot be used with n.

  • with_replacement (Boolean) (defaults to: false)

    Allow values to be sampled more than once.

  • shuffle (Boolean) (defaults to: false)

    Shuffle the order of sampled data points.

  • seed (Integer) (defaults to: nil)

    Seed for the random number generator. If set to nil (default), a random seed is generated for each sample operation.

Returns:



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/polars/list_expr.rb', line 142

# Sample from this list.
#
# @param n [Integer]
#   Number of items to return. Cannot be used with `fraction`.
#   Defaults to 1 if `fraction` is nil.
# @param fraction [Float]
#   Fraction of items to return. Cannot be used with `n`.
# @param with_replacement [Boolean]
#   Allow values to be sampled more than once.
# @param shuffle [Boolean]
#   Shuffle the order of sampled data points.
# @param seed [Integer]
#   Seed for the random number generator.
# @return [Expr]
# @raise [ArgumentError] if both `n` and `fraction` are given
def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
  unless n.nil? || fraction.nil?
    raise ArgumentError, "cannot specify both `n` and `fraction`"
  end

  sampled =
    if fraction.nil?
      # No fraction given: sample a fixed count, falling back to one item.
      count = Utils.parse_into_expression(n.nil? ? 1 : n)
      _rbexpr.list_sample_n(count, with_replacement, shuffle, seed)
    else
      share = Utils.parse_into_expression(fraction)
      _rbexpr.list_sample_fraction(share, with_replacement, shuffle, seed)
    end

  Utils.wrap_expr(sampled)
end

#shift(n = 1) ⇒ Expr

Shift values by the given period.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.shift
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [null, 1, … 3]
#         [null, 10, 2]
# ]

Parameters:

  • n (Integer) (defaults to: 1)

    Number of places to shift (may be negative).

Returns:



627
628
629
630
# File 'lib/polars/list_expr.rb', line 627

# Shift values by the given period.
#
# @param n [Integer] number of places to shift (may be negative)
# @return [Expr]
def shift(n = 1)
  periods = Utils.parse_into_expression(n)
  shifted = _rbexpr.list_shift(periods)
  Utils.wrap_expr(shifted)
end

#slice(offset, length = nil) ⇒ Expr

Slice every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.slice(1, 2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [2, 3]
#         [2, 1]
# ]

Parameters:

  • offset (Integer)

    Start index. Negative indexing is supported.

  • length (Integer) (defaults to: nil)

    Length of the slice. If set to nil (default), the slice is taken to the end of the list.

Returns:



652
653
654
655
656
# File 'lib/polars/list_expr.rb', line 652

# Slice every sublist.
#
# @param offset [Integer] start index; negative indexing is supported
# @param length [Integer]
#   Length of the slice. If nil (default), the slice is taken to the end
#   of the list.
# @return [Expr]
def slice(offset, length = nil)
  # Both bounds go through the same parsing call, offset first.
  offset_expr, length_expr = [offset, length].map do |bound|
    Utils.parse_into_expression(bound, str_as_lit: false)
  end
  Utils.wrap_expr(_rbexpr.list_slice(offset_expr, length_expr))
end

#sort(reverse: false) ⇒ Expr

Sort the arrays in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]]
  }
)
df.select(Polars.col("a").list.sort)
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# │ [1, 2, 9] │
# └───────────┘

Returns:



267
268
269
# File 'lib/polars/list_expr.rb', line 267

# Sort the arrays in the list.
#
# @param reverse [Boolean] flag forwarded to the backend sort
# @return [Expr]
def sort(reverse: false)
  sorted = _rbexpr.list_sort(reverse)
  Utils.wrap_expr(sorted)
end

#sum ⇒ Expr

Sum all the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.sum)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 5      │
# └────────┘

Returns:



179
180
181
# File 'lib/polars/list_expr.rb', line 179

# Sum all the lists in the array.
#
# @return [Expr]
def sum
  totals = _rbexpr.list_sum
  Utils.wrap_expr(totals)
end

#tail(n = 5) ⇒ Expr

Slice the last n values of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.tail(2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [3, 4]
#         [2, 1]
# ]

Parameters:

  • n (Integer) (defaults to: 5)

    Number of values to return for each sublist.

Returns:



696
697
698
699
# File 'lib/polars/list_expr.rb', line 696

# Slice the last `n` values of every sublist.
#
# @param n [Integer] number of values to return for each sublist
# @return [Expr]
def tail(n = 5)
  count = Utils.parse_into_expression(n)
  trailing = _rbexpr.list_tail(count)
  Utils.wrap_expr(trailing)
end

#to_struct(n_field_strategy: "first_non_null", name_generator: nil) ⇒ Expr

Convert the series of type List to a series of type Struct.

Examples:

df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
df.select([Polars.col("a").list.to_struct])
# =>
# shape: (2, 1)
# ┌────────────┐
# │ a          │
# │ ---        │
# │ struct[3]  │
# ╞════════════╡
# │ {1,2,3}    │
# │ {1,2,null} │
# └────────────┘

Parameters:

  • n_field_strategy ("first_non_null", "max_width") (defaults to: "first_non_null")

    Strategy to determine the number of fields of the struct.

  • name_generator (Object) (defaults to: nil)

    A custom function that can be used to generate the field names. Default field names are field_0, field_1 .. field_n

Returns:

Raises:

  • (Todo)


752
753
754
755
# File 'lib/polars/list_expr.rb', line 752

# Convert the series of type List to a series of type Struct.
#
# @param n_field_strategy ["first_non_null", "max_width"]
#   Strategy to determine the number of fields of the struct.
# @param name_generator [Object]
#   Custom field-name generator. Not supported yet: any truthy value raises.
# @return [Expr]
# @raise [Todo] if `name_generator` is given
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
  if name_generator
    raise Todo
  end

  # NOTE(review): the trailing 0 is passed through as-is; its meaning is
  # defined by the backend `list_to_struct` and is not visible here.
  struct_expr = _rbexpr.list_to_struct(n_field_strategy, name_generator, 0)
  Utils.wrap_expr(struct_expr)
end

#unique(maintain_order: false) ⇒ Expr

Get the unique/distinct values in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 1, 2]]
  }
)
df.select(Polars.col("a").list.unique)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2]    │
# └───────────┘

Returns:



316
317
318
# File 'lib/polars/list_expr.rb', line 316

# Get the unique/distinct values in the list.
#
# @param maintain_order [Boolean] flag forwarded to the backend
# @return [Expr]
def unique(maintain_order: false)
  distinct = _rbexpr.list_unique(maintain_order)
  Utils.wrap_expr(distinct)
end