Module: Polars::Selectors

Defined in:
lib/polars/selectors.rb

Class Method Summary collapse

Class Method Details

.all ⇒ Selector

Select all columns.

Examples:

Select all columns, casting them to string:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
    "value" => [1_234_500, 5_000_555]
  },
  schema_overrides: {"value" => Polars::Int32}
)
df.select(Polars.cs.all.cast(Polars::String))
# =>
# shape: (2, 2)
# ┌────────────┬─────────┐
# │ dt         ┆ value   │
# │ ---        ┆ ---     │
# │ str        ┆ str     │
# ╞════════════╪═════════╡
# │ 1999-12-31 ┆ 1234500 │
# │ 2024-01-01 ┆ 5000555 │
# └────────────┴─────────┘

Select all columns except for those matching the given dtypes:

df.select(Polars.cs.all - Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-01-01 │
# └────────────┘

Returns:



74
75
76
# File 'lib/polars/selectors.rb', line 74

# Build the selector that matches every column.
#
# @return [Selector]
def self.all
  rb_selector = RbSelector.all
  Selector._from_rbselector(rb_selector)
end

.alpha(ascii_only: false, ignore_spaces: false) ⇒ Selector

Note:

Matching column names cannot contain any non-alphabetic characters. Note that the definition of "alphabetic" consists of all valid Unicode alphabetic characters (\p{Alphabetic}) by default; this can be changed by setting ascii_only: true.

Select all columns with alphabetic names (eg: only letters).

Examples:

df = Polars::DataFrame.new(
  {
    "no1" => [100, 200, 300],
    "café" => ["espresso", "latte", "mocha"],
    "t or f" => [true, false, nil],
    "hmm" => ["aaa", "bbb", "ccc"],
    "都市" => ["東京", "大阪", "京都"]
  }
)

Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:

df.select(Polars.cs.alpha)
# =>
# shape: (3, 3)
# ┌──────────┬─────┬──────┐
# │ café     ┆ hmm ┆ 都市 │
# │ ---      ┆ --- ┆ ---  │
# │ str      ┆ str ┆ str  │
# ╞══════════╪═════╪══════╡
# │ espresso ┆ aaa ┆ 東京 │
# │ latte    ┆ bbb ┆ 大阪 │
# │ mocha    ┆ ccc ┆ 京都 │
# └──────────┴─────┴──────┘

Constrain the definition of "alphabetic" to ASCII characters only:

df.select(Polars.cs.alpha(ascii_only: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ hmm │
# │ --- │
# │ str │
# ╞═════╡
# │ aaa │
# │ bbb │
# │ ccc │
# └─────┘
df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
# =>
# shape: (3, 2)
# ┌────────┬─────┐
# │ t or f ┆ hmm │
# │ ---    ┆ --- │
# │ bool   ┆ str │
# ╞════════╪═════╡
# │ true   ┆ aaa │
# │ false  ┆ bbb │
# │ null   ┆ ccc │
# └────────┴─────┘

Select all columns except for those with alphabetic names:

df.select(~Polars.cs.alpha)
# =>
# shape: (3, 2)
# ┌─────┬────────┐
# │ no1 ┆ t or f │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 100 ┆ true   │
# │ 200 ┆ false  │
# │ 300 ┆ null   │
# └─────┴────────┘
df.select(~Polars.cs.alpha(ignore_spaces: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ no1 │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 100 │
# │ 200 │
# │ 300 │
# └─────┘

Parameters:

  • ascii_only (Boolean) (defaults to: false)

    Indicate whether to consider only ASCII alphabetic characters, or the full Unicode range of valid letters (accented, ideographic, etc).

  • ignore_spaces (Boolean) (defaults to: false)

    Indicate whether to ignore the presence of spaces in column names; if so, only the other (non-space) characters are considered.

Returns:



175
176
177
178
179
180
# File 'lib/polars/selectors.rb', line 175

# Build a selector matching columns whose names consist solely of
# alphabetic characters (optionally allowing spaces as well).
#
# @param ascii_only [Boolean] restrict "alphabetic" to ASCII a-z / A-Z
# @param ignore_spaces [Boolean] also allow space characters in the name
# @return [Selector]
def self.alpha(ascii_only: false, ignore_spaces: false)
  # the character-class fragments must stay compatible with the *rust* regex crate
  letters = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
  spaces = ignore_spaces ? " " : ""
  pattern = ["^[", letters, spaces, "]+$"].join
  Selector._from_rbselector(RbSelector.matches(pattern))
end

.alphanumeric(ascii_only: false, ignore_spaces: false) ⇒ Selector

Note:

Matching column names cannot contain any characters other than letters and digits. Note that the definition of "alphanumeric" consists of all valid Unicode alphabetic characters (\p{Alphabetic}) and digit characters (\d) by default; this can be changed by setting ascii_only: true.

Select all columns with alphanumeric names (eg: only letters and the digits 0-9).

Examples:

Select columns with alphanumeric names:

df = Polars::DataFrame.new(
  {
    "1st_col" => [100, 200, 300],
    "flagged" => [true, false, true],
    "00prefix" => ["01:aa", "02:bb", "03:cc"],
    "last col" => ["x", "y", "z"]
  }
)
df.select(Polars.cs.alphanumeric)
# =>
# shape: (3, 2)
# ┌─────────┬──────────┐
# │ flagged ┆ 00prefix │
# │ ---     ┆ ---      │
# │ bool    ┆ str      │
# ╞═════════╪══════════╡
# │ true    ┆ 01:aa    │
# │ false   ┆ 02:bb    │
# │ true    ┆ 03:cc    │
# └─────────┴──────────┘
df.select(Polars.cs.alphanumeric(ignore_spaces: true))
# =>
# shape: (3, 3)
# ┌─────────┬──────────┬──────────┐
# │ flagged ┆ 00prefix ┆ last col │
# │ ---     ┆ ---      ┆ ---      │
# │ bool    ┆ str      ┆ str      │
# ╞═════════╪══════════╪══════════╡
# │ true    ┆ 01:aa    ┆ x        │
# │ false   ┆ 02:bb    ┆ y        │
# │ true    ┆ 03:cc    ┆ z        │
# └─────────┴──────────┴──────────┘

Select all columns except for those with alphanumeric names:

df.select(~Polars.cs.alphanumeric)
# =>
# shape: (3, 2)
# ┌─────────┬──────────┐
# │ 1st_col ┆ last col │
# │ ---     ┆ ---      │
# │ i64     ┆ str      │
# ╞═════════╪══════════╡
# │ 100     ┆ x        │
# │ 200     ┆ y        │
# │ 300     ┆ z        │
# └─────────┴──────────┘
df.select(~Polars.cs.alphanumeric(ignore_spaces: true))
# =>
# shape: (3, 1)
# ┌─────────┐
# │ 1st_col │
# │ ---     │
# │ i64     │
# ╞═════════╡
# │ 100     │
# │ 200     │
# │ 300     │
# └─────────┘

Parameters:

  • ascii_only (Boolean) (defaults to: false)

    Indicate whether to consider only ASCII alphabetic characters, or the full Unicode range of valid letters (accented, ideographic, etc).

  • ignore_spaces (Boolean) (defaults to: false)

    Indicate whether to ignore the presence of spaces in column names; if so, only the other (non-space) characters are considered.

Returns:



262
263
264
265
266
267
268
269
270
# File 'lib/polars/selectors.rb', line 262

# Build a selector matching columns whose names consist solely of
# letters and digits (optionally allowing spaces as well).
#
# @param ascii_only [Boolean] restrict letters to a-z / A-Z and digits to 0-9
# @param ignore_spaces [Boolean] also allow space characters in the name
# @return [Selector]
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
  # every fragment must be valid inside a character class of the *rust* regex crate
  letters, digits =
    if ascii_only
      ["a-zA-Z", "0-9"]
    else
      ["\\p{Alphabetic}", "\\d"]
    end
  spaces = ignore_spaces ? " " : ""

  pattern = "^[#{letters}#{digits}#{spaces}]+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end

.array(inner = nil, width: nil) ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all array columns.

Examples:

Select all array columns:

df = Polars::DataFrame.new(
  {
    "foo" => [["xx", "yy"], ["x", "y"]],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  },
  schema_overrides: {"foo" => Polars::Array.new(Polars::String, 2)}
)
df.select(Polars.cs.array)
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘

Select all columns except for those that are array:

df.select(~Polars.cs.array)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all array columns with a certain matching inner type:

df.select(Polars.cs.array(Polars.cs.string))
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘
df.select(Polars.cs.array(Polars.cs.integer))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘
df.select(Polars.cs.array(width: 2))
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘
df.select(Polars.cs.array(width: 3))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



793
794
795
796
# File 'lib/polars/selectors.rb', line 793

# Build a selector matching array columns, optionally constrained by an
# inner selector and/or a fixed width.
#
# @param inner [Selector, nil] selector the inner dtype must match; nil for any
# @param width [Integer, nil] required array width; nil for any
# @return [Selector]
def self.array(inner = nil, width: nil)
  # unwrap the inner selector only when one was supplied
  inner_selector = nil
  inner_selector = inner._rbselector unless inner.nil?

  Selector._from_rbselector(RbSelector.array(inner_selector, width))
end

.binary ⇒ Selector

Select all binary columns.

Examples:

df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
# =>
# shape: (1, 4)
# ┌──────────┬───────┬────────┬─────┐
# │ a        ┆ b     ┆ c      ┆ d   │
# │ ---      ┆ ---   ┆ ---    ┆ --- │
# │ binary   ┆ str   ┆ binary ┆ str │
# ╞══════════╪═══════╪════════╪═════╡
# │ b"hello" ┆ world ┆ b"!"   ┆ :)  │
# └──────────┴───────┴────────┴─────┘

Select binary columns and export as a hash:

df.select(Polars.cs.binary).to_h(as_series: false)
# => {"a"=>["hello"], "c"=>["!"]}

Select all columns except for those that are binary:

df.select(~Polars.cs.binary).to_h(as_series: false)
# => {"b"=>["world"], "d"=>[":)"]}

Returns:



295
296
297
# File 'lib/polars/selectors.rb', line 295

# Build a selector matching columns of the Binary dtype.
#
# @return [Selector]
def self.binary
  binary_dtypes = [Binary]
  by_dtype(binary_dtypes)
end

.boolean ⇒ Selector

Select all boolean columns.

Examples:

df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
# =>
# shape: (4, 2)
# ┌─────┬────────┐
# │ n   ┆ n_even │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 1   ┆ false  │
# │ 2   ┆ true   │
# │ 3   ┆ false  │
# │ 4   ┆ true   │
# └─────┴────────┘

Select and invert boolean columns:

df.with_columns(is_odd: Polars.cs.boolean.not_)
# =>
# shape: (4, 3)
# ┌─────┬────────┬────────┐
# │ n   ┆ n_even ┆ is_odd │
# │ --- ┆ ---    ┆ ---    │
# │ i64 ┆ bool   ┆ bool   │
# ╞═════╪════════╪════════╡
# │ 1   ┆ false  ┆ true   │
# │ 2   ┆ true   ┆ false  │
# │ 3   ┆ false  ┆ true   │
# │ 4   ┆ true   ┆ false  │
# └─────┴────────┴────────┘

Select all columns except for those that are boolean:

df.select(~Polars.cs.boolean)
# =>
# shape: (4, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# └─────┘

Returns:



347
348
349
# File 'lib/polars/selectors.rb', line 347

# Build a selector matching columns of the Boolean dtype.
#
# @return [Selector]
def self.boolean
  boolean_dtypes = [Boolean]
  by_dtype(boolean_dtypes)
end

.by_dtype(*dtypes) ⇒ Selector

Select all columns matching the given dtypes.

Group by string columns and sum the numeric columns:

df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort("other")
# =>
# shape: (2, 2)
# ┌───────┬──────────┐
# │ other ┆ value    │
# │ ---   ┆ ---      │
# │ str   ┆ i64      │
# ╞═══════╪══════════╡
# │ bar   ┆ 5000555  │
# │ foo   ┆ -3265500 │
# └───────┴──────────┘

Examples:

Select all columns with date or string dtypes:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1), Date.new(2010, 7, 5)],
    "value" => [1_234_500, 5_000_555, -4_500_000],
    "other" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cs.by_dtype(Polars::Date, Polars::String))
# =>
# shape: (3, 2)
# ┌────────────┬───────┐
# │ dt         ┆ other │
# │ ---        ┆ ---   │
# │ date       ┆ str   │
# ╞════════════╪═══════╡
# │ 1999-12-31 ┆ foo   │
# │ 2024-01-01 ┆ bar   │
# │ 2010-07-05 ┆ foo   │
# └────────────┴───────┘

Select all columns that are not of date or string dtype:

df.select(~Polars.cs.by_dtype(Polars::Date, Polars::String))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ value    │
# │ ---      │
# │ i64      │
# ╞══════════╡
# │ 1234500  │
# │ 5000555  │
# │ -4500000 │
# └──────────┘

Returns:



402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'lib/polars/selectors.rb', line 402

# Build a selector matching columns of any of the given dtypes.
#
# Each argument is either a single dtype (a Polars dtype or a Class) or an
# Array of such dtypes; arrays are expanded one level.
#
# @param dtypes [Array] dtypes, or arrays of dtypes, to match
# @return [Selector]
# @raise [TypeError] if an argument (or an array element) is not a dtype
def self.by_dtype(*dtypes)
  acceptable = ->(candidate) { Utils.is_polars_dtype(candidate) || candidate.is_a?(Class) }

  collected = dtypes.flat_map do |entry|
    if acceptable.call(entry)
      [entry]
    elsif entry.is_a?(::Array)
      # validate members in order so the first offender is the one reported
      entry.each do |member|
        raise TypeError, "invalid dtype: #{member.inspect}" unless acceptable.call(member)
      end
      entry
    else
      raise TypeError, "invalid dtype: #{entry.inspect}"
    end
  end

  Selector._by_dtype(collected)
end

.by_index(*indices, require_all: true) ⇒ Selector

Note:

Matching columns are returned in the order in which their indexes appear in the selector, not the underlying schema order.

Select all columns matching the given indices (or range objects).

Examples:

df = Polars::DataFrame.new(
  {
    "key" => ["abc"],
    **100.times.to_h { |i| ["c%02d" % i, 0.5 * i] }
  }
)
# =>
# shape: (1, 101)
# ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
# │ key ┆ c00 ┆ c01 ┆ c02 ┆ … ┆ c96  ┆ c97  ┆ c98  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ --- ┆   ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64 ┆   ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 0.5 ┆ 1.0 ┆ … ┆ 48.0 ┆ 48.5 ┆ 49.0 ┆ 49.5 │
# └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘

Select columns by index ("key" column and the two first/last columns):

df.select(Polars.cs.by_index(0, 1, 2, -2, -1))
# =>
# shape: (1, 5)
# ┌─────┬─────┬─────┬──────┬──────┐
# │ key ┆ c00 ┆ c01 ┆ c98  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 0.5 ┆ 49.0 ┆ 49.5 │
# └─────┴─────┴─────┴──────┴──────┘

Select the "key" column and use a range object to select various columns.

df.select(Polars.cs.by_index(0, (1...101).step(20)))
# =>
# shape: (1, 6)
# ┌─────┬─────┬──────┬──────┬──────┬──────┐
# │ key ┆ c00 ┆ c20  ┆ c40  ┆ c60  ┆ c80  │
# │ --- ┆ --- ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 10.0 ┆ 20.0 ┆ 30.0 ┆ 40.0 │
# └─────┴─────┴──────┴──────┴──────┴──────┘
df.select(Polars.cs.by_index(0, (101...0).step(-25), require_all: false))
# =>
# shape: (1, 5)
# ┌─────┬──────┬──────┬──────┬─────┐
# │ key ┆ c75  ┆ c50  ┆ c25  ┆ c00 │
# │ --- ┆ ---  ┆ ---  ┆ ---  ┆ --- │
# │ str ┆ f64  ┆ f64  ┆ f64  ┆ f64 │
# ╞═════╪══════╪══════╪══════╪═════╡
# │ abc ┆ 37.5 ┆ 25.0 ┆ 12.5 ┆ 0.0 │
# └─────┴──────┴──────┴──────┴─────┘

Select all columns except for those at odd indices (i.e. drop the even-numbered "cNN" columns):

df.select(~Polars.cs.by_index((1...100).step(2)))
# =>
# shape: (1, 51)
# ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
# │ key ┆ c01 ┆ c03 ┆ c05 ┆ … ┆ c93  ┆ c95  ┆ c97  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ --- ┆   ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64 ┆   ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.5 ┆ 1.5 ┆ 2.5 ┆ … ┆ 46.5 ┆ 47.5 ┆ 48.5 ┆ 49.5 │
# └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘

Parameters:

  • indices (Array)

    One or more column indices (or range objects). Negative indexing is supported.

Returns:



500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
# File 'lib/polars/selectors.rb', line 500

# Build a selector matching columns at the given positions.
#
# Each argument is either an Integer index or an Enumerable (for example a
# range or a stepped range) that is expanded into its elements.
#
# @param indices [Array] integer indices and/or enumerables of indices
# @param require_all [Boolean] passed through to RbSelector.by_index
# @return [Selector]
# @raise [TypeError] if an argument is neither Enumerable nor Integer
def self.by_index(*indices, require_all: true)
  expanded = indices.flat_map do |entry|
    case entry
    when Enumerable then entry.to_a
    when Integer then [entry]
    else raise TypeError, "invalid index value: #{entry.inspect}"
    end
  end

  Selector._from_rbselector(RbSelector.by_index(expanded, require_all))
end

.by_name(*names, require_all: true) ⇒ Selector

Note:

Matching columns are returned in the order in which they are declared in the selector, not the underlying schema order.

Select all columns matching the given names.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns by name:

df.select(Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Match any of the given columns by name:

df.select(Polars.cs.by_name("baz", "moose", "foo", "bear", require_all: false))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ baz ┆ foo │
# │ --- ┆ --- │
# │ f64 ┆ str │
# ╞═════╪═════╡
# │ 2.0 ┆ x   │
# │ 5.5 ┆ y   │
# └─────┴─────┘

Match all columns except for those given:

df.select(~Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ baz ┆ zap   │
# │ --- ┆ ---   │
# │ f64 ┆ bool  │
# ╞═════╪═══════╡
# │ 2.0 ┆ false │
# │ 5.5 ┆ true  │
# └─────┴───────┘

Parameters:

  • names (Array)

    One or more names of columns to select.

  • require_all (Boolean) (defaults to: true)

    Whether to match all names (the default) or any of the names.

Returns:



577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
# File 'lib/polars/selectors.rb', line 577

# Build a selector matching columns with the given names.
#
# Each argument is either a String or an Array of Strings; arrays are
# expanded one level.
#
# @param names [Array] column names, or arrays of column names
# @param require_all [Boolean] forwarded as the `strict:` option
# @return [Selector]
# @raise [TypeError] if an argument (or an array element) is not a String
def self.by_name(*names, require_all: true)
  flattened = names.flat_map do |entry|
    case entry
    when ::String
      [entry]
    when ::Array
      # validate members in order so the first offender is the one reported
      entry.each do |item|
        raise TypeError, "invalid name: #{item.inspect}" unless item.is_a?(::String)
      end
      entry
    else
      raise TypeError, "invalid name: #{entry.inspect}"
    end
  end

  Selector._by_name(flattened, strict: require_all, expand_patterns: false)
end

.categorical ⇒ Selector

Select all categorical columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["xx", "yy"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  },
  schema_overrides: {"foo" => Polars::Categorical}
)

Select all categorical columns:

df.select(Polars.cs.categorical)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ cat │
# ╞═════╡
# │ xx  │
# │ yy  │
# └─────┘

Select all columns except for those that are categorical:

df.select(~Polars.cs.categorical)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



928
929
930
# File 'lib/polars/selectors.rb', line 928

# Build the selector that matches categorical columns.
#
# @return [Selector]
def self.categorical
  rb_selector = RbSelector.categorical
  Selector._from_rbselector(rb_selector)
end

.contains(*substring) ⇒ Selector

Select columns whose names contain the given literal substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that contain the substring 'ba':

df.select(Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select columns that contain the substring 'ba' or the letter 'z':

df.select(Polars.cs.contains("ba", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ bar ┆ baz ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 123 ┆ 2.0 ┆ false │
# │ 456 ┆ 5.5 ┆ true  │
# └─────┴─────┴───────┘

Select all columns except for those that contain the substring 'ba':

df.select(~Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ foo ┆ zap   │
# │ --- ┆ ---   │
# │ str ┆ bool  │
# ╞═════╪═══════╡
# │ x   ┆ false │
# │ y   ┆ true  │
# └─────┴───────┘

Parameters:

  • substring (Object)

    Substring(s) that matching column names should contain.

Returns:



987
988
989
990
991
992
# File 'lib/polars/selectors.rb', line 987

# Build a selector matching columns whose names contain any of the given
# substrings.
#
# @param substring [Array] substring(s) that matching column names should contain
# @return [Selector]
def self.contains(*substring)
  # _re_string turns the substrings into a regex fragment; allow anything around it
  needle = _re_string(substring)
  Selector._from_rbselector(RbSelector.matches("^.*#{needle}.*$"))
end

.date ⇒ Selector

Select all date columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
  }
)

Select all date columns:

df.select(Polars.cs.date)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-08-09 │
# └────────────┘

Select all columns except for those that are dates:

df.select(~Polars.cs.date)
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ dtm                 │
# │ ---                 │
# │ datetime[ns]        │
# ╞═════════════════════╡
# │ 2001-05-07 10:25:00 │
# │ 2031-12-31 00:30:00 │
# └─────────────────────┘

Returns:



1031
1032
1033
# File 'lib/polars/selectors.rb', line 1031

# Build a selector matching columns of the Date dtype.
#
# @return [Selector]
def self.date
  date_dtypes = [Date]
  by_dtype(date_dtypes)
end

.datetime(time_unit = nil, time_zone: ["*", nil]) ⇒ Selector

Select all datetime columns, optionally filtering by time unit/zone.

Parameters:

  • time_unit ('ms', 'us', 'ns') (defaults to: nil)

    One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns". Omit to select columns with any valid timeunit.

  • time_zone (String) (defaults to: ["*", nil])
    • One or more timezone strings, as defined in zoneinfo (to see valid options run import zoneinfo; zoneinfo.available_timezones() for a full list).
    • Set nil to select Datetime columns that do not have a timezone.
    • Set "*" to select Datetime columns that have any timezone.

Returns:



1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
# File 'lib/polars/selectors.rb', line 1047

# Build a selector matching datetime columns, optionally filtered by time
# unit and/or time zone.
#
# @param time_unit [String, Array<String>, nil] one or more of "ms", "us",
#   "ns"; nil selects every time unit
# @param time_zone [String, Array, nil] a single time zone string, a
#   collection of time zone entries, or nil for columns without a time zone;
#   the default `["*", nil]` is passed through unchanged
# @return [Selector]
def self.datetime(time_unit = nil, time_zone: ["*", nil])
  time_unit_lst =
    case time_unit
    when nil then ["ms", "us", "ns"]
    when ::String then [time_unit]
    else time_unit.to_a
    end

  time_zone_lst =
    case time_zone
    when nil then [nil]
    # a lone zone name must be wrapped: String does not respond to #to_a
    when ::String then [time_zone]
    else time_zone.to_a
    end

  Selector._from_rbselector(RbSelector.datetime(time_unit_lst, time_zone_lst))
end

.decimal ⇒ Selector

Select all decimal columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [BigDecimal("123"), BigDecimal("456")],
    "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
  },
  schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
)

Select all decimal columns:

df.select(Polars.cs.decimal)
# =>
# shape: (2, 2)
# ┌───────────────┬───────────────┐
# │ bar           ┆ baz           │
# │ ---           ┆ ---           │
# │ decimal[38,0] ┆ decimal[10,5] │
# ╞═══════════════╪═══════════════╡
# │ 123           ┆ 2.00050       │
# │ 456           ┆ -50.55550     │
# └───────────────┴───────────────┘

Select all columns except the decimal ones:


df.select(~Polars.cs.decimal)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:



1104
1105
1106
1107
# File 'lib/polars/selectors.rb', line 1104

# Build the selector that matches decimal columns.
#
# @return [Selector]
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  rb_selector = RbSelector.decimal
  Selector._from_rbselector(rb_selector)
end

.digit(ascii_only: false) ⇒ Selector

Note:

Matching column names cannot contain any non-digit characters. Note that the definition of "digit" consists of all valid Unicode digit characters (\d) by default; this can be changed by setting ascii_only: true.

Select all columns having names consisting only of digits.

Examples:

df = Polars::DataFrame.new(
  {
    "key" => ["aaa", "bbb", "aaa", "bbb", "bbb"],
    "year" => [2001, 2001, 2025, 2025, 2001],
    "value" => [-25, 100, 75, -15, -5]
  }
).pivot(
  "year",
  values: "value",
  index: "key",
  aggregate_function: "sum"
)
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ key ┆ 2001 ┆ 2025 │
# │ --- ┆ ---  ┆ ---  │
# │ str ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ aaa ┆ -25  ┆ 75   │
# │ bbb ┆ 95   ┆ -15  │
# └─────┴──────┴──────┘

Select columns with digit names:

df.select(Polars.cs.digit)
# =>
# shape: (2, 2)
# ┌──────┬──────┐
# │ 2001 ┆ 2025 │
# │ ---  ┆ ---  │
# │ i64  ┆ i64  │
# ╞══════╪══════╡
# │ -25  ┆ 75   │
# │ 95   ┆ -15  │
# └──────┴──────┘

Select all columns except for those with digit names:

df.select(~Polars.cs.digit)
# =>
# shape: (2, 1)
# ┌─────┐
# │ key │
# │ --- │
# │ str │
# ╞═════╡
# │ aaa │
# │ bbb │
# └─────┘

Demonstrate use of ascii_only flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9):

df = Polars::DataFrame.new({"१९९९" => [1999], "२०७७" => [2077], "3000": [3000]})
df.select(Polars.cs.digit)
# =>
# shape: (1, 3)
# ┌──────┬──────┬──────┐
# │ १९९९ ┆ २०७७ ┆ 3000 │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  │
# ╞══════╪══════╪══════╡
# │ 1999 ┆ 2077 ┆ 3000 │
# └──────┴──────┴──────┘
df.select(Polars.cs.digit(ascii_only: true))
# =>
# shape: (1, 1)
# ┌──────┐
# │ 3000 │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3000 │
# └──────┘

Returns:



1192
1193
1194
1195
# File 'lib/polars/selectors.rb', line 1192

# Build a selector matching columns whose names consist solely of digits.
#
# @param ascii_only [Boolean] restrict "digit" to ASCII 0-9 instead of `\d`
# @return [Selector]
def self.digit(ascii_only: false)
  digit_class =
    if ascii_only
      "[0-9]"
    else
      "\\d"
    end

  # anchor both ends so the whole name must be made of digits
  pattern = "^#{digit_class}+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end

.duration(time_unit = nil) ⇒ Selector

Select all duration columns, optionally filtering by time unit.

Parameters:

  • time_unit ('ms', 'us', 'ns') (defaults to: nil)

    One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns". Omit to select columns with any valid timeunit.

Returns:



1204
1205
1206
1207
1208
1209
1210
1211
1212
# File 'lib/polars/selectors.rb', line 1204

# Build a selector matching duration columns, optionally filtered by time unit.
#
# @param time_unit [String, Array<String>, nil] one or more of "ms", "us",
#   "ns"; nil selects every time unit
# @return [Selector]
def self.duration(time_unit = nil)
  units =
    case time_unit
    when nil then ["ms", "us", "ns"]
    when ::String then [time_unit]
    else time_unit.to_a
    end

  Selector._from_rbselector(RbSelector.duration(units))
end

.empty ⇒ Selector

Select no columns.

This is useful for composition with other selectors.

Examples:

Polars::DataFrame.new({"a" => 1, "b" => 2}).select(Polars.cs.empty)
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



34
35
36
# File 'lib/polars/selectors.rb', line 34

# Build the selector that matches no columns.
#
# @return [Selector]
def self.empty
  rb_selector = RbSelector.empty
  Selector._from_rbselector(rb_selector)
end

.ends_with(*suffix) ⇒ Selector

Select columns that end with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that end with the substring 'z':

df.select(Polars.cs.ends_with("z"))
# =>
# shape: (2, 1)
# ┌─────┐
# │ baz │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# │ 5.5 │
# └─────┘

Select columns that end with either the letter 'z' or 'r':

df.select(Polars.cs.ends_with("z", "r"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all columns except for those that end with the substring 'z':

df.select(~Polars.cs.ends_with("z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ foo ┆ bar ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ str ┆ i64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ x   ┆ 123 ┆ false │
# │ y   ┆ 456 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • suffix (Object)

    Substring(s) that matching column names should end with.

Returns:



1269
1270
1271
1272
1273
1274
# File 'lib/polars/selectors.rb', line 1269

# Build a selector matching columns whose names end with any of the given
# substrings.
#
# @param suffix [Array] substring(s) that matching column names should end with
# @return [Selector]
def self.ends_with(*suffix)
  # _re_string turns the suffixes into a regex fragment; anchor it at the end
  tail = _re_string(suffix)
  Selector._from_rbselector(RbSelector.matches("^.*#{tail}$"))
end

.enum ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all enum columns.

Examples:

Select all enum columns:

df = Polars::DataFrame.new(
  {
    "foo" => ["xx", "yy"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
  },
  schema_overrides: {"foo" => Polars::Enum.new(["xx", "yy"])}
)
df.select(Polars.cs.enum)
# =>
# shape: (2, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ enum │
# ╞══════╡
# │ xx   │
# │ yy   │
# └──────┘

Select all columns except for those that are enum:

df.select(~Polars.cs.enum)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



640
641
642
# File 'lib/polars/selectors.rb', line 640

# Build the selector that matches enum columns.
#
# @return [Selector]
def self.enum
  # the native binding is named `enum_` (note the trailing underscore)
  rb_selector = RbSelector.enum_
  Selector._from_rbselector(rb_selector)
end

.exclude(columns, *more_columns) ⇒ Selector

Note:

If excluding a single selector it is simpler to write as ~selector instead.

Select all columns except those matching the given columns, datatypes, or selectors.

Examples:

Exclude by column name(s):

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
df.select(Polars.cs.exclude("ba", "xx"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude using a column name, a selector, and a dtype:

df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    One or more columns (col or name), datatypes, or selectors representing the columns to exclude.

  • more_columns (Array)

    Additional columns, datatypes, or selectors to exclude, specified as positional arguments.

Returns:



1324
1325
1326
# File 'lib/polars/selectors.rb', line 1324

# Build a selector matching every column except those described by the
# given columns, datatypes, or selectors.
#
# @param columns [Object] first column / datatype / selector to exclude
# @param more_columns [Array] additional items to exclude
# @return [Selector]
def self.exclude(columns, *more_columns)
  combined = _combine_as_selector(columns, *more_columns)
  # `~` inverts the combined selector
  ~combined
end

.first(strict: true) ⇒ Selector

Select the first column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the first column:

df.select(Polars.cs.first)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Select everything except for the first column:

df.select(~Polars.cs.first)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ f64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 1   │
# └─────┴─────┴─────┘

Returns:



1367
1368
1369
# File 'lib/polars/selectors.rb', line 1367

# Build the selector that matches the first column in the current scope.
#
# @param strict [Boolean] forwarded unchanged to RbSelector.first
# @return [Selector]
def self.first(strict: true)
  rb_selector = RbSelector.first(strict)
  Selector._from_rbselector(rb_selector)
end

.float ⇒ Selector

Select all float columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0.0, 1.0]
  },
  schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
)

Select all float columns:

df.select(Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ baz ┆ zap │
# │ --- ┆ --- │
# │ f32 ┆ f64 │
# ╞═════╪═════╡
# │ 2.0 ┆ 0.0 │
# │ 5.5 ┆ 1.0 │
# └─────┴─────┘

Select all columns except for those that are float:

df.select(~Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Returns:



1411
1412
1413
# File 'lib/polars/selectors.rb', line 1411

# Build the selector that matches float columns.
#
# @return [Selector]
def self.float
  rb_selector = RbSelector.float
  Selector._from_rbselector(rb_selector)
end

.integer ⇒ Selector

Select all integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select all integer columns:

df.select(Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ zap │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 123 ┆ 0   │
# │ 456 ┆ 1   │
# └─────┴─────┘

Select all columns except for those that are integer:

df.select(~Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ baz │
# │ --- ┆ --- │
# │ str ┆ f64 │
# ╞═════╪═════╡
# │ x   ┆ 2.0 │
# │ y   ┆ 5.5 │
# └─────┴─────┘

Returns:



1454
1455
1456
# File 'lib/polars/selectors.rb', line 1454

# Build the selector that matches integer columns.
#
# @return [Selector]
def self.integer
  rb_selector = RbSelector.integer
  Selector._from_rbselector(rb_selector)
end

.last(strict: true) ⇒ Selector

Select the last column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the last column:

df.select(Polars.cs.last)
# =>
# shape: (2, 1)
# ┌─────┐
# │ zap │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Select everything except for the last column:

df.select(~Polars.cs.last)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ baz │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ x   ┆ 123 ┆ 2.0 │
# │ y   ┆ 456 ┆ 5.5 │
# └─────┴─────┴─────┘

Returns:



1611
1612
1613
# File 'lib/polars/selectors.rb', line 1611

# Select the last column in the current scope.
#
# @param strict [Boolean]
#   Forwarded unchanged to `RbSelector.last`.
#
# @return [Selector]
def self.last(strict: true)
  rb_last = RbSelector.last(strict)
  Selector._from_rbselector(rb_last)
end

.list(inner = nil) ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all list columns.

Examples:

Select all list columns:

df = Polars::DataFrame.new(
  {
    "foo" => [["xx", "yy"], ["x"]],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  }
)
df.select(Polars.cs.list)
# =>
# shape: (2, 1)
# ┌──────────────┐
# │ foo          │
# │ ---          │
# │ list[str]    │
# ╞══════════════╡
# │ ["xx", "yy"] │
# │ ["x"]        │
# └──────────────┘

Select all columns except for those that are list:

df.select(~Polars.cs.list)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all list columns with a certain matching inner type:

df.select(Polars.cs.list(Polars.cs.string))
# =>
# shape: (2, 1)
# ┌──────────────┐
# │ foo          │
# │ ---          │
# │ list[str]    │
# ╞══════════════╡
# │ ["xx", "yy"] │
# │ ["x"]        │
# └──────────────┘
df.select(Polars.cs.list(Polars.cs.integer))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



705
706
707
708
# File 'lib/polars/selectors.rb', line 705

# Select all list columns, optionally restricted by an inner selector.
#
# @param inner [Selector, nil]
#   Selector for the inner type of the list; when nil, no inner
#   selector is passed along.
#
# @return [Selector]
def self.list(inner = nil)
  # Safe navigation skips the unwrap only for nil, same as the explicit nil check.
  rb_inner = inner&._rbselector
  Selector._from_rbselector(RbSelector.list(rb_inner))
end

.matches(pattern) ⇒ Selector

Select all columns that match the given regex pattern.

Examples:

Match column names containing an 'a', preceded by a character that is not 'z':

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)
df.select(Polars.cs.matches("[^z]a"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Do not match column names containing an 'R' or ending in 'z' (case-insensitively):

df.select(~Polars.cs.matches("(?i)R|z$"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ zap │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 0   │
# │ y   ┆ 1   │
# └─────┴─────┘

Parameters:

  • pattern (String)

    A valid regular expression pattern, compatible with the regex crate.

Returns:



1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
# File 'lib/polars/selectors.rb', line 1655

# Select all columns that match the given regex pattern.
#
# @param pattern [String]
#   A valid regular expression pattern, compatible with the regex crate.
#
# @return [Selector]
def self.matches(pattern)
  # A bare ".*" matches every column name, so defer to `all`.
  return all if pattern == ".*"

  # Drop one redundant ".*" (leading takes priority over trailing);
  # equivalent padding is re-added below.
  core =
    if pattern.start_with?(".*")
      pattern[2..]
    elsif pattern.end_with?(".*")
      pattern[..-3]
    else
      pattern
    end

  # Pad each side with a wildcard unless the pattern already anchors that side.
  head = core.start_with?("^") ? "" : "^.*"
  tail = core.end_with?("$") ? "" : ".*$"

  Selector._from_rbselector(RbSelector.matches("#{head}#{core}#{tail}"))
end

.nested ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all nested columns.

A nested column is a list, array or struct.

Examples:

Select all nested columns:

df = Polars::DataFrame.new(
  {
    "foo" => [{"a" => "xx", "b" => "z"}, {"a" => "x", "b" => "y"}],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "wow" => [[1, 2], [3]]
  }
)
df.select(Polars.cs.nested)
# =>
# shape: (2, 2)
# ┌────────────┬───────────┐
# │ foo        ┆ wow       │
# │ ---        ┆ ---       │
# │ struct[2]  ┆ list[i64] │
# ╞════════════╪═══════════╡
# │ {"xx","z"} ┆ [1, 2]    │
# │ {"x","y"}  ┆ [3]       │
# └────────────┴───────────┘

Select all columns except for those that are nested:

df.select(~Polars.cs.nested)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



885
886
887
# File 'lib/polars/selectors.rb', line 885

# Select all nested columns (a nested column is a list, array or struct).
#
# @return [Selector]
def self.nested
  rb_nested = RbSelector.nested
  Selector._from_rbselector(rb_nested)
end

.numeric ⇒ Selector

Select all numeric columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 0]
  },
  schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
)

Match all numeric columns:

df.select(Polars.cs.numeric)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i16 ┆ f32 ┆ u8  │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 0   │
# └─────┴─────┴─────┘

Match all columns except for those that are numeric:

df.select(~Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:



1713
1714
1715
# File 'lib/polars/selectors.rb', line 1713

# Select all numeric columns.
#
# @return [Selector]
def self.numeric
  rb_numeric = RbSelector.numeric
  Selector._from_rbselector(rb_numeric)
end

.object ⇒ Selector

Select all object columns.

Examples:

df = Polars::DataFrame.new(
  {
    "idx" => [0, 1],
    "uuid_obj" => ["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"],
    "uuid_str" => ["acab9fea-c05d-4b91-b639-418004a63f33", "28c65415-8b7d-4857-a4ce-300dca14b12b"]
  },
  schema_overrides: {"idx" => Polars::Int32, "uuid_obj" => Polars::Object}
)
df.select(Polars.cs.object).to_h(as_series: false)
# => {"uuid_obj"=>["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"]}

Returns:



1732
1733
1734
# File 'lib/polars/selectors.rb', line 1732

# Select all object columns.
#
# @return [Selector]
def self.object
  rb_object = RbSelector.object
  Selector._from_rbselector(rb_object)
end

.signed_integer ⇒ Selector

Select all signed integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all signed integer columns:

df.select(Polars.cs.signed_integer)
# =>
# shape: (2, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ -123 │
# │ -456 │
# └──────┘
df.select(~Polars.cs.signed_integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬─────┐
# │ bar  ┆ baz  ┆ zap │
# │ ---  ┆ ---  ┆ --- │
# │ u32  ┆ u64  ┆ str │
# ╞══════╪══════╪═════╡
# │ 3456 ┆ 7654 ┆ ab  │
# │ 6789 ┆ 4321 ┆ cd  │
# └──────┴──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:



1511
1512
1513
# File 'lib/polars/selectors.rb', line 1511

# Select all signed integer columns.
#
# @return [Selector]
def self.signed_integer
  rb_signed = RbSelector.signed_integer
  Selector._from_rbselector(rb_signed)
end

.starts_with(*prefix) ⇒ Selector

Select columns that start with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [1.0, 2.0],
    "bar" => [3.0, 4.0],
    "baz" => [5, 6],
    "zap" => [7, 8]
  }
)

Match columns starting with a 'b':

df.select(Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 5   │
# │ 4.0 ┆ 6   │
# └─────┴─────┘

Match columns starting with either the letter 'b' or 'z':

df.select(Polars.cs.starts_with("b", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ f64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 3.0 ┆ 5   ┆ 7   │
# │ 4.0 ┆ 6   ┆ 8   │
# └─────┴─────┴─────┘

Match all columns except for those starting with 'b':

df.select(~Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ zap │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 7   │
# │ 2.0 ┆ 8   │
# └─────┴─────┘

Parameters:

  • prefix (Object)

    Substring(s) that matching column names should start with.

Returns:



1791
1792
1793
1794
1795
1796
# File 'lib/polars/selectors.rb', line 1791

# Select columns that start with the given substring(s).
#
# @param prefix [Object]
#   Substring(s) that matching column names should start with.
#
# @return [Selector]
def self.starts_with(*prefix)
  # `_re_string` turns the collected prefixes into a regex fragment; the
  # result is anchored at the start and followed by a match-anything tail.
  anchored_pattern = "^#{_re_string(prefix)}.*$"
  Selector._from_rbselector(RbSelector.matches(anchored_pattern))
end

.string(include_categorical: false) ⇒ Selector

Select all String (and, optionally, Categorical) string columns.

Examples:

df = Polars::DataFrame.new(
  {
    "w" => ["xx", "yy", "xx", "yy", "xx"],
    "x" => [1, 2, 1, 4, -2],
    "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
    "z" => ["a", "b", "a", "b", "b"]
  },
).with_columns(
  z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
)

Group by all string columns, sum the numeric columns, then sort by the string cols:

df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ w   ┆ x   ┆ y   │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ xx  ┆ 0   ┆ 2.0 │
# │ yy  ┆ 6   ┆ 7.0 │
# └─────┴─────┴─────┘

Group by all string and categorical columns:

df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
  Polars.cs.string(include_categorical: true)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬──────┐
# │ w   ┆ z   ┆ x   ┆ y    │
# │ --- ┆ --- ┆ --- ┆ ---  │
# │ str ┆ cat ┆ i64 ┆ f64  │
# ╞═════╪═════╪═════╪══════╡
# │ xx  ┆ a   ┆ 2   ┆ 4.0  │
# │ xx  ┆ b   ┆ -2  ┆ -2.0 │
# │ yy  ┆ b   ┆ 6   ┆ 7.0  │
# └─────┴─────┴─────┴──────┘

Returns:



1841
1842
1843
1844
1845
1846
1847
1848
# File 'lib/polars/selectors.rb', line 1841

# Select all String (and, optionally, Categorical) columns.
#
# @param include_categorical [Boolean]
#   When truthy, Categorical is added to the dtypes being selected.
#
# @return [Selector]
def self.string(include_categorical: false)
  # Categorical is only referenced when the caller asks for it.
  wanted_dtypes = include_categorical ? [String, Categorical] : [String]
  by_dtype(wanted_dtypes)
end

.struct ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all struct columns.

Examples:

Select all struct columns:

df = Polars::DataFrame.new(
  {
    "foo" => [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  }
)
df.select(Polars.cs.struct)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ foo        │
# │ ---        │
# │ struct[2]  │
# ╞════════════╡
# │ {"xx","z"} │
# │ {"x","y"}  │
# └────────────┘

Select all columns except for those that are struct:

df.select(~Polars.cs.struct)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



838
839
840
# File 'lib/polars/selectors.rb', line 838

# Select all struct columns.
#
# @return [Selector]
def self.struct
  # Note the trailing underscore on the RbSelector method name.
  rb_struct = RbSelector.struct_
  Selector._from_rbselector(rb_struct)
end

.temporal ⇒ Selector

Select all temporal columns.

Examples:

Match all temporal columns:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2021, 1, 1), Date.new(2021, 1, 2)],
    "tm" => [DateTime.new(2000, 1, 1, 12, 0, 0), DateTime.new(2000, 1, 1, 20, 30, 45)],
    "value" => [1.2345, 2.3456],
  },
  schema_overrides: {"tm" => Polars::Time}
)
df.select(Polars.cs.temporal)
# =>
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ dt         ┆ tm       │
# │ ---        ┆ ---      │
# │ date       ┆ time     │
# ╞════════════╪══════════╡
# │ 2021-01-01 ┆ 12:00:00 │
# │ 2021-01-02 ┆ 20:30:45 │
# └────────────┴──────────┘

Match all temporal columns except for time columns:

df.select(Polars.cs.temporal - Polars.cs.time)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 2021-01-01 │
# │ 2021-01-02 │
# └────────────┘

Match all columns except for temporal columns:

df.select(~Polars.cs.temporal)
# =>
# shape: (2, 1)
# ┌────────┐
# │ value  │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ 1.2345 │
# │ 2.3456 │
# └────────┘

Returns:



1900
1901
1902
# File 'lib/polars/selectors.rb', line 1900

# Select all temporal columns.
#
# @return [Selector]
def self.temporal
  rb_temporal = RbSelector.temporal
  Selector._from_rbselector(rb_temporal)
end

.time ⇒ Selector

Select all time columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
    "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
  },
  schema_overrides: {"tm" => Polars::Time}
)

Select all time columns:

df.select(Polars.cs.time)
# =>
# shape: (2, 1)
# ┌──────────┐
# │ tm       │
# │ ---      │
# │ time     │
# ╞══════════╡
# │ 00:00:00 │
# │ 23:59:59 │
# └──────────┘

Select all columns except for those that are times:

df.select(~Polars.cs.time)
# =>
# shape: (2, 2)
# ┌─────────────────────┬────────────┐
# │ dtm                 ┆ dt         │
# │ ---                 ┆ ---        │
# │ datetime[ns]        ┆ date       │
# ╞═════════════════════╪════════════╡
# │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
# │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
# └─────────────────────┴────────────┘

Returns:



1943
1944
1945
# File 'lib/polars/selectors.rb', line 1943

# Select all time columns.
#
# @return [Selector]
def self.time
  # NOTE(review): `Time` presumably resolves to the Polars::Time dtype inside
  # the Polars namespace rather than Ruby's core Time — confirm.
  time_dtypes = [Time]
  by_dtype(time_dtypes)
end

.unsigned_integer ⇒ Selector

Select all unsigned integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all unsigned integer columns:

df.select(Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬──────┐
# │ bar  ┆ baz  │
# │ ---  ┆ ---  │
# │ u32  ┆ u64  │
# ╞══════╪══════╡
# │ 3456 ┆ 7654 │
# │ 6789 ┆ 4321 │
# └──────┴──────┘

Select all columns except for those that are unsigned integers:

df.select(~Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬─────┐
# │ foo  ┆ zap │
# │ ---  ┆ --- │
# │ i64  ┆ str │
# ╞══════╪═════╡
# │ -123 ┆ ab  │
# │ -456 ┆ cd  │
# └──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:



1568
1569
1570
# File 'lib/polars/selectors.rb', line 1568

# Select all unsigned integer columns.
#
# @return [Selector]
def self.unsigned_integer
  rb_unsigned = RbSelector.unsigned_integer
  Selector._from_rbselector(rb_unsigned)
end