Module: Polars::Selectors

Defined in:
lib/polars/selectors.rb

Class Method Summary collapse

Class Method Details

.all ⇒ SelectorProxy

Select all columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
    "value" => [1_234_500, 5_000_555]
  },
  schema_overrides: {"value" => Polars::Int32}
)

Select all columns, casting them to string:

df.select(Polars.cs.all.cast(Polars::String))
# =>
# shape: (2, 2)
# ┌────────────┬─────────┐
# │ dt         ┆ value   │
# │ ---        ┆ ---     │
# │ str        ┆ str     │
# ╞════════════╪═════════╡
# │ 1999-12-31 ┆ 1234500 │
# │ 2024-01-01 ┆ 5000555 │
# └────────────┴─────────┘

Select all columns except for those matching the given dtypes:

df.select(Polars.cs.all - Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-01-01 │
# └────────────┘

Returns:

  • (SelectorProxy)


172
173
174
# File 'lib/polars/selectors.rb', line 172

# Select all columns.
#
# @return [SelectorProxy]
def self.all
  every_column = F.all
  _selector_proxy_(every_column, name: "all")
end

.alpha(ascii_only: false, ignore_spaces: false) ⇒ SelectorProxy

Note:

Matching column names cannot contain any non-alphabetic characters. Note that the definition of “alphabetic” consists of all valid Unicode alphabetic characters (\p{Alphabetic}) by default; this can be changed by setting ascii_only: true.

Select all columns with alphabetic names (eg: only letters).

Examples:

df = Polars::DataFrame.new(
  {
    "no1" => [100, 200, 300],
    "café" => ["espresso", "latte", "mocha"],
    "t or f" => [true, false, nil],
    "hmm" => ["aaa", "bbb", "ccc"],
    "都市" => ["東京", "大阪", "京都"]
  }
)

Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:

df.select(Polars.cs.alpha)
# =>
# shape: (3, 3)
# ┌──────────┬─────┬──────┐
# │ café     ┆ hmm ┆ 都市 │
# │ ---      ┆ --- ┆ ---  │
# │ str      ┆ str ┆ str  │
# ╞══════════╪═════╪══════╡
# │ espresso ┆ aaa ┆ 東京 │
# │ latte    ┆ bbb ┆ 大阪 │
# │ mocha    ┆ ccc ┆ 京都 │
# └──────────┴─────┴──────┘

Constrain the definition of “alphabetic” to ASCII characters only:

df.select(Polars.cs.alpha(ascii_only: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ hmm │
# │ --- │
# │ str │
# ╞═════╡
# │ aaa │
# │ bbb │
# │ ccc │
# └─────┘
df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
# =>
# shape: (3, 2)
# ┌────────┬─────┐
# │ t or f ┆ hmm │
# │ ---    ┆ --- │
# │ bool   ┆ str │
# ╞════════╪═════╡
# │ true   ┆ aaa │
# │ false  ┆ bbb │
# │ null   ┆ ccc │
# └────────┴─────┘

Select all columns except for those with alphabetic names:

df.select(~Polars.cs.alpha)
# =>
# shape: (3, 2)
# ┌─────┬────────┐
# │ no1 ┆ t or f │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 100 ┆ true   │
# │ 200 ┆ false  │
# │ 300 ┆ null   │
# └─────┴────────┘
df.select(~Polars.cs.alpha(ignore_spaces: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ no1 │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 100 │
# │ 200 │
# │ 300 │
# └─────┘

Parameters:

  • ascii_only (Boolean) (defaults to: false)

    Indicate whether to consider only ASCII alphabetic characters, or the full Unicode range of valid letters (accented, ideographic, etc).

  • ignore_spaces (Boolean) (defaults to: false)

    Indicate whether to ignore the presence of spaces in column names; if so, only the other (non-space) characters are considered.

Returns:

  • (SelectorProxy)


273
274
275
276
277
278
279
280
281
282
# File 'lib/polars/selectors.rb', line 273

# Select all columns with alphabetic names (eg: only letters).
#
# @param ascii_only [Boolean]
#   When true, only ASCII letters count as alphabetic; otherwise the
#   Unicode `\p{Alphabetic}` class is used.
# @param ignore_spaces [Boolean]
#   When true, a space is also accepted inside the column name.
#
# @return [SelectorProxy]
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The character class has to be valid for the *rust* regex crate.
  allowed_chars =
    if ascii_only
      "a-zA-Z"
    else
      "\\p{Alphabetic}"
    end
  allowed_chars += " " if ignore_spaces

  # The whole name must consist of one or more allowed characters.
  name_pattern = "^[#{allowed_chars}]+$"

  _selector_proxy_(
    F.col(name_pattern),
    name: "alpha",
    parameters: {
      "ascii_only" => ascii_only,
      "ignore_spaces" => ignore_spaces
    }
  )
end

.binary ⇒ SelectorProxy

Select all binary columns.

Examples:

df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
# =>
# shape: (1, 4)
# ┌──────────┬───────┬────────┬─────┐
# │ a        ┆ b     ┆ c      ┆ d   │
# │ ---      ┆ ---   ┆ ---    ┆ --- │
# │ binary   ┆ str   ┆ binary ┆ str │
# ╞══════════╪═══════╪════════╪═════╡
# │ b"hello" ┆ world ┆ b"!"   ┆ :)  │
# └──────────┴───────┴────────┴─────┘

Select binary columns and export as a hash:

df.select(Polars.cs.binary).to_h(as_series: false)
# => {"a"=>["hello"], "c"=>["!"]}

Select all columns except for those that are binary:

df.select(~Polars.cs.binary).to_h(as_series: false)
# => {"b"=>["world"], "d"=>[":)"]}

Returns:

  • (SelectorProxy)


311
312
313
# File 'lib/polars/selectors.rb', line 311

# Select all binary columns.
#
# @return [SelectorProxy]
def self.binary
  binary_columns = F.col(Binary)
  _selector_proxy_(binary_columns, name: "binary")
end

.boolean ⇒ SelectorProxy

Select all boolean columns.

Examples:

df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
# =>
# shape: (4, 2)
# ┌─────┬────────┐
# │ n   ┆ n_even │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 1   ┆ false  │
# │ 2   ┆ true   │
# │ 3   ┆ false  │
# │ 4   ┆ true   │
# └─────┴────────┘

Select and invert boolean columns:

df.with_columns(is_odd: Polars.cs.boolean.not_)
# =>
# shape: (4, 3)
# ┌─────┬────────┬────────┐
# │ n   ┆ n_even ┆ is_odd │
# │ --- ┆ ---    ┆ ---    │
# │ i64 ┆ bool   ┆ bool   │
# ╞═════╪════════╪════════╡
# │ 1   ┆ false  ┆ true   │
# │ 2   ┆ true   ┆ false  │
# │ 3   ┆ false  ┆ true   │
# │ 4   ┆ true   ┆ false  │
# └─────┴────────┴────────┘

Select all columns except for those that are boolean:

df.select(~Polars.cs.boolean)
# =>
# shape: (4, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# └─────┘

Returns:

  • (SelectorProxy)


363
364
365
# File 'lib/polars/selectors.rb', line 363

# Select all boolean columns.
#
# @return [SelectorProxy]
def self.boolean
  boolean_columns = F.col(Boolean)
  _selector_proxy_(boolean_columns, name: "boolean")
end

.by_name(*names, require_all: true) ⇒ SelectorProxy

Note:

Matching columns are returned in the order in which they are declared in the selector, not the underlying schema order.

Select all columns matching the given names.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns by name:

df.select(Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Match any of the given columns by name:

df.select(Polars.cs.by_name("baz", "moose", "foo", "bear", require_all: false))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ baz │
# │ --- ┆ --- │
# │ str ┆ f64 │
# ╞═════╪═════╡
# │ x   ┆ 2.0 │
# │ y   ┆ 5.5 │
# └─────┴─────┘

Match all columns except for those given:

df.select(~Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ baz ┆ zap   │
# │ --- ┆ ---   │
# │ f64 ┆ bool  │
# ╞═════╪═══════╡
# │ 2.0 ┆ false │
# │ 5.5 ┆ true  │
# └─────┴───────┘

Parameters:

  • names (Array)

    One or more names of columns to select.

  • require_all (Boolean) (defaults to: true)

    Whether to match all names (the default) or any of the names.

Returns:

  • (SelectorProxy)


436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'lib/polars/selectors.rb', line 436

# Select all columns matching the given names.
#
# @param names [Array]
#   One or more names of columns to select; every entry must be a String.
# @param require_all [Boolean]
#   Whether to match all names (the default) or any of the names.
#
# @return [SelectorProxy]
#
# @raise [TypeError] if any of the given names is not a String.
def self.by_name(*names, require_all: true)
  # Reject the first non-String entry; otherwise keep the names in order.
  checked_names = names.map do |candidate|
    raise TypeError, "invalid name: #{candidate.inspect}" unless candidate.is_a?(::String)

    candidate
  end

  params = {"*names" => checked_names}
  column_spec = checked_names

  unless require_all
    # "Any of" matching is expressed as one anchored alternation of the
    # escaped names, and the flag is recorded only in this non-default case.
    alternatives = checked_names.map { |name| Utils.re_escape(name) }
    column_spec = "^(#{alternatives.join("|")})$"
    params["require_all"] = require_all
  end

  _selector_proxy_(F.col(column_spec), name: "by_name", parameters: params)
end

.categorical ⇒ SelectorProxy

Select all categorical columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["xx", "yy"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  },
  schema_overrides: {"foo" => Polars::Categorical}
)

Select all categorical columns:

df.select(Polars.cs.categorical)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ cat │
# ╞═════╡
# │ xx  │
# │ yy  │
# └─────┘

Select all columns except for those that are categorical:

df.select(~Polars.cs.categorical)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:

  • (SelectorProxy)


500
501
502
# File 'lib/polars/selectors.rb', line 500

# Select all categorical columns.
#
# @return [SelectorProxy]
def self.categorical
  categorical_columns = F.col(Categorical)
  _selector_proxy_(categorical_columns, name: "categorical")
end

.contains(*substring) ⇒ SelectorProxy

Select columns whose names contain the given literal substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that contain the substring ‘ba’:

df.select(Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select columns that contain the substring ‘ba’ or the letter ‘z’:

df.select(Polars.cs.contains("ba", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ bar ┆ baz ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 123 ┆ 2.0 ┆ false │
# │ 456 ┆ 5.5 ┆ true  │
# └─────┴─────┴───────┘

Select all columns except for those that contain the substring ‘ba’:

df.select(~Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ foo ┆ zap   │
# │ --- ┆ ---   │
# │ str ┆ bool  │
# ╞═════╪═══════╡
# │ x   ┆ false │
# │ y   ┆ true  │
# └─────┴───────┘

Parameters:

  • substring (Object)

    Substring(s) that matching column names should contain.

Returns:

  • (SelectorProxy)


559
560
561
562
563
564
565
566
567
568
# File 'lib/polars/selectors.rb', line 559

# Select columns whose names contain the given literal substring(s).
#
# @param substring [Object]
#   Substring(s) that matching column names should contain.
#
# @return [SelectorProxy]
def self.contains(*substring)
  # `_re_string` (defined elsewhere) converts the substring(s) into a
  # regex fragment; the same fragment is recorded in the parameters.
  fragment = _re_string(substring)

  _selector_proxy_(
    F.col("^.*#{fragment}.*$"),
    name: "contains",
    parameters: {"*substring" => fragment}
  )
end

.date ⇒ SelectorProxy

Select all date columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
  }
)

Select all date columns:

df.select(Polars.cs.date)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-08-09 │
# └────────────┘

Select all columns except for those that are dates:

df.select(~Polars.cs.date)
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ dtm                 │
# │ ---                 │
# │ datetime[ns]        │
# ╞═════════════════════╡
# │ 2001-05-07 10:25:00 │
# │ 2031-12-31 00:30:00 │
# └─────────────────────┘

Returns:

  • (SelectorProxy)


607
608
609
# File 'lib/polars/selectors.rb', line 607

# Select all date columns.
#
# @return [SelectorProxy]
def self.date
  date_columns = F.col(Date)
  _selector_proxy_(date_columns, name: "date")
end

.decimal ⇒ SelectorProxy

Select all decimal columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [BigDecimal("123"), BigDecimal("456")],
    "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
  },
  schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
)

Select all decimal columns:

df.select(Polars.cs.decimal)
# =>
# shape: (2, 2)
# ┌──────────────┬───────────────┐
# │ bar          ┆ baz           │
# │ ---          ┆ ---           │
# │ decimal[*,0] ┆ decimal[10,5] │
# ╞══════════════╪═══════════════╡
# │ 123          ┆ 2.00050       │
# │ 456          ┆ -50.55550     │
# └──────────────┴───────────────┘

Select all columns except the decimal ones:


df.select(~Polars.cs.decimal)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:

  • (SelectorProxy)


655
656
657
658
# File 'lib/polars/selectors.rb', line 655

# Select all decimal columns.
#
# @return [SelectorProxy]
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  decimal_columns = F.col(Decimal)
  _selector_proxy_(decimal_columns, name: "decimal")
end

.ends_with(*suffix) ⇒ SelectorProxy

Select columns that end with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that end with the substring ‘z’:

df.select(Polars.cs.ends_with("z"))
# =>
# shape: (2, 1)
# ┌─────┐
# │ baz │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# │ 5.5 │
# └─────┘

Select columns that end with either the letter ‘z’ or ‘r’:

df.select(Polars.cs.ends_with("z", "r"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all columns except for those that end with the substring ‘z’:

df.select(~Polars.cs.ends_with("z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ foo ┆ bar ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ str ┆ i64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ x   ┆ 123 ┆ false │
# │ y   ┆ 456 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • suffix (Object)

    Substring(s) that matching column names should end with.

Returns:

  • (SelectorProxy)


715
716
717
718
719
720
721
722
723
724
# File 'lib/polars/selectors.rb', line 715

# Select columns that end with the given substring(s).
#
# @param suffix [Object]
#   Substring(s) that matching column names should end with.
#
# @return [SelectorProxy]
def self.ends_with(*suffix)
  # `_re_string` (defined elsewhere) converts the suffix(es) into a regex
  # fragment; the same fragment is recorded in the parameters.
  fragment = _re_string(suffix)

  _selector_proxy_(
    F.col("^.*#{fragment}$"),
    name: "ends_with",
    parameters: {"*suffix" => fragment}
  )
end

.first ⇒ SelectorProxy

Select the first column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the first column:

df.select(Polars.cs.first)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Select everything except for the first column:

df.select(~Polars.cs.first)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ f64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 1   │
# └─────┴─────┴─────┘

Returns:

  • (SelectorProxy)


765
766
767
# File 'lib/polars/selectors.rb', line 765

# Select the first column in the current scope.
#
# @return [SelectorProxy]
def self.first
  leading_column = F.first
  _selector_proxy_(leading_column, name: "first")
end

.float ⇒ SelectorProxy

Select all float columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0.0, 1.0]
  },
  schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
)

Select all float columns:

df.select(Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ baz ┆ zap │
# │ --- ┆ --- │
# │ f32 ┆ f64 │
# ╞═════╪═════╡
# │ 2.0 ┆ 0.0 │
# │ 5.5 ┆ 1.0 │
# └─────┴─────┘

Select all columns except for those that are float:

df.select(~Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Returns:

  • (SelectorProxy)


809
810
811
# File 'lib/polars/selectors.rb', line 809

# Select all float columns.
#
# @return [SelectorProxy]
def self.float
  float_columns = F.col(FLOAT_DTYPES)
  _selector_proxy_(float_columns, name: "float")
end

.integer ⇒ SelectorProxy

Select all integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select all integer columns:

df.select(Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ zap │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 123 ┆ 0   │
# │ 456 ┆ 1   │
# └─────┴─────┘

Select all columns except for those that are integer:

df.select(~Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ baz │
# │ --- ┆ --- │
# │ str ┆ f64 │
# ╞═════╪═════╡
# │ x   ┆ 2.0 │
# │ y   ┆ 5.5 │
# └─────┴─────┘

Returns:

  • (SelectorProxy)


852
853
854
# File 'lib/polars/selectors.rb', line 852

# Select all integer columns.
#
# @return [SelectorProxy]
def self.integer
  integer_columns = F.col(INTEGER_DTYPES)
  _selector_proxy_(integer_columns, name: "integer")
end

.last ⇒ SelectorProxy

Select the last column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the last column:

df.select(Polars.cs.last)
# =>
# shape: (2, 1)
# ┌─────┐
# │ zap │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Select everything except for the last column:

df.select(~Polars.cs.last)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ baz │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ x   ┆ 123 ┆ 2.0 │
# │ y   ┆ 456 ┆ 5.5 │
# └─────┴─────┴─────┘

Returns:

  • (SelectorProxy)


1009
1010
1011
# File 'lib/polars/selectors.rb', line 1009

# Select the last column in the current scope.
#
# @return [SelectorProxy]
def self.last
  trailing_column = F.last
  _selector_proxy_(trailing_column, name: "last")
end

.numeric ⇒ SelectorProxy

Select all numeric columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 0]
  },
  schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
)

Match all numeric columns:

df.select(Polars.cs.numeric)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i16 ┆ f32 ┆ u8  │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 0   │
# └─────┴─────┴─────┘

Match all columns except for those that are numeric:

df.select(~Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:

  • (SelectorProxy)


1053
1054
1055
# File 'lib/polars/selectors.rb', line 1053

# Select all numeric columns.
#
# @return [SelectorProxy]
def self.numeric
  numeric_columns = F.col(NUMERIC_DTYPES)
  _selector_proxy_(numeric_columns, name: "numeric")
end

.signed_integer ⇒ SelectorProxy

Select all signed integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all signed integer columns:

df.select(Polars.cs.signed_integer)
# =>
# shape: (2, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ -123 │
# │ -456 │
# └──────┘
df.select(~Polars.cs.signed_integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬─────┐
# │ bar  ┆ baz  ┆ zap │
# │ ---  ┆ ---  ┆ --- │
# │ u32  ┆ u64  ┆ str │
# ╞══════╪══════╪═════╡
# │ 3456 ┆ 7654 ┆ ab  │
# │ 6789 ┆ 4321 ┆ cd  │
# └──────┴──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:

  • (SelectorProxy)


909
910
911
# File 'lib/polars/selectors.rb', line 909

# Select all signed integer columns.
#
# @return [SelectorProxy]
def self.signed_integer
  signed_columns = F.col(SIGNED_INTEGER_DTYPES)
  _selector_proxy_(signed_columns, name: "signed_integer")
end

.starts_with(*prefix) ⇒ SelectorProxy

Select columns that start with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [1.0, 2.0],
    "bar" => [3.0, 4.0],
    "baz" => [5, 6],
    "zap" => [7, 8]
  }
)

Match columns starting with a ‘b’:

df.select(Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 5   │
# │ 4.0 ┆ 6   │
# └─────┴─────┘

Match columns starting with either the letter ‘b’ or ‘z’:

df.select(Polars.cs.starts_with("b", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ f64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 3.0 ┆ 5   ┆ 7   │
# │ 4.0 ┆ 6   ┆ 8   │
# └─────┴─────┴─────┘

Match all columns except for those starting with ‘b’:

df.select(~Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ zap │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 7   │
# │ 2.0 ┆ 8   │
# └─────┴─────┘

Parameters:

  • prefix (Object)

    Substring(s) that matching column names should start with.

Returns:

  • (SelectorProxy)


1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
# File 'lib/polars/selectors.rb', line 1112

# Select columns that start with the given substring(s).
#
# @param prefix [Object]
#   Substring(s) that matching column names should start with.
#
# @return [SelectorProxy]
def self.starts_with(*prefix)
  # `_re_string` (defined elsewhere) converts the prefix(es) into a regex
  # fragment used to build the anchored column-name pattern.
  fragment = _re_string(prefix)

  # NOTE(review): the raw `prefix` array is recorded here, whereas
  # `contains` and `ends_with` record the regex fragment instead; confirm
  # which form the selector parameters are expected to hold.
  _selector_proxy_(
    F.col("^#{fragment}.*$"),
    name: "starts_with",
    parameters: {"*prefix" => prefix}
  )
end

.string(include_categorical: false) ⇒ SelectorProxy

Select all String (and, optionally, Categorical) string columns.

Examples:

df = Polars::DataFrame.new(
  {
    "w" => ["xx", "yy", "xx", "yy", "xx"],
    "x" => [1, 2, 1, 4, -2],
    "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
    "z" => ["a", "b", "a", "b", "b"]
  },
).with_columns(
  z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
)

Group by all string columns, sum the numeric columns, then sort by the string cols:

df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ w   ┆ x   ┆ y   │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ xx  ┆ 0   ┆ 2.0 │
# │ yy  ┆ 6   ┆ 7.0 │
# └─────┴─────┴─────┘

Group by all string and categorical columns:

df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
  Polars.cs.string(include_categorical: true)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬──────┐
# │ w   ┆ z   ┆ x   ┆ y    │
# │ --- ┆ --- ┆ --- ┆ ---  │
# │ str ┆ cat ┆ i64 ┆ f64  │
# ╞═════╪═════╪═════╪══════╡
# │ xx  ┆ a   ┆ 2   ┆ 4.0  │
# │ xx  ┆ b   ┆ -2  ┆ -2.0 │
# │ yy  ┆ b   ┆ 6   ┆ 7.0  │
# └─────┴─────┴─────┴──────┘

Returns:

  • (SelectorProxy)


1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
# File 'lib/polars/selectors.rb', line 1166

# Select all String (and, optionally, Categorical) string columns.
#
# @param include_categorical [Boolean]
#   When true, Categorical columns are selected in addition to String ones.
#
# @return [SelectorProxy]
def self.string(include_categorical: false)
  selected_dtypes = include_categorical ? [String, Categorical] : [String]

  _selector_proxy_(
    F.col(selected_dtypes),
    name: "string",
    parameters: {"include_categorical" => include_categorical}
  )
end

.time ⇒ SelectorProxy

Select all time columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
    "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
  },
  schema_overrides: {"tm" => Polars::Time}
)

Select all time columns:

df.select(Polars.cs.time)
# =>
# shape: (2, 1)
# ┌──────────┐
# │ tm       │
# │ ---      │
# │ time     │
# ╞══════════╡
# │ 00:00:00 │
# │ 23:59:59 │
# └──────────┘

Select all columns except for those that are times:

df.select(~Polars.cs.time)
# =>
# shape: (2, 2)
# ┌─────────────────────┬────────────┐
# │ dtm                 ┆ dt         │
# │ ---                 ┆ ---        │
# │ datetime[ns]        ┆ date       │
# ╞═════════════════════╪════════════╡
# │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
# │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
# └─────────────────────┴────────────┘

Returns:

  • (SelectorProxy)


1218
1219
1220
# File 'lib/polars/selectors.rb', line 1218

# Select all time columns.
#
# @return [SelectorProxy]
def self.time
  time_columns = F.col(Time)
  _selector_proxy_(time_columns, name: "time")
end

.unsigned_integer ⇒ SelectorProxy

Select all unsigned integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all unsigned integer columns:

df.select(Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬──────┐
# │ bar  ┆ baz  │
# │ ---  ┆ ---  │
# │ u32  ┆ u64  │
# ╞══════╪══════╡
# │ 3456 ┆ 7654 │
# │ 6789 ┆ 4321 │
# └──────┴──────┘

Select all columns except for those that are unsigned integers:

df.select(~Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬─────┐
# │ foo  ┆ zap │
# │ ---  ┆ --- │
# │ i64  ┆ str │
# ╞══════╪═════╡
# │ -123 ┆ ab  │
# │ -456 ┆ cd  │
# └──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:

  • (SelectorProxy)


966
967
968
# File 'lib/polars/selectors.rb', line 966

# Select all unsigned integer columns.
#
# @return [SelectorProxy]
def self.unsigned_integer
  unsigned_columns = F.col(UNSIGNED_INTEGER_DTYPES)
  _selector_proxy_(unsigned_columns, name: "unsigned_integer")
end