Class: Polars::StringNameSpace

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/string_name_space.rb

Overview

Series.str namespace.

Instance Method Summary collapse

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class Polars::ExprDispatch.

Instance Method Details

#contains(pattern, literal: false) ⇒ Series

Check if strings in Series contain a substring that matches a regex.

Examples:

s = Polars::Series.new(["Crab", "cat and dog", "rab$bit", nil])
s.str.contains("cat|bit")
# =>
# shape: (4,)
# Series: '' [bool]
# [
#         false
#         true
#         true
#         null
# ]
s.str.contains("rab$", literal: true)
# =>
# shape: (4,)
# Series: '' [bool]
# [
#         false
#         false
#         true
#         null
# ]


290
291
292
# File 'lib/polars/string_name_space.rb', line 290

# Check if strings in the Series contain a substring that matches a regex.
#
# @return [Series]
def contains(pattern, literal: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(pattern, literal: literal)
end

#count_match(pattern) ⇒ Series

Count all successive non-overlapping regex matches.

Examples:

s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
s.str.count_match('\d')
# =>
# shape: (2,)
# Series: 'foo' [u32]
# [
#         5
#         6
# ]


488
489
490
# File 'lib/polars/string_name_space.rb', line 488

# Count all successive non-overlapping regex matches.
#
# @return [Series]
def count_match(pattern)
  # Explicit forwarding; same effect as a bare `super`.
  super(pattern)
end

#decode(encoding, strict: false) ⇒ Series

Decode a value using the provided encoding.

Examples:

s = Polars::Series.new(["666f6f", "626172", nil])
s.str.decode("hex")
# =>
# shape: (3,)
# Series: '' [binary]
# [
#         b"foo"
#         b"bar"
#         null
# ]


361
362
363
# File 'lib/polars/string_name_space.rb', line 361

# Decode a value using the provided encoding.
#
# @return [Series]
def decode(encoding, strict: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(encoding, strict: strict)
end

#encode(encoding) ⇒ Series

Encode a value using the provided encoding.

Examples:

s = Polars::Series.new(["foo", "bar", nil])
s.str.encode("hex")
# =>
# shape: (3,)
# Series: '' [str]
# [
#         "666f6f"
#         "626172"
#         null
# ]


383
384
385
# File 'lib/polars/string_name_space.rb', line 383

# Encode a value using the provided encoding.
#
# @return [Series]
def encode(encoding)
  # Explicit forwarding; same effect as a bare `super`.
  super(encoding)
end

#ends_with(sub) ⇒ Series

Check if string values end with a substring.

Examples:

s = Polars::Series.new("fruits", ["apple", "mango", nil])
s.str.ends_with("go")
# =>
# shape: (3,)
# Series: 'fruits' [bool]
# [
#         false
#         true
#         null
# ]


312
313
314
# File 'lib/polars/string_name_space.rb', line 312

# Check if string values end with a substring.
#
# @return [Series]
def ends_with(sub)
  # Explicit forwarding; same effect as a bare `super`.
  super(sub)
end

#extract(pattern, group_index: 1) ⇒ Series

Extract the target capture group from provided patterns.

Examples:

df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
df.select([Polars.col("foo").str.extract('(\d+)')])
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ 123 │
# │ 678 │
# └─────┘


443
444
445
# File 'lib/polars/string_name_space.rb', line 443

# Extract the target capture group from provided patterns.
#
# @return [Series]
def extract(pattern, group_index: 1)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(pattern, group_index: group_index)
end

#extract_all(pattern) ⇒ Series

Extracts all matches for the given regex pattern.

Extract each successive non-overlapping regex match in an individual string as an array.

Examples:

s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
s.str.extract_all('(\d+)')
# =>
# shape: (2,)
# Series: 'foo' [list[str]]
# [
#         ["123", "45"]
#         ["678", "910"]
# ]


467
468
469
# File 'lib/polars/string_name_space.rb', line 467

# Extracts all matches for the given regex pattern.
#
# @return [Series]
def extract_all(pattern)
  # Explicit forwarding; same effect as a bare `super`.
  super(pattern)
end

#join(delimiter = "-", ignore_nulls: true) ⇒ Series Also known as: concat

Vertically concat the values in the Series to a single string value.

Examples:

Polars::Series.new([1, nil, 2]).str.join("-")
# =>
# shape: (1,)
# Series: '' [str]
# [
#         "1-2"
# ]
Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
# =>
# shape: (1,)
# Series: '' [str]
# [
#         null
# ]


252
253
254
# File 'lib/polars/string_name_space.rb', line 252

# Vertically concat the values in the Series to a single string value.
#
# @return [Series]
def join(delimiter = "-", ignore_nulls: true)
  # Explicit forwarding of every argument (defaults included); same effect
  # as a bare `super`.
  super(delimiter, ignore_nulls: ignore_nulls)
end

#json_path_match(json_path) ⇒ Series

Extract the first match of json string with provided JSONPath expression.

Raises an error if an invalid JSON string is encountered. All return values are cast to Utf8 regardless of the original value.

Documentation on the JSONPath standard can be found at https://goessner.net/articles/JsonPath/.

Examples:

df = Polars::DataFrame.new(
  {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
)
df.select(Polars.col("json_val").str.json_path_match("$.a"))[0.., 0]
# =>
# shape: (5,)
# Series: 'json_val' [str]
# [
#         "1"
#         null
#         "2"
#         "2.1"
#         "true"
# ]


415
416
417
# File 'lib/polars/string_name_space.rb', line 415

# Extract the first match of json string with provided JSONPath expression.
#
# @return [Series]
def json_path_match(json_path)
  # Explicit forwarding; same effect as a bare `super`.
  super(json_path)
end

#lengths ⇒ Series

Note:

The returned lengths are equal to the number of bytes in the UTF8 string. If you need the length in terms of the number of characters, use n_chars instead.

Get length of the string values in the Series (as number of bytes).

Examples:

s = Polars::Series.new(["Café", nil, "345", "東京"])
s.str.lengths
# =>
# shape: (4,)
# Series: '' [u32]
# [
#         5
#         null
#         3
#         6
# ]


200
201
202
# File 'lib/polars/string_name_space.rb', line 200

# Get length of the string values in the Series (as number of bytes).
#
# @return [Series]
def lengths
  # No arguments to forward; `super()` has the same effect as a bare `super` here.
  super()
end

#ljust(width, fillchar = " ") ⇒ Series

Return the string left justified in a string of length width.

Padding is done using the specified fillchar. The original string is returned if width is less than or equal to s.length.

Examples:

s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
s.str.ljust(8, "*")
# =>
# shape: (4,)
# Series: 'a' [str]
# [
#         "cow*****"
#         "monkey**"
#         null
#         "hippopotamus"
# ]


774
775
776
# File 'lib/polars/string_name_space.rb', line 774

# Return the string left justified in a string of length width.
#
# @return [Series]
def ljust(width, fillchar = " ")
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(width, fillchar)
end

#n_chars ⇒ Series

Note:

If you know that you are working with ASCII text, lengths will be equivalent, and faster (returns length in terms of the number of bytes).

Get length of the string values in the Series (as number of chars).

Examples:

s = Polars::Series.new(["Café", nil, "345", "東京"])
s.str.n_chars
# =>
# shape: (4,)
# Series: '' [u32]
# [
#         4
#         null
#         3
#         2
# ]


224
225
226
# File 'lib/polars/string_name_space.rb', line 224

# Get length of the string values in the Series (as number of chars).
#
# @return [Series]
def n_chars
  # No arguments to forward; `super()` has the same effect as a bare `super` here.
  super()
end

#replace(pattern, value, literal: false) ⇒ Series

Replace first matching regex/literal substring with a new string value.

Examples:

s = Polars::Series.new(["123abc", "abc456"])
s.str.replace('abc\b', "ABC")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "123ABC"
#         "abc456"
# ]


626
627
628
# File 'lib/polars/string_name_space.rb', line 626

# Replace first matching regex/literal substring with a new string value.
#
# @return [Series]
def replace(pattern, value, literal: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(pattern, value, literal: literal)
end

#replace_all(pattern, value, literal: false) ⇒ Series

Replace all matching regex/literal substrings with a new string value.

Examples:

df = Polars::Series.new(["abcabc", "123a123"])
df.str.replace_all("a", "-")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "-bc-bc"
#         "123-123"
# ]


651
652
653
# File 'lib/polars/string_name_space.rb', line 651

# Replace all matching regex/literal substrings with a new string value.
#
# @return [Series]
def replace_all(pattern, value, literal: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(pattern, value, literal: literal)
end

#rjust(width, fillchar = " ") ⇒ Series

Return the string right justified in a string of length width.

Padding is done using the specified fillchar. The original string is returned if width is less than or equal to s.length.

Examples:

s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
s.str.rjust(8, "*")
# =>
# shape: (4,)
# Series: 'a' [str]
# [
#         "*****cow"
#         "**monkey"
#         null
#         "hippopotamus"
# ]


802
803
804
# File 'lib/polars/string_name_space.rb', line 802

# Return the string right justified in a string of length width.
#
# @return [Series]
def rjust(width, fillchar = " ")
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(width, fillchar)
end

#slice(offset, length = nil) ⇒ Series

Create subslices of the string values of a Utf8 Series.

Examples:

s = Polars::Series.new("s", ["pear", nil, "papaya", "dragonfruit"])
s.str.slice(-3)
# =>
# shape: (4,)
# Series: 's' [str]
# [
#         "ear"
#         null
#         "aya"
#         "uit"
# ]

Using the optional length parameter

s.str.slice(4, 3)
# =>
# shape: (4,)
# Series: 's' [str]
# [
#         ""
#         null
#         "ya"
#         "onf"
# ]


876
877
878
879
# File 'lib/polars/string_name_space.rb', line 876

# Create subslices of the string values of a Utf8 Series.
#
# Wraps the underlying `_s`, converts it to a frame, selects the
# expression-level `str.slice` on the column named after the series, and
# converts the result back to a series.
#
# @return [Series]
def slice(offset, length = nil)
  series = Utils.wrap_s(_s)
  frame = series.to_frame
  sliced = Polars.col(series.name).str.slice(offset, length)
  frame.select(sliced).to_series
end

#split(by, inclusive: false) ⇒ Series

Split the string by a substring.



500
501
502
# File 'lib/polars/string_name_space.rb', line 500

# Split the string by a substring.
#
# @return [Series]
def split(by, inclusive: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(by, inclusive: inclusive)
end

#split_exact(by, n, inclusive: false) ⇒ Series

Split the string by a substring using n splits.

Results in a struct of n+1 fields.

If it cannot make n splits, the remaining field elements will be null.

Examples:

df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
df["x"].str.split_exact("_", 1).alias("fields")
# =>
# shape: (4,)
# Series: 'fields' [struct[2]]
# [
#         {"a","1"}
#         {null,null}
#         {"c",null}
#         {"d","4"}
# ]

Split string values in column x into exactly 2 parts and assign each part to a new column.

df["x"]
  .str.split_exact("_", 1)
  .struct.rename_fields(["first_part", "second_part"])
  .alias("fields")
  .to_frame
  .unnest("fields")
# =>
# shape: (4, 2)
# ┌────────────┬─────────────┐
# │ first_part ┆ second_part │
# │ ---        ┆ ---         │
# │ str        ┆ str         │
# ╞════════════╪═════════════╡
# │ a          ┆ 1           │
# │ null       ┆ null        │
# │ c          ┆ null        │
# │ d          ┆ 4           │
# └────────────┴─────────────┘


551
552
553
# File 'lib/polars/string_name_space.rb', line 551

# Split the string by a substring using n splits.
#
# @return [Series]
def split_exact(by, n, inclusive: false)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(by, n, inclusive: inclusive)
end

#splitn(by, n) ⇒ Series

Split the string by a substring, restricted to returning at most n items.

If the number of possible splits is less than n-1, the remaining field elements will be null. If the number of possible splits is n-1 or greater, the last (nth) substring will contain the remainder of the string.

Examples:

df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
df["s"].str.splitn(" ", 2).alias("fields")
# =>
# shape: (4,)
# Series: 'fields' [struct[2]]
# [
#         {"foo","bar"}
#         {null,null}
#         {"foo-bar",null}
#         {"foo","bar baz"}
# ]

Split string values in column s into exactly 2 parts and assign each part to a new column.

df["s"]
  .str.splitn(" ", 2)
  .struct.rename_fields(["first_part", "second_part"])
  .alias("fields")
  .to_frame
  .unnest("fields")
# =>
# shape: (4, 2)
# ┌────────────┬─────────────┐
# │ first_part ┆ second_part │
# │ ---        ┆ ---         │
# │ str        ┆ str         │
# ╞════════════╪═════════════╡
# │ foo        ┆ bar         │
# │ null       ┆ null        │
# │ foo-bar    ┆ null        │
# │ foo        ┆ bar baz     │
# └────────────┴─────────────┘


600
601
602
603
# File 'lib/polars/string_name_space.rb', line 600

# Split the string by a substring, restricted to returning at most n items.
#
# Wraps the underlying `_s`, converts it to a frame, selects the
# expression-level `str.splitn` on the column named after the series, and
# converts the result back to a series.
#
# @return [Series]
def splitn(by, n)
  series = Utils.wrap_s(_s)
  frame = series.to_frame
  parts = Polars.col(series.name).str.splitn(by, n)
  frame.select(parts).to_series
end

#starts_with(sub) ⇒ Series

Check if string values start with a substring.

Examples:

s = Polars::Series.new("fruits", ["apple", "mango", nil])
s.str.starts_with("app")
# =>
# shape: (3,)
# Series: 'fruits' [bool]
# [
#         true
#         false
#         null
# ]


334
335
336
# File 'lib/polars/string_name_space.rb', line 334

# Check if string values start with a substring.
#
# @return [Series]
def starts_with(sub)
  # Explicit forwarding; same effect as a bare `super`.
  super(sub)
end

#strip_chars(matches = nil) ⇒ Series

Remove leading and trailing whitespace.

Examples:

s = Polars::Series.new([" hello ", "\tworld"])
s.str.strip_chars
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "hello"
#         "world"
# ]


672
673
674
# File 'lib/polars/string_name_space.rb', line 672

# Remove leading and trailing whitespace.
#
# @return [Series]
def strip_chars(matches = nil)
  # Explicit forwarding (the nil default included); same effect as a bare `super`.
  super(matches)
end

#strip_chars_end(matches = nil) ⇒ Series Also known as: rstrip

Remove trailing whitespace.

Examples:

s = Polars::Series.new([" hello ", "world\t"])
s.str.strip_chars_end
# =>
# shape: (2,)
# Series: '' [str]
# [
#         " hello"
#         "world"
# ]


715
716
717
# File 'lib/polars/string_name_space.rb', line 715

# Remove trailing whitespace.
#
# @return [Series]
def strip_chars_end(matches = nil)
  # Explicit forwarding (the nil default included); same effect as a bare `super`.
  super(matches)
end

#strip_chars_start(matches = nil) ⇒ Series Also known as: lstrip

Remove leading whitespace.

Examples:

s = Polars::Series.new([" hello ", "\tworld"])
s.str.strip_chars_start
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "hello "
#         "world"
# ]


693
694
695
# File 'lib/polars/string_name_space.rb', line 693

# Remove leading whitespace.
#
# @return [Series]
def strip_chars_start(matches = nil)
  # Explicit forwarding (the nil default included); same effect as a bare `super`.
  super(matches)
end

#strptime(datatype, fmt = nil, strict: true, exact: true, cache: true) ⇒ Series

Parse a Series of dtype Utf8 to a Date/Datetime Series.

Examples:

Dealing with a consistent format:

s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
# =>
# shape: (2,)
# Series: '' [datetime[μs, UTC]]
# [
#         2020-01-01 01:00:00 UTC
#         2020-01-01 02:00:00 UTC
# ]

Dealing with different formats.

s = Polars::Series.new(
  "date",
  [
    "2021-04-22",
    "2022-01-04 00:00:00",
    "01/31/22",
    "Sun Jul  8 00:34:60 2001"
  ]
)
s.to_frame.select(
  Polars.coalesce(
    Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
  )
).to_series
# =>
# shape: (4,)
# Series: 'date' [date]
# [
#         2021-04-22
#         2022-01-04
#         2022-01-31
#         2001-07-08
# ]


176
177
178
# File 'lib/polars/string_name_space.rb', line 176

# Parse a Series of dtype Utf8 to a Date/Datetime Series.
#
# @return [Series]
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(datatype, fmt, strict: strict, exact: exact, cache: cache)
end

#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Series

Convert a Utf8 column into a Date column.

Examples:

s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
s.str.to_date
# =>
# shape: (3,)
# Series: '' [date]
# [
#         2020-01-01
#         2020-02-01
#         2020-03-01
# ]


41
42
43
# File 'lib/polars/string_name_space.rb', line 41

# Convert a Utf8 column into a Date column.
#
# @return [Series]
def to_date(format = nil, strict: true, exact: true, cache: true)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(format, strict: strict, exact: exact, cache: cache)
end

#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true, ambiguous: "raise") ⇒ Series

Convert a Utf8 column into a Datetime column.

Examples:

s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
s.str.to_datetime("%Y-%m-%d %H:%M%#z")
# =>
# shape: (2,)
# Series: '' [datetime[μs, UTC]]
# [
#         2020-01-01 01:00:00 UTC
#         2020-01-01 02:00:00 UTC
# ]


79
80
81
82
83
84
85
86
87
88
89
# File 'lib/polars/string_name_space.rb', line 79

# Convert a Utf8 column into a Datetime column.
#
# @return [Series]
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  ambiguous: "raise"
)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(
    format,
    time_unit: time_unit,
    time_zone: time_zone,
    strict: strict,
    exact: exact,
    cache: cache,
    ambiguous: ambiguous
  )
end

#to_lowercase ⇒ Series

Modify the strings to their lowercase equivalent.

Examples:

s = Polars::Series.new("foo", ["CAT", "DOG"])
s.str.to_lowercase
# =>
# shape: (2,)
# Series: 'foo' [str]
# [
#         "cat"
#         "dog"
# ]


820
821
822
# File 'lib/polars/string_name_space.rb', line 820

# Modify the strings to their lowercase equivalent.
#
# @return [Series]
def to_lowercase
  # No arguments to forward; `super()` has the same effect as a bare `super` here.
  super()
end

#to_time(format = nil, strict: true, cache: true) ⇒ Series

Convert a Utf8 column into a Time column.

Examples:

s = Polars::Series.new(["01:00", "02:00", "03:00"])
s.str.to_time("%H:%M")
# =>
# shape: (3,)
# Series: '' [time]
# [
#         01:00:00
#         02:00:00
#         03:00:00
# ]


116
117
118
# File 'lib/polars/string_name_space.rb', line 116

# Convert a Utf8 column into a Time column.
#
# @return [Series]
def to_time(format = nil, strict: true, cache: true)
  # Explicit forwarding of every argument; same effect as a bare `super`.
  super(format, strict: strict, cache: cache)
end

#to_uppercase ⇒ Series

Modify the strings to their uppercase equivalent.

Examples:

s = Polars::Series.new("foo", ["cat", "dog"])
s.str.to_uppercase
# =>
# shape: (2,)
# Series: 'foo' [str]
# [
#         "CAT"
#         "DOG"
# ]


838
839
840
# File 'lib/polars/string_name_space.rb', line 838

# Modify the strings to their uppercase equivalent.
#
# @return [Series]
def to_uppercase
  # No arguments to forward; `super()` has the same effect as a bare `super` here.
  super()
end

#zfill(length) ⇒ Series

Fills the string with zeroes.

Return a copy of the string left filled with ASCII '0' digits to make a string of length `length`.

A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character rather than before. The original string is returned if `length` is less than or equal to s.length.

Examples:

s = Polars::Series.new([-1, 123, 999999, nil])
s.cast(Polars::String).str.zfill(4)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "-001"
#         "0123"
#         "999999"
#         null
# ]


746
747
748
# File 'lib/polars/string_name_space.rb', line 746

# Fills the string with zeroes.
#
# @return [Series]
def zfill(length)
  # Explicit forwarding; same effect as a bare `super`.
  super(length)
end