Method: Polars::Functions#concat

Defined in:
lib/polars/functions/eager.rb

#concat(items, rechunk: false, how: "vertical", parallel: true, strict: false) ⇒ Object

Combine multiple DataFrames, LazyFrames, or Series into a single DataFrame, LazyFrame, or Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘
df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘
df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

Parameters:

  • items (Object)

    DataFrames/Series/LazyFrames to concatenate.

  • rechunk (Boolean) (defaults to: false)

    Make sure that all data is in contiguous memory.

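  • how ("vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align") (defaults to: "vertical")
    • Vertical: applies multiple vstack operations.
    • Diagonal: finds a union between the column schemas and fills missing column values with null.
    • Horizontal: stacks the columns of the inputs side by side and fills with nulls if the lengths don't match.
    • Align: joins the frames on the columns they all share (full outer join), fills values missing from a frame with null, and sorts the result by those shared columns. Only supported for DataFrames and LazyFrames.
    • The "_relaxed" variants of vertical and diagonal additionally coerce columns whose dtypes differ (e.g. i64 and f64 become f64).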
  • parallel (Boolean) (defaults to: true)

    Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

  • strict (Boolean) (defaults to: false)

    When how=horizontal, require all DataFrames to be the same height, raising an error if not.

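Returns:

  • (Object)

    The concatenated result: a DataFrame, LazyFrame, Series, or Expr, matching the type of the first item.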



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/polars/functions/eager.rb', line 98

# Combine multiple DataFrames, LazyFrames, Series, or expressions into one object.
#
# The class of the first item decides which implementation is used; the
# remaining items are handed to that implementation unchanged.
#
# @param items [Object]
#   Collection (anything responding to `to_a`) of DataFrames, LazyFrames,
#   Series, or Exprs to concatenate.
# @param rechunk [Boolean]
#   For DataFrame/Series/Expr results, call `rechunk` on the result before
#   returning it. For LazyFrames the flag is forwarded to the vertical and
#   diagonal concat calls. It is not used by the "align" strategy.
# @param how ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align"]
#   Concatenation strategy. Series only accept "vertical"; "align" is only
#   accepted for DataFrames and LazyFrames; Exprs are always appended in order.
# @param parallel [Boolean]
#   Forwarded to the lazy concat calls (which also back the DataFrame
#   "_relaxed" strategies).
# @param strict [Boolean]
#   Forwarded to the horizontal concat calls; when `how` is "horizontal",
#   require all frames to have the same height.
#
# @return [Object]
#   A DataFrame, LazyFrame, Series, or Expr, matching the type of the first item.
#
# @raise [ArgumentError]
#   If `items` is empty, `how` is not supported for the item type, or the
#   first item is of an unexpected type.
# @raise [TypeError]
#   If `how` is "align" and the first item is neither a DataFrame nor a LazyFrame.
# @raise [InvalidOperationError]
#   If `how` is "align" and the frames share no column.
def concat(items, rechunk: false, how: "vertical", parallel: true, strict: false)
  elems = items.to_a
  raise ArgumentError, "cannot concat empty list" if elems.empty?

  first = elems.first

  if how == "align"
    unless first.is_a?(DataFrame) || first.is_a?(LazyFrame)
      raise TypeError, "'align' strategy is not supported for #{first.class.name}"
    end

    # Resolve each schema once; both the ordering key and the intersection
    # below are derived from the same lists of column names.
    names_per_frame = elems.map { |e| e.collect_schema.names }

    # establish common columns, maintaining the order in which they appear
    key = names_per_frame.flatten(1).uniq.each_with_index.to_h
    common_cols = names_per_frame
      .reduce { |x, y| Set.new(x) & Set.new(y) }
      .sort_by { |k| key[k] }

    # we require at least one key column for 'align'
    if common_cols.empty?
      raise InvalidOperationError, "'align' strategy requires at least one common column"
    end

    # Align the frame data with successive full outer joins on the common
    # columns. The right-hand copies of the key columns receive a temporary
    # suffix, are coalesced back into the key columns, and are then dropped.
    lf = elems.map { |df| df.lazy }.reduce do |x, y|
      x.join(
        y,
        how: "full",
        on: common_cols,
        suffix: "_PL_CONCAT_RIGHT",
        maintain_order: "right_left"
      )
      .with_columns(
        common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
      )
      .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
    end.sort(common_cols)

    # Only materialize when the caller started from eager frames.
    return first.is_a?(DataFrame) ? lf.collect : lf
  end

  out =
    case first
    when DataFrame
      case how
      when "vertical"
        Utils.wrap_df(Plr.concat_df(elems))
      when "vertical_relaxed"
        # The relaxed strategies run through the lazy concat and collect eagerly.
        Utils.wrap_ldf(
          Plr.concat_lf(elems.map { |df| df.lazy }, rechunk, parallel, true)
        ).collect(optimizations: QueryOptFlags._eager)
      when "diagonal"
        Utils.wrap_df(Plr.concat_df_diagonal(elems))
      when "diagonal_relaxed"
        Utils.wrap_ldf(
          Plr.concat_lf_diagonal(elems.map { |df| df.lazy }, rechunk, parallel, true)
        ).collect(optimizations: QueryOptFlags._eager)
      when "horizontal"
        Utils.wrap_df(Plr.concat_df_horizontal(elems, strict))
      else
        raise ArgumentError, "how must be one of {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align'}, got #{how}"
      end
    when LazyFrame
      lazy_out =
        case how
        when "vertical"
          Plr.concat_lf(elems, rechunk, parallel, false)
        when "vertical_relaxed"
          Plr.concat_lf(elems, rechunk, parallel, true)
        when "diagonal"
          Plr.concat_lf_diagonal(elems, rechunk, parallel, false)
        when "diagonal_relaxed"
          Plr.concat_lf_diagonal(elems, rechunk, parallel, true)
        when "horizontal"
          Plr.concat_lf_horizontal(elems, parallel, strict)
        else
          raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', and 'align' concat strategy."
        end
      # Lazy results are returned as-is; no eager rechunk is applied here.
      return Utils.wrap_ldf(lazy_out)
    when Series
      unless how == "vertical"
        raise ArgumentError, "Series only supports 'vertical' concat strategy"
      end

      Utils.wrap_s(Plr.concat_series(elems))
    when Expr
      # Expressions ignore `how`: each following expression is appended in order.
      elems.drop(1).reduce(first) { |acc, e| acc.append(e) }
    else
      raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
    end

  rechunk ? out.rechunk : out
end