Method: Polars::Functions#corr

Defined in:
lib/polars/functions/lazy.rb

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false, eager: false) ⇒ Expr

Compute the Pearson's or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Eager evaluation:

s1 = Polars::Series.new("a", [1, 8, 3])
s2 = Polars::Series.new("b", [4, 5, 2])
Polars.corr(s1, s2, eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.544705
# ]
Polars.corr(s1, s2, method: "spearman", eager: true)
# =>
# shape: (1,)
# Series: 'a' [f64]
# [
#         0.5
# ]

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • method ("pearson", "spearman") (defaults to: "pearson")

    Correlation method.

  • ddof (Integer) (defaults to: nil)

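    "Delta Degrees of Freedom": nominally, the divisor used in the calculation is N - ddof, where N represents the number of elements. This parameter is deprecated and has no effect on the result; passing any non-nil value only issues a deprecation warning.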

  • propagate_nans (Boolean) (defaults to: false)

    If true any NaN encountered will lead to NaN in the output. Defaults to false where NaN are regarded as larger than any finite number and thus lead to the highest rank.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series; this requires that at least one of the given arguments is a Series. If set to false (default), return an expression instead.

Returns:

  • (Expr) — an expression by default; a Series when eager is true.

774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
# File 'lib/polars/functions/lazy.rb', line 774

# Compute the Pearson or Spearman rank correlation between two columns.
#
# @param a [Object]
#   Column name or expression; may be a Series when `eager: true`.
# @param b [Object]
#   Column name or expression; may be a Series when `eager: true`.
# @param method ["pearson", "spearman"]
#   Correlation method. Must be given as a String.
# @param ddof [Integer, nil]
#   Deprecated and ignored; any non-nil value only triggers a
#   deprecation warning.
# @param propagate_nans [Boolean]
#   Forwarded to the Spearman implementation; not used for Pearson.
# @param eager [Boolean]
#   When true, evaluate immediately against a frame built from the Series
#   inputs and return the resulting Series instead of an expression.
#
# @return [Expr, Series]
#   The wrapped expression, or the result of `to_series` when `eager: true`.
#
# @raise [ArgumentError]
#   If `eager: true` and neither input is a Series, or if `method` is not
#   one of the supported strings.
def corr(
  a,
  b,
  method: "pearson",
  ddof: nil,
  propagate_nans: false,
  eager: false
)
  unless ddof.nil?
    Utils.issue_deprecation_warning(
      "The `ddof` parameter has no effect. Do not use it."
    )
  end

  if eager
    inputs = [a, b]
    series_inputs = inputs.select { |e| e.is_a?(Series) }
    if series_inputs.empty?
      raise ArgumentError, "expected at least one Series in 'corr' inputs if 'eager: true'"
    end

    frame = Polars::DataFrame.new(series_inputs)
    # Series inputs are referenced by name inside the temporary frame;
    # anything else is passed through untouched to the lazy branch.
    exprs = inputs.map { |e| e.is_a?(Series) ? e.name : e }
    frame.select(
      corr(*exprs, eager: false, method: method, propagate_nans: propagate_nans)
    ).to_series
  else
    lhs = Utils.parse_into_expression(a)
    rhs = Utils.parse_into_expression(b)

    case method
    when "pearson"
      Utils.wrap_expr(Plr.pearson_corr(lhs, rhs))
    when "spearman"
      Utils.wrap_expr(Plr.spearman_rank_corr(lhs, rhs, propagate_nans))
    else
      # `inspect` keeps Strings and Symbols distinguishable in the message,
      # e.g. `got :pearson` explains why a Symbol was rejected.
      raise ArgumentError,
            "method must be one of {'pearson', 'spearman'}, got #{method.inspect}"
    end
  end
end