Class: Spark::Mllib::GaussianMixtureModel

Inherits:
Object
  • Object
show all
Defined in:
lib/spark/mllib/clustering/gaussian_mixture.rb

Overview

GaussianMixtureModel

A clustering model derived from the Gaussian Mixture Model method.

Examples:

# NOTE(review): presumably makes the MLlib constants used below (DenseVector,
# GaussianMixture) available without the Spark::Mllib prefix — confirm.
Spark::Mllib.import

# Six two-dimensional sample points.
data = [
  DenseVector.new([-0.1, -0.05]),
  DenseVector.new([-0.01, -0.1]),
  DenseVector.new([0.9, 0.8]),
  DenseVector.new([0.75, 0.935]),
  DenseVector.new([-0.83, -0.68]),
  DenseVector.new([-0.91, -0.76])
]

# Train on the parallelized points. The positional 3 is presumably the number of
# mixture components — confirm against GaussianMixture.train.
model = GaussianMixture.train($sc.parallelize(data), 3, convergence_tol: 0.0001, max_iterations: 50, seed: 10)

# predict maps every point to the index of its largest soft membership; collect
# is then called on the result.
labels = model.predict($sc.parallelize(data)).collect

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(weights, gaussians) ⇒ GaussianMixtureModel

Returns a new instance of GaussianMixtureModel.



29
30
31
32
33
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 29

# Builds a model from its component weights and component distributions.
#
# @param weights [#size] collection of component weights; its size is stored as +k+
# @param gaussians [Object] component distributions, stored as given
def initialize(weights, gaussians)
  @gaussians = gaussians
  @weights = weights
  # The number of components is derived from the weights, not passed in.
  @k = @weights.size
end

Instance Attribute Details

#gaussians ⇒ Object (readonly)

Returns the value of attribute gaussians.



27
28
29
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 27

# Reader for the component distributions handed to the constructor.
#
# @return [Object] the value stored in +@gaussians+
def gaussians
  @gaussians
end

#k ⇒ Object (readonly)

Returns the value of attribute k.



27
28
29
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 27

# Reader for the component count, which the constructor sets to +weights.size+.
#
# @return [Object] the value stored in +@k+
def k
  @k
end

#weights ⇒ Object (readonly)

Returns the value of attribute weights.



27
28
29
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 27

# Reader for the component weights handed to the constructor.
#
# @return [Object] the value stored in +@weights+
def weights
  @weights
end

Instance Method Details

#means ⇒ Object



50
51
52
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 50

# Collects the +mu+ of every entry in +@gaussians+, caching the result in
# +@means+ so later calls reuse it.
#
# @return [Object] the cached result of mapping +mu+ over +@gaussians+
def means
  return @means if @means

  @means = @gaussians.map { |component| component.mu }
end

#predict(rdd) ⇒ Object

Find the cluster in which each point in ‘rdd’ has maximum membership in this model.



37
38
39
40
41
42
43
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 37

# Picks, for every element produced by +predict_soft+, the index of its largest
# value.
#
# @param rdd [Spark::RDD] points to assign to mixture components
# @return [Object] the result of mapping the index-of-max lambda over
#   +predict_soft(rdd)+
# @raise [ArgumentError] when +rdd+ is not a Spark::RDD
def predict(rdd)
  raise ArgumentError, 'Argument must be a RDD.' unless rdd.is_a?(Spark::RDD)

  memberships = predict_soft(rdd)
  # The lambda is passed as a string, so it is kept exactly as written.
  memberships.map('lambda{|x| x.index(x.max)}')
end

#predict_soft(rdd) ⇒ Object

Compute the membership of each point in ‘rdd’ in every mixture component.



46
47
48
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 46

# Delegates soft prediction to the 'predictSoftGMM' entry point through
# +Spark.jb+, forwarding the RDD together with this model's weights, means and
# sigmas.
#
# @param rdd [Object] points to score; forwarded unchanged
# @return [Object] whatever the +Spark.jb.call+ invocation returns
def predict_soft(rdd)
  bridge = Spark.jb
  bridge.call(
    RubyMLLibAPI.new,
    'predictSoftGMM',
    rdd,
    weights,
    means,
    sigmas
  )
end

#sigmas ⇒ Object



54
55
56
# File 'lib/spark/mllib/clustering/gaussian_mixture.rb', line 54

# Collects the +sigma+ of every entry in +@gaussians+, caching the result in
# +@sigmas+ so later calls reuse it.
#
# @return [Object] the cached result of mapping +sigma+ over +@gaussians+
def sigmas
  return @sigmas if @sigmas

  @sigmas = @gaussians.map { |component| component.sigma }
end