Class: Spark::Mllib::SparseVector

Inherits:
VectorBase show all
Defined in:
lib/spark/mllib/vector.rb

Overview

A sparse vector represented by an index array and a value array.

A sparse vector is a vector in which most of the elements are zero.

Example:

SparseVector.new(4, {1 => 1.0, 3 => 5.5}).values
# => [0, 1.0, 0, 5.5]

SparseVector.new(4, [[1, 3], [1.0, 5.5]]).values
# => [0, 1.0, 0, 5.5]

SparseVector.new(4, [1, 3], [1.0, 5.5]).values
# => [0, 1.0, 0, 5.5]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from VectorAdapter

#[]=, #dot, new, #squared_distance, #values

Methods inherited from Vector

elements

Constructor Details

#initialize(arg1, arg2 = nil, arg3 = nil) ⇒ SparseVector

Returns a new instance of SparseVector.



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/spark/mllib/vector.rb', line 131

# Creates a sparse vector from its explicitly given entries.
#
# Accepted call forms:
#   new(size, {index => value, ...})
#   new(size, [indices, values])
#   new(size, indices, values)
#
# @param arg1 [Object] forwarded to the parent constructor together with
#   :sparse; the documented examples pass the vector size here
# @param arg2 [Hash, Array] an index => value hash, an [indices, values]
#   pair, or the indices array when arg3 is supplied
# @param arg3 [Array, nil] values matching the indices given in arg2
def initialize(arg1, arg2=nil, arg3=nil)
  super(:sparse, arg1)

  if arg2.is_a?(Hash)
    @indices = arg2.keys
    @values = arg2.values
  elsif arg3.nil? && arg2.is_a?(Array) && arg2.size == 2 && arg2.all? { |part| part.is_a?(Array) }
    # Pair form: [[indices...], [values...]] passed as one argument.
    # Without this branch the zip below would receive nil and raise.
    @indices, @values = arg2
  else
    @indices = arg2
    @values = arg3
  end

  # Store each explicit entry through the inherited element setter.
  @indices.zip(@values).each do |(index, value)|
    self[index] = value
  end
end

Instance Attribute Details

#indicesObject (readonly)

Returns the value of attribute indices.



129
130
131
# File 'lib/spark/mllib/vector.rb', line 129

# Reader for the @indices instance variable.
#
# @return [Object] the current value of @indices
def indices
  @indices
end

Class Method Details

.parse(data) ⇒ Object

Convert a string to a vector.

SparseVector.parse("(5,[1,4],[3.0,5.0])")


151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/spark/mllib/vector.rb', line 151

# Builds a SparseVector from its string form, e.g. "(5,[1,4],[3.0,5.0])".
#
# The value list accepts signed numbers and exponent notation
# (for example "-1.5" or "2.0e-05").
#
# @param data [String] text containing "(size,[indices],[values])"
# @return [SparseVector] vector built from the parsed size, indices and values
# @raise [ArgumentError] when the text does not match the expected format
def self.parse(data)
  match = data.match(/\((?<size>[0-9]+)[ ]*,[ ]*\[(?<indices>[0-9,. ]*)\][ ]*,[ ]*\[(?<values>[0-9,.eE+\- ]*)\]\)/)
  raise ArgumentError, 'Unknow format for SparseVector.' unless match

  size = match[:size].to_i
  indices = match[:indices].split(',').map(&:to_i)
  values = match[:values].split(',').map(&:to_f)

  SparseVector.new(size, indices, values)
end

Instance Method Details

#marshal_dumpObject



175
176
177
# File 'lib/spark/mllib/vector.rb', line 175

# Marshal hook: describes the vector as a three-element array.
#
# @return [Array] [size, indices, values]
def marshal_dump
  dimension = size
  positions = indices
  entries = values
  [dimension, positions, entries]
end

#marshal_load(array) ⇒ Object



179
180
181
# File 'lib/spark/mllib/vector.rb', line 179

# Marshal hook: rebuilds the vector by re-running the constructor with
# the first three elements of the dumped array.
#
# @param array [Array] [size, indices, values] as produced by marshal_dump
def marshal_load(array)
  dimension, positions, entries = array
  initialize(dimension, positions, entries)
end

#to_sObject

Convert vector to string

SparseVector.new(5, {1 => 3, 4 => 5}).to_s
# => "(5,[1,4],[3.0,5.0])"


171
172
173
# File 'lib/spark/mllib/vector.rb', line 171

# Renders the vector as "(size,[indices],[values])", with the elements of
# each list joined by commas.
#
# @return [String]
def to_s
  dimension = size
  index_list = indices.join(',')
  value_list = values.join(',')
  "(#{dimension},[#{index_list}],[#{value_list}])"
end