Class: Semantic::Transform::TFIDF

Inherits:
Object
  • Object
show all
Defined in:
lib/semantic/transform/tf_idf_transform.rb

Constant Summary collapse

@@number_of_documents_with_term =
[]

Class Method Summary collapse

Class Method Details

.number_of_documents_with_term(row_index, matrix) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/semantic/transform/tf_idf_transform.rb', line 24

# Counts the columns of +matrix+ whose entry in row +row_index+ is greater
# than zero (a positive entry means the term appears in that document) and
# memoizes the count in @@number_of_documents_with_term.
#
# The cache is keyed by +row_index+ alone; +matrix+ is not part of the key,
# so an already-stored count is returned without inspecting +matrix+ again.
#
# @param row_index [Integer] index of the row (term) to inspect
# @param matrix [#dimensions, #[]] must answer +dimensions+ with a
#   (rows, columns) pair and support +matrix[row, column]+ lookups
# @return [Integer] number of columns with a positive entry in that row
def self.number_of_documents_with_term(row_index, matrix)
  cached = @@number_of_documents_with_term[row_index]
  return cached unless cached.nil?

  # Only the column count is needed; the row count is deliberately ignored.
  _row_count, column_count = matrix.dimensions

  @@number_of_documents_with_term[row_index] =
    (0...column_count).count { |column_index| matrix[row_index, column_index] > 0 }
end

.transform(matrix) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/semantic/transform/tf_idf_transform.rb', line 7

# Rewrites every nonzero entry of +matrix+ in place as
#
#   (entry / column_total) * log(|number_of_columns / documents_with_term|)
#
# where +column_total+ is the sum of the column's entries (as floats) and
# +documents_with_term+ comes from number_of_documents_with_term for the
# entry's row. Zero entries are left untouched.
#
# The per-row document-count cache is cleared at the start of every call, so
# counts memoized for an earlier matrix are never reused.
#
# @param matrix [#num_columns, #columns, #[], #[]=] matrix to transform;
#   presumably rows are terms and columns are documents (confirm with caller)
# @return [Object] the same +matrix+ object, mutated
def self.transform(matrix)
  @@number_of_documents_with_term = []
  number_of_documents = matrix.num_columns

  matrix.columns.each_with_index do |document, column_index|
    # Sum of all term weights in this document, used to normalise each weight.
    document_term_total = document.rows.inject(0.0) { |running_total, weight| running_total + weight.to_f }

    document.rows.each_with_index do |term_weight, row_index|
      next if term_weight.to_f == 0.0

      term_frequency = term_weight / document_term_total
      documents_with_term = number_of_documents_with_term(row_index, matrix).to_f
      inverse_document_frequency = Math.log((number_of_documents / documents_with_term).abs)

      matrix[row_index, column_index] = term_frequency * inverse_document_frequency
    end
  end

  matrix
end