Class: RSemantic::Transform::TFIDF
- Inherits:
-
Object
- Object
- RSemantic::Transform::TFIDF
- Defined in:
- lib/rsemantic/transform/tf_idf_transform.rb
Constant Summary collapse
- @@number_of_documents_with_term =
[]
Class Method Summary collapse
Class Method Details
.number_of_documents_with_term(row_index, matrix) ⇒ Object
22 23 24 |
# File 'lib/rsemantic/transform/tf_idf_transform.rb', line 22 def self.number_of_documents_with_term(row_index, matrix) @@number_of_documents_with_term[row_index] ||= matrix.row(row_index).where.size end |
.transform!(matrix) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/rsemantic/transform/tf_idf_transform.rb', line 6 def self.transform!(matrix) number_of_documents = matrix.size2 @@number_of_documents_with_term = [] matrix.transpose.enum_for(:each_row).with_index do |document, column_index| document.enum_for(:each).with_index do |term_weight, row_index| unless term_weight == 0.0 inverse_document_frequency = 1 + GSL::Sf.log(number_of_documents / (number_of_documents_with_term(row_index, matrix).to_f + 1)) term_frequency = Math.sqrt(term_weight) matrix[row_index, column_index] = term_frequency * inverse_document_frequency end end end end |