Class: SClust::KMean::DocumentClusterer
- Defined in:
- lib/sclust/kmean/doccluster.rb
Overview
A document clusterer that defines the << operator to allow for adding Document objects (the + operator is inherited from Clusterer).
Instance Attribute Summary collapse
-
#document_collection ⇒ Object
readonly
Returns the value of attribute document_collection.
Attributes inherited from Clusterer
#cluster_count, #clusters, #iterations, #logger, #points
Instance Method Summary collapse
- #<<(d) ⇒ Object
-
#initialize ⇒ DocumentClusterer
constructor
A new instance of DocumentClusterer.
-
#initialize_points ⇒ Object
This must be run to convert the document collection into the points in a cluster.
- #topics=(n) ⇒ Object
Methods inherited from Clusterer
#+, #assign_all_points, #cluster, #each_cluster, #get_max_terms, #rebuild_document_collection
Constructor Details
#initialize ⇒ DocumentClusterer
Returns a new instance of DocumentClusterer.
38 39 40 41 |
# File 'lib/sclust/kmean/doccluster.rb', line 38 def initialize() @document_collection = SClust::Util::DocumentCollection.new() super() end |
Instance Attribute Details
#document_collection ⇒ Object (readonly)
Returns the value of attribute document_collection.
36 37 38 |
# File 'lib/sclust/kmean/doccluster.rb', line 36 def document_collection @document_collection end |
Instance Method Details
#<<(d) ⇒ Object
43 44 45 46 47 48 49 |
# File 'lib/sclust/kmean/doccluster.rb', line 43 def <<(d) if ( d.is_a?(SClust::Util::Document) ) @document_collection << d else @document_collection << SClust::Util::Document.new(d.to_s) end end |
#initialize_points ⇒ Object
This must be run to convert the document collection into the points in a cluster.
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/sclust/kmean/doccluster.rb', line 53 def initialize_points() point_list = [] @document_collection.doclist.each do |doc| doc_terms = SClust::Util::SparseVector.new(0) # Build a BIG term vector list for this document. doc.terms.each_key do |term| doc_terms[term] = doc.tf(term) - @document_collection.idf(term) end # def initialize(terms, values, source_object = nil) point_list << ClusterPoint.new(doc_terms, doc) end self.points = point_list end |
#topics=(n) ⇒ Object
74 75 76 77 78 79 |
# File 'lib/sclust/kmean/doccluster.rb', line 74 def topics=(n) initialize_points unless ( self.points && self.points.size > 0 ) super(n) end |