Class: Lda::Corpus

Inherits:
Object
  • Object
show all
Defined in:
lib/lda-ruby/corpus/corpus.rb,
ext/lda-ruby/lda-inference.c

Direct Known Subclasses

DataCorpus, DirectoryCorpus, TextCorpus

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(stop_word_list = nil) ⇒ Corpus

Returns a new instance of Corpus.



8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/lda-ruby/corpus/corpus.rb', line 8

# Builds an empty corpus and loads its stop word list from a YAML file.
#
# @param stop_word_list [String, nil] path to a YAML file of stop words;
#   when nil, 'config/stopwords.yml' one directory above this file is used
def initialize(stop_word_list = nil)
  @documents = []
  @all_terms = Set.new
  @num_docs = 0
  @num_terms = 0
  @vocabulary = Vocabulary.new

  # Only an explicit nil selects the bundled list; any other value is
  # handed to YAML.load_file unchanged.
  bundled_list = File.join(File.dirname(__FILE__), '..', 'config', 'stopwords.yml')
  stopword_path = stop_word_list.nil? ? bundled_list : stop_word_list

  # NOTE(review): assumes the YAML document is a flat list of strings,
  # since every entry is sent #strip -- confirm against the bundled file.
  @stopwords = YAML.load_file(stopword_path)
  @stopwords.map!(&:strip)
end

Instance Attribute Details

#documents ⇒ Object (readonly)

Returns the value of attribute documents.



6
7
8
# File 'lib/lda-ruby/corpus/corpus.rb', line 6

# Reader for the corpus's document list.
#
# The list starts out empty and add_document appends each accepted
# Document to it.
#
# @return [Array] the live @documents array (not a copy)
def documents
  @documents
end

#num_docs ⇒ Object (readonly)

Returns the value of attribute num_docs.



6
7
8
# File 'lib/lda-ruby/corpus/corpus.rb', line 6

# Reader for the document counter.
#
# The counter starts at 0 and add_document increments it by one per
# accepted document.
#
# @return [Integer] the current value of @num_docs
def num_docs
  @num_docs
end

#num_terms ⇒ Object (readonly)

Returns the value of attribute num_terms.



6
7
8
# File 'lib/lda-ruby/corpus/corpus.rb', line 6

# Reader for the distinct-term counter.
#
# The counter starts at 0; add_document resets it to the size of the set
# of all terms collected from the added documents' #words.
#
# @return [Integer] the current value of @num_terms
def num_terms
  @num_terms
end

#stopwords ⇒ Object (readonly)

Returns the value of attribute stopwords.



6
7
8
# File 'lib/lda-ruby/corpus/corpus.rb', line 6

# Reader for the stop word list.
#
# The list is loaded from a YAML file in the constructor, and each entry
# has #strip applied to it there.
#
# @return [Object] the current value of @stopwords
def stopwords
  @stopwords
end

#vocabulary ⇒ Object (readonly)

Returns the value of attribute vocabulary.



6
7
8
# File 'lib/lda-ruby/corpus/corpus.rb', line 6

# Reader for the corpus vocabulary.
#
# @return [Vocabulary] the Vocabulary instance created in the constructor
def vocabulary
  @vocabulary
end

Instance Method Details

#add_document(doc) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/lda-ruby/corpus/corpus.rb', line 22

# Appends a document to the corpus and refreshes the corpus-wide counters
# and vocabulary.
#
# @param doc [Document] the document to add; it must respond to #words
# @return [nil]
# @raise [RuntimeError] if +doc+ is not a Document
def add_document(doc)
  raise 'Parameter +doc+ must be of type Document' unless doc.is_a?(Document)

  @documents << doc

  # Grow the existing term set in place. `@all_terms += doc.words` built a
  # brand-new Set holding every term seen so far on each call, which made
  # loading a corpus quadratic in the number of distinct terms.
  @all_terms.merge(doc.words)
  @num_docs += 1
  @num_terms = @all_terms.size

  update_vocabulary(doc)
  nil
end

#remove_word(word) ⇒ Object



35
36
37
# File 'lib/lda-ruby/corpus/corpus.rb', line 35

# Deletes +word+ from the vocabulary's word collection.
#
# @param word [Object] the entry to delete
# @return [Object] whatever the collection's #delete returns for +word+
def remove_word(word)
  vocabulary_words = @vocabulary.words
  vocabulary_words.delete(word)
end