Class: Gulp::Corpus

Inherits:
Object
  • Object
show all
Defined in:
lib/gulp/corpus.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_directory) ⇒ Corpus

Returns a new instance of Corpus.



5
6
7
8
9
# File 'lib/gulp/corpus.rb', line 5

# Builds a corpus rooted at +database_directory+.
#
# Remembers the directory and creates one Gulp::DataStore for each of the
# two sub-paths "processed_documents" and "phrase_document_counts".
#
# @param database_directory [#to_s] interpolated into each store's path
def initialize(database_directory)
  @database_directory = database_directory
  # Both stores live under the same directory; only the trailing name differs.
  store_at = ->(name) { Gulp::DataStore.new("#{@database_directory}/#{name}") }
  @processed_documents = store_at.call("processed_documents")
  @phrase_document_counts = store_at.call("phrase_document_counts")
end

Instance Attribute Details

#phrase_document_counts ⇒ Object (readonly)

Returns the value of attribute phrase_document_counts.



4
5
6
# File 'lib/gulp/corpus.rb', line 4

# Reader for the phrase-document-counts store held by this corpus.
#
# @return [Object] the current value of @phrase_document_counts
def phrase_document_counts
  counts = @phrase_document_counts
  counts
end

Instance Method Details

#already_processed?(document_name) ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/gulp/corpus.rb', line 15

# Asks the processed-documents store whether it has +document_name+ as a key.
#
# @param document_name [Object] key to look up in the store
# @return [Boolean] the result of the store's has_key? check
def already_processed?(document_name)
  processed = @processed_documents
  processed.has_key?(document_name)
end

#increment_phrase_document_count(phrase) ⇒ Object



27
28
29
# File 'lib/gulp/corpus.rb', line 27

# Forwards +phrase+ to the increment method of the phrase-document-counts store.
#
# @param phrase [Object] key passed through to the store
# @return [Object] whatever the store's increment call returns
def increment_phrase_document_count(phrase)
  counts = @phrase_document_counts
  counts.increment(phrase)
end

#mark_as_processed!(document_name) ⇒ Object



11
12
13
# File 'lib/gulp/corpus.rb', line 11

# Records +document_name+ by calling increment on the processed-documents store.
#
# @param document_name [Object] key passed through to the store
# @return [Object] whatever the store's increment call returns
def mark_as_processed!(document_name)
  processed = @processed_documents
  processed.increment(document_name)
end

#number_of_unique_phrases ⇒ Object



23
24
25
# File 'lib/gulp/corpus.rb', line 23

# Reports the size of the phrase-document-counts store.
#
# @return [Object] the value returned by the store's size method
def number_of_unique_phrases
  counts = @phrase_document_counts
  counts.size
end

#phrase_document_count(phrase) ⇒ Object



31
32
33
# File 'lib/gulp/corpus.rb', line 31

# Looks up +phrase+ in the phrase-document-counts store via its [] method.
#
# @param phrase [Object] key to look up
# @return [Object] whatever the store returns for that key
def phrase_document_count(phrase)
  counts = @phrase_document_counts
  counts[phrase]
end

#total_number_of_documents ⇒ Object



19
20
21
# File 'lib/gulp/corpus.rb', line 19

# Reports the size of the processed-documents store.
#
# @return [Object] the value returned by the store's size method
def total_number_of_documents
  processed = @processed_documents
  processed.size
end