Class: Gulp::Document
- Inherits:
-
Object
- Object
- Gulp::Document
- Defined in:
- lib/gulp/document.rb
Instance Attribute Summary
-
#corpus ⇒ Object
readonly
Returns the value of attribute corpus.
-
#name ⇒ Object
readonly
Returns the value of attribute name.
-
#phrase_counts ⇒ Object
readonly
Returns the value of attribute phrase_counts.
-
#word_count ⇒ Object
readonly
Returns the value of attribute word_count.
Instance Method Summary
- #add_text(text) ⇒ Object
- #add_to_corpus! ⇒ Object
- #already_processed? ⇒ Boolean
- #finalized? ⇒ Boolean
-
#initialize(name, corpus) ⇒ Document
constructor
A new instance of Document.
- #number_of_unique_phrases ⇒ Object
- #phrases ⇒ Object
- #process! ⇒ Object
Constructor Details
#initialize(name, corpus) ⇒ Document
Returns a new instance of Document.
5 6 7 8 9 10 11 12 |
# File 'lib/gulp/document.rb', line 5
#
# Sets up an empty, not-yet-finalized document tied to a corpus.
#
# @param name [Object] stored as-is in @name
# @param corpus [Object] stored as-is in @corpus
def initialize(name, corpus)
  # Identity of the document.
  @corpus = corpus
  @name = name

  # Mutable state: nothing counted yet, still open for more text.
  @finalized = false
  @word_count = 0
  # A plain Hash is used; the original listing carries a commented-out
  # alternative here: Gulp::DataStore.new('document')
  @phrase_counts = {}

  @extractor = Gulp::PhraseExtractor.new
end
Instance Attribute Details
#corpus ⇒ Object (readonly)
Returns the value of attribute corpus.
3 4 5 |
# File 'lib/gulp/document.rb', line 3
#
# Read-only accessor for the corpus stored on this document.
#
# @return [Object] the current value of @corpus
def corpus
  @corpus
end
#name ⇒ Object (readonly)
Returns the value of attribute name.
3 4 5 |
# File 'lib/gulp/document.rb', line 3
#
# Read-only accessor for the name stored on this document.
#
# @return [Object] the current value of @name
def name
  @name
end
#phrase_counts ⇒ Object (readonly)
Returns the value of attribute phrase_counts.
3 4 5 |
# File 'lib/gulp/document.rb', line 3
#
# Read-only accessor for the per-phrase tallies stored on this document.
#
# @return [Object] the current value of @phrase_counts
def phrase_counts
  @phrase_counts
end
#word_count ⇒ Object (readonly)
Returns the value of attribute word_count.
3 4 5 |
# File 'lib/gulp/document.rb', line 3
#
# Read-only accessor for the running word total stored on this document.
#
# @return [Object] the current value of @word_count
def word_count
  @word_count
end
Instance Method Details
#add_text(text) ⇒ Object
39 40 41 42 43 44 45 46 47 48 |
# File 'lib/gulp/document.rb', line 39
#
# Feeds +text+ through the extractor and folds the result into this
# document's running totals.
#
# @param text [Object] handed unchanged to @extractor.extract
# @return [Object] whatever +each+ returns on the extracted phrase collection
# @raise [RuntimeError] if the document has already been finalized
def add_text(text)
  raise "cannot add text once finalized" if finalized?

  # The extractor answers with a pair: a word total and the phrases found.
  words_seen, found = @extractor.extract(text)
  @word_count += words_seen

  found.each do |phrase|
    # A phrase not tallied yet starts from zero.
    @phrase_counts[phrase] = (@phrase_counts[phrase] || 0) + 1
  end
end
#add_to_corpus! ⇒ Object
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/gulp/document.rb', line 28
#
# Marks this document finalized and reports it to the corpus: one
# increment_phrase_document_count call per distinct phrase, followed by
# mark_as_processed! with this document's name. Does nothing when
# already_processed? is true.
#
# @return [Object, nil] the result of @corpus.mark_as_processed!, or nil
#   when the document was skipped
def add_to_corpus!
  return if already_processed?

  @finalized = true
  @phrase_counts.each_key { |phrase| @corpus.increment_phrase_document_count(phrase) }
  @corpus.mark_as_processed!(name)
end
#already_processed? ⇒ Boolean
20 21 22 |
# File 'lib/gulp/document.rb', line 20
#
# Asks the corpus whether a document with this name has been processed.
#
# @return [Boolean] whatever @corpus.already_processed? answers for +name+
def already_processed?
  @corpus.already_processed?(name)
end
#finalized? ⇒ Boolean
24 25 26 |
# File 'lib/gulp/document.rb', line 24
#
# Reports the finalized flag stored on this document.
#
# @return [Boolean] the current value of @finalized
def finalized?
  @finalized
end
#number_of_unique_phrases ⇒ Object
50 51 52 |
# File 'lib/gulp/document.rb', line 50
#
# Counts the distinct phrases tallied so far.
#
# @return [Integer] the size of phrase_counts
def number_of_unique_phrases
  phrase_counts.size
end
#phrases ⇒ Object
54 55 56 57 58 |
# File 'lib/gulp/document.rb', line 54
#
# Wraps every tallied phrase in a Phrase object.
#
# @return [Array] one Phrase.new(self, phrase, count) per entry of
#   phrase_counts, in iteration order
def phrases
  phrase_counts.map { |text, occurrences| Phrase.new(self, text, occurrences) }
end
#process! ⇒ Object
14 15 16 17 18 |
# File 'lib/gulp/document.rb', line 14
#
# Opens the file at +name+ and runs it through a Nokogiri SAX parser whose
# handler is an XMLTextExtractor built around this document.
#
# @return [Document] self, so calls can be chained
# @raise [SystemCallError] if File.open cannot open +name+
def process!
  handler = XMLTextExtractor.new(self)
  parser = Nokogiri::XML::SAX::Parser.new(handler)

  # Block form closes the handle as soon as parsing ends, even when the
  # parser raises; a bare File.open(name) left the descriptor open until GC.
  File.open(name) { |io| parser.parse(io) }

  self
end