Class: FeldtRuby::NgramWordCounter
- Inherits:
-
WordCounter
- Object
- WordCounter
- FeldtRuby::NgramWordCounter
- Defined in:
- lib/feldtruby/word_counter.rb
Constant Summary
Constants inherited from WordCounter
Instance Method Summary
- #all_ngrams(array) ⇒ Object
- #count_words(words) ⇒ Object
-
#initialize(n = 2) ⇒ NgramWordCounter
constructor
A new instance of NgramWordCounter.
Methods inherited from WordCounter
#count, #count_word, #invidual_words_in_string, #is_stop_word?, #merge!, #preprocess_word, #top_words, #words
Constructor Details
#initialize(n = 2) ⇒ NgramWordCounter
Returns a new instance of NgramWordCounter.
78 79 80 81 |
# File 'lib/feldtruby/word_counter.rb', line 78
#
# Builds a counter that tallies n-grams instead of single words.
#
# @param n [Integer] number of consecutive words per n-gram; defaults to 2
#   (bigrams). The value is stored in @n.
def initialize(n = 2)
  # Explicit empty parentheses: the parent initializer is invoked with no
  # arguments, so +n+ is deliberately not forwarded.
  super()
  @n = n
end
Instance Method Details
#all_ngrams(array) ⇒ Object
90 91 92 93 94 95 96 97 98 99 |
# File 'lib/feldtruby/word_counter.rb', line 90
#
# Returns every run of @n consecutive elements of +array+, in order of
# their starting position.
#
# @param array [Array] the elements (typically the words of one sentence)
# @return [Array<Array>] the n-grams, each an Array of @n elements; empty
#   when +array+ holds fewer than @n elements
# @raise [ArgumentError] if @n is not positive (raised by each_cons)
def all_ngrams(array)
  # each_cons yields each sliding window of size @n; it replaces the manual
  # index/length bookkeeping and yields nothing when the input is too short.
  array.each_cons(@n).to_a
end
#count_words(words) ⇒ Object
82 83 84 85 86 87 88 89 |
# File 'lib/feldtruby/word_counter.rb', line 82
#
# Counts the n-grams found in +words+, one sentence at a time, so that no
# n-gram spans a sentence boundary.
#
# A sentence boundary is a period followed by whitespace where the next
# character is an uppercase ASCII letter. Any n-gram containing at least one
# stop word is dropped; each remaining n-gram is joined with single spaces
# and passed to count_word.
#
# @param words [String] the text to analyse
# @return [Array<String>] the sentences the text was split into
def count_words(words)
  sentences = words.split(/\.\s+(?=[A-Z]{1})/)
  sentences.each do |sentence|
    tokens = invidual_words_in_string(sentence)
    # Filter the whole sentence first, then count, so all stop-word checks
    # happen before any count is updated.
    kept = all_ngrams(tokens).reject do |ngram|
      ngram.any? { |token| is_stop_word?(token) }
    end
    kept.each { |ngram| count_word(ngram.join(' ')) }
  end
end