Class: FeldtRuby::NgramWordCounter

Inherits:
WordCounter show all
Defined in:
lib/feldtruby/word_counter.rb

Constant Summary

Constants inherited from WordCounter

WordCounter::StopWords

Instance Method Summary collapse

Methods inherited from WordCounter

#count, #count_word, #invidual_words_in_string, #is_stop_word?, #merge!, #preprocess_word, #top_words, #words

Constructor Details

#initialize(n = 2) ⇒ NgramWordCounter

Returns a new instance of NgramWordCounter.



78
79
80
81
# File 'lib/feldtruby/word_counter.rb', line 78

# Creates a counter that counts n-grams made of +n+ consecutive words.
#
# @param n [Integer] number of words per n-gram; must be at least 1 (defaults to 2, i.e. bigrams)
# @raise [ArgumentError] if +n+ is not an Integer greater than or equal to 1
def initialize(n = 2)
  # Fail fast: a zero or negative size cannot form an n-gram, and would otherwise
  # only surface later while n-grams are being built and counted.
  raise ArgumentError, "n must be a positive integer, got #{n.inspect}" unless n.is_a?(Integer) && n >= 1

  # Explicit empty parens: the parent initializer is called without arguments.
  super()
  @n = n
end

Instance Method Details

#all_ngrams(array) ⇒ Object



90
91
92
93
94
95
96
97
98
99
# File 'lib/feldtruby/word_counter.rb', line 90

# Returns every run of @n consecutive elements of +array+, in their original order.
#
# @param array [Array] the items to slide the n-sized window over
# @return [Array<Array>] one sub-array per window position; empty when +array+
#   holds fewer than @n elements
# @raise [ArgumentError] if @n is zero or negative (raised by each_cons)
def all_ngrams(array)
  # each_cons yields each window of @n consecutive elements, replacing the
  # manual index bookkeeping.
  array.each_cons(@n).to_a
end

#count_words(words) ⇒ Object



82
83
84
85
86
87
88
89
# File 'lib/feldtruby/word_counter.rb', line 82

# Counts the n-grams found in +words+, one sentence at a time.
#
# The text is cut into sentences at a period followed by whitespace and an
# upper-case ASCII letter, so n-grams never span a sentence boundary. Any
# n-gram containing at least one stop word is dropped; each remaining n-gram
# is joined with single spaces and passed to count_word.
#
# @param words [String] the text to analyse
# @return [Array<String>] the sentences the text was split into
def count_words(words)
  sentences = words.split(/\.\s+(?=[A-Z]{1})/)
  sentences.each do |sentence|
    tokens = invidual_words_in_string(sentence)
    # Filter first, then count, so all stop-word checks for a sentence happen
    # before any count is recorded.
    kept = all_ngrams(tokens).reject do |gram|
      gram.any? { |token| is_stop_word?(token) }
    end
    kept.each do |gram|
      count_word(gram.join(' '))
    end
  end
end