6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
# File 'lib/gulp/phrase_extractor.rb', line 6
def (text)
strings = chunk_text(preprocess_text(text))
phrases = []
word_count = 0
strings.each do |string|
words = string.split(/\s+/)
word_count += words.size
next if words.size == 0
ALLOWED_PHRASE_LENGTHS.each do |length|
final_start_position = words.size - length
(0..final_start_position).each do |start_position|
sub_phrase_words = words.slice(start_position, length)
next if STOPWORDS.include?(sub_phrase_words.first.downcase) || STOPWORDS.include?(sub_phrase_words.last.downcase)
phrases << sub_phrase_words.join(' ')
end
end
end
return [word_count, phrases]
end
|