Class: RaskeNLP::Result

Inherits:
Object
  • Object
show all
Defined in:
lib/raske-nlp/result.rb

Constant Summary collapse

# Character class of delimiters on which #sentences splits the input text:
# square brackets, newline, tab, hyphen, double quote, parentheses, apostrophe,
# U+2019, U+2013 and the punctuation . ! ? , ; :
# NOTE(review): the escapes appear doubled in this rendering, which would also
# make a literal backslash a delimiter - confirm against lib/raske-nlp/result.rb.
SENTENCE_REGEX =
/[\\[\\]\n.!?,;:\t\\-\\"\\(\\)\\\'\u2019\u2013]/
# Matches any single character that is not an ASCII letter, digit, underscore,
# plus sign, hyphen or forward slash.
# NOTE(review): presumably the separator used when splitting a phrase into
# words; the call site is not visible here - confirm.
WORD_REGEX =
/[^a-zA-Z0-9_\+\-\/]/
# Matches a line made of zero or more leading hyphens followed by one or more
# digits, commas or periods. ^ and $ anchor per line, not per whole string.
# NOTE(review): presumably used to recognise purely numeric tokens - confirm.
NUMBER_REGEX =
/^-*[0-9,\.]+$/

Instance Method Summary collapse

Constructor Details

#initialize(text, options = {}) ⇒ Result

Returns a new instance of Result.



7
8
9
10
# File 'lib/raske-nlp/result.rb', line 7

# Stores the text and options and derives the stop-list regex used by #phrases.
#
# @param text the text to analyse; kept unchanged in @text
# @param options [Hash] settings; options[:stop_list] must respond to #to_regex
def initialize(text, options={})
  @text = text
  @options = options
  @stoplist_regex = @options[:stop_list].to_regex
end

Instance Method Details

#keywords ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/raske-nlp/result.rb', line 49

# Scores every distinct candidate phrase and returns those passing the
# configured thresholds.
#
# A phrase's score is the sum of word_scores for the words split_words yields
# for it. When @options[:min_frequency] is greater than 1, phrases occurring
# fewer times than that are skipped. Phrases scoring below
# @options[:min_score] are dropped from the result.
#
# @return [Hash] phrase => score, ordered from highest to lowest score
def keywords
  # Count occurrences once up front; calling phrases.count(phrase) for every
  # phrase rescans the whole list each time (quadratic in the phrase count).
  occurrences = phrases.each_with_object(Hash.new(0)) do |phrase, counts|
    counts[phrase] += 1
  end

  scored = {}
  # uniq keeps first-occurrence order, so insertion order matches what
  # assigning on every occurrence would produce, without re-scoring duplicates.
  phrases.uniq.each do |phrase|
    min_frequency = @options[:min_frequency]
    next if min_frequency > 1 && occurrences[phrase] < min_frequency

    scored[phrase] = split_words(phrase).sum { |word| word_scores[word] }
  end

  scored.select { |_phrase, score|
    score >= @options[:min_score]
  }.sort_by(&:last).reverse.to_h
end

#phrases ⇒ Object



16
17
18
19
20
21
22
# File 'lib/raske-nlp/result.rb', line 16

# Candidate phrases extracted from the text; computed once and cached in @phrases.
#
# Each sentence is downcased and split on the stop-list regex. The pieces are
# stripped of surrounding whitespace and kept only if acceptable? approves them.
#
# @return [Array<String>] all accepted phrases, in sentence order
def phrases
  return @phrases if @phrases

  @phrases = sentences.flat_map do |sentence|
    candidates = sentence.downcase.split(@stoplist_regex).map(&:strip)
    candidates.select { |candidate| acceptable?(candidate) }
  end
end

#sentences ⇒ Object



12
13
14
# File 'lib/raske-nlp/result.rb', line 12

# Splits the stored text on SENTENCE_REGEX and trims whitespace around each piece.
#
# @return [Array<String>] the stripped fragments, in their original order
def sentences
  fragments = @text.split(SENTENCE_REGEX)
  fragments.map { |fragment| fragment.strip }
end

#word_scores ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/raske-nlp/result.rb', line 24

# Per-word scores derived from the candidate phrases; computed once and cached
# in @word_scores.
#
# For every word, the frequency is the number of times it appears across all
# phrases, and the degree is the total length of the phrases it appears in
# (the count of other words in each phrase, plus one per occurrence). The
# score is degree divided by frequency.
#
# @return [Hash] word => Float score
def word_scores
  return @word_scores if @word_scores

  occurrences = Hash.new(0)
  co_degrees = Hash.new(0)

  phrases.each do |phrase|
    tokens = split_words(phrase)
    neighbours = tokens.length - 1
    tokens.each do |token|
      occurrences[token] += 1
      co_degrees[token] += neighbours
    end
  end

  @word_scores = occurrences.each_with_object({}) do |(token, count), result|
    # Adding the occurrence count turns the co-occurrence total into the degree.
    result[token] = (co_degrees[token] + count) / count.to_f
  end
end