Class: NounPhraseDetector

Inherits:
Object
  • Object
show all
Defined in:
lib/noun_phrase_detector.rb

Constant Summary collapse

ADJECTIVE =

adjective, comparative adjective, superlative adjective

%w{JJ JJR JJS}
NOUN =

noun, noun plural, proper noun, proper noun plural

%w{NN NNS NNP NNPS}
PREPOSITION =
%w{TO}

Instance Method Summary collapse

Constructor Details

#initialize(text, lexicon_path) ⇒ NounPhraseDetector

Returns a new instance of NounPhraseDetector.



9
10
11
12
13
# File 'lib/noun_phrase_detector.rb', line 9

# Builds a detector for the given text.
#
# Starts with empty @pos and @words arrays, then hands both arguments to
# analyze_part_of_speech (defined elsewhere in this class; presumably it fills
# the two arrays with tags and tokens -- confirm against its definition).
#
# text         - the input passed through to analyze_part_of_speech
# lexicon_path - the lexicon location passed through to analyze_part_of_speech
def initialize(text, lexicon_path)
  @pos = []
  @words = []
  analyze_part_of_speech(text, lexicon_path)
end

Instance Method Details

#count_noun_phrases ⇒ Object



49
50
51
52
53
54
55
56
57
58
# File 'lib/noun_phrase_detector.rb', line 49

# Tallies how often each extracted noun phrase occurs.
#
# Returns a Hash mapping every phrase string produced by
# extract_all_noun_phrases to its number of occurrences. The hash carries a
# default block that stores 0 for a key the first time it is looked up.
def count_noun_phrases
  tally = Hash.new { |counts, phrase| counts[phrase] = 0 }

  extract_all_noun_phrases.each_with_object(tally) do |phrase, counts|
    counts[phrase] += 1
  end
end

#extract_all_noun_phrases ⇒ Object

Basic noun phrase pattern: (Adjective | Noun)* (Noun Preposition)? (Adjective | Noun)* Noun



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/noun_phrase_detector.rb', line 17

# Extracts one noun phrase for every noun tag in @pos.
#
# Each phrase follows the pattern
#   (Adjective | Noun)* (Noun Preposition)? (Adjective | Noun)* Noun
# and is found by anchoring on a noun and scanning leftwards through @pos.
# At most one "Noun Preposition" link is accepted per phrase.
#
# Returns an Array of Strings, one per noun in @pos (in order of the anchoring
# noun), each built by joining the matching entries of @words with a space.
def extract_all_noun_phrases
  phrases = []

  # Anchor on the right-most noun of each phrase and extend to the left.
  @pos.each_with_index do |tag, index|
    next unless NOUN.include?(tag)

    phrase = [index]
    i = index
    preposition_used = false

    while i > 0
      i -= 1

      if NOUN.include?(@pos[i]) || ADJECTIVE.include?(@pos[i])
        phrase.unshift(i)
      elsif !preposition_used && i > 0 &&
            PREPOSITION.include?(@pos[i]) && NOUN.include?(@pos[i - 1])
        # "Noun Preposition" link: prepend the preposition first and then the
        # noun before it, so the indices stay in ascending (sentence) order.
        phrase.unshift(i)
        phrase.unshift(i - 1)
        # Skip the noun just consumed so it is not visited a second time.
        i -= 1
        preposition_used = true
      else
        break
      end
    end

    phrases << phrase
  end

  phrases.map do |indices|
    indices.map { |word_index| @words[word_index] }.join(' ')
  end
end