Class: Semantic::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/semantic/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



6
7
8
9
10
11
12
# File 'lib/semantic/parser.rb', line 6

# Builds a parser and loads the stop-word list into @stopwords.
#
# The list is the English stopwords file from
# ftp://ftp.cs.cornell.edu/pub/smart/english.stop, read from the resources
# directory located relative to this source file and split on whitespace.
# TODO: nicer way to reference stop file location?
def initialize
  stop_file = "#{File.dirname(__FILE__)}/../../resources/english.stop"
  @stopwords = File.read(stop_file).split
end

Instance Method Details

#clean(string) ⇒ Object

Removes periods from the string, collapses each run of whitespace into a single space, and converts the result to lowercase.



20
21
22
23
24
25
# File 'lib/semantic/parser.rb', line 20

# Normalises text ahead of tokenising: removes every period, replaces each
# run of whitespace with a single space, and downcases the result.
#
# @param string [String] the text to normalise
# @return [String] a new normalised string; the argument itself is not modified
def clean(string)
  string.delete('.').gsub(/\s+/, " ").downcase
end

#remove_stop_words(list) ⇒ Object

Stop words are common words that have no search value; this method returns the given list with those words removed.



28
29
30
# File 'lib/semantic/parser.rb', line 28

# Filters stop words (common words which have no search value) out of a
# list of words.
#
# @param list [Array] the words to filter
# @return [Array] a new array holding, in their original order, the elements
#   of +list+ that do not appear in @stopwords
def remove_stop_words(list)
  # The block yields a plain boolean, so an element's own truthiness
  # plays no part in whether it is kept.
  list.reject { |word| @stopwords.include?(word) }
end

#tokenise_and_filter(string) ⇒ Object



14
15
16
17
# File 'lib/semantic/parser.rb', line 14

# Turns a string into its list of search terms: the string is tokenised and
# stemmed by tokenise_and_stem, then stop words are dropped from that list
# by remove_stop_words.
def tokenise_and_filter(string)
  remove_stop_words(tokenise_and_stem(string))
end

#tokenise_and_stem(string) ⇒ Object



32
33
34
35
36
37
# File 'lib/semantic/parser.rb', line 32

# Cleans the string with clean, splits it into words, and returns an array
# with each word replaced by the result of calling stem on it.
# NOTE(review): String#stem is not defined in this file; presumably it is
# provided by a stemmer library loaded elsewhere. Confirm.
def tokenise_and_stem(string)
  clean(string).split(" ").map(&:stem)
end