Class: Semantic::Parser
- Inherits:
-
Object
- Object
- Semantic::Parser
- Defined in:
- lib/semantic/parser.rb
Instance Method Summary collapse
-
#clean(string) ⇒ Object
Normalises a string for tokenising: removes periods, collapses runs of whitespace into a single space, and lowercases the result.
-
#initialize ⇒ Parser
constructor
A new instance of Parser.
-
#remove_stop_words(list) ⇒ Object
stop words are common words which have no search value.
- #tokenise_and_filter(string) ⇒ Object
- #tokenise_and_stem(string) ⇒ Object
Constructor Details
#initialize ⇒ Parser
Returns a new instance of Parser.
6 7 8 9 10 11 12 |
# File 'lib/semantic/parser.rb', line 6
# Builds a parser and loads the English stop-word list into @stopwords.
#
# The list is read from resources/english.stop, two directories above this
# file, and split on whitespace into an Array of words.
# English stopwords from ftp://ftp.cs.cornell.edu/pub/smart/english.stop
#
# @raise [Errno::ENOENT] if the stop-word file cannot be found
def initialize
  # expand_path resolves the "../.." hops relative to this file's directory,
  # instead of gluing path fragments together by hand.
  stop_file = File.expand_path('../../resources/english.stop', File.dirname(__FILE__))

  # File.read opens, reads and closes the file in a single call.
  @stopwords = File.read(stop_file).split
end
Instance Method Details
#clean(string) ⇒ Object
Normalises a string for tokenising: removes periods, collapses runs of whitespace into a single space, and lowercases the result
20 21 22 23 24 25 |
# File 'lib/semantic/parser.rb', line 20
# Normalises a string before it is split into words.
#
# Every period is removed, each run of whitespace is collapsed into a single
# space, and the result is lowercased. The argument itself is not modified.
#
# NOTE(review): only "." is stripped; other punctuation (commas, question
# marks, ...) is left in place.
#
# @param string [String] the text to normalise
# @return [String] a new, normalised string
def clean(string)
  string.delete('.').gsub(/\s+/, ' ').downcase
end
#remove_stop_words(list) ⇒ Object
stop words are common words which have no search value
28 29 30 |
# File 'lib/semantic/parser.rb', line 28
# Filters stop words (common words which have no search value) out of a list.
#
# @param list [Enumerable<String>] the words to filter
# @return [Array<String>] the words not found in @stopwords, in their
#   original order (duplicates are kept)
def remove_stop_words(list)
  # reject states the intent directly: the block is a pure membership test
  # rather than a value that happens to be truthy.
  list.reject { |word| @stopwords.include?(word) }
end
#tokenise_and_filter(string) ⇒ Object
14 15 16 17 |
# File 'lib/semantic/parser.rb', line 14
# Turns raw text into a list of word stems with stop words removed.
#
# Stop words are filtered twice. The stop list holds unstemmed words, so
# filtering only after stemming misses any stop word whose stem differs from
# the word itself (for example a Porter stemmer turns "this" into "thi" and
# "because" into "becaus"). The words are therefore filtered in their surface
# form first, then stemmed, then filtered again so that a stem which is itself
# a stop word is still dropped.
#
# @param string [String] raw text
# @return [Array] the stems of the words that are not stop words, in input order
def tokenise_and_filter(string)
  # First pass: drop stop words while the tokens still match the stop list.
  content_words = remove_stop_words(clean(string).split(' '))

  # Stem the survivors through the regular tokenise_and_stem path, then drop
  # any stem that is itself a stop word.
  remove_stop_words(tokenise_and_stem(content_words.join(' ')))
end
#tokenise_and_stem(string) ⇒ Object
32 33 34 35 36 37 |
# File 'lib/semantic/parser.rb', line 32
# Breaks a string into word stems.
#
# The text is normalised with #clean, split on spaces, and each resulting word
# is replaced by the result of calling #stem on it.
#
# @param string [String] raw text
# @return [Array] one stem per word, in input order
def tokenise_and_stem(string)
  clean(string).split(" ").map(&:stem)
end