Class: Tagelizer
- Inherits: Object
- Inheritance chain: Object → Tagelizer
- Defined in: lib/tagelizer.rb
Instance Attribute Summary
- #dictionary ⇒ Object
  Returns the value of attribute dictionary.
- #locale ⇒ Object (readonly)
  Returns the value of attribute locale.
- #options ⇒ Object (readonly)
  Returns the value of attribute options.
Instance Method Summary
- #actual_options ⇒ Object
- #build_speller ⇒ Object
- #build_stemmer ⇒ Object
- #corrected_word(word) ⇒ Object
- #dictionaries ⇒ Object
- #initialize(locale = 'en') ⇒ Tagelizer (constructor)
  A new instance of Tagelizer.
- #parse(text) ⇒ Object
- #remove_duplicates(list) ⇒ Object
- #speller ⇒ Object
- #stemmer ⇒ Object
Constructor Details
#initialize(locale = 'en') ⇒ Tagelizer
Returns a new instance of Tagelizer.
10 11 12 13 14 |
# File 'lib/tagelizer.rb', line 10
#
# Sets up a tagelizer for the requested locale.
#
# The dictionary is +locale+ when #dictionaries lists it, otherwise "en".
# The minimum word size is fixed at 2 and the only option set is
# 'ignore-case' => true.
#
# @param locale [String] requested dictionary code
def initialize(locale = 'en')
  # The #locale reader returns @locale, so keep the locale exactly as requested.
  @locale = locale
  @dictionary = dictionaries.include?(locale) ? locale : 'en'
  @minwordsize = 2
  @options = { 'ignore-case' => true }
end
Instance Attribute Details
#dictionary ⇒ Object
Returns the value of attribute dictionary.
42 43 44 |
# File 'lib/tagelizer.rb', line 42
#
# Returns the value of attribute dictionary.
#
# @return [Object] the current value of @dictionary
def dictionary
  @dictionary
end
#locale ⇒ Object (readonly)
Returns the value of attribute locale.
9 10 11 |
# File 'lib/tagelizer.rb', line 9
#
# Returns the value of attribute locale.
#
# @return [Object] the current value of @locale
def locale
  @locale
end
#options ⇒ Object (readonly)
Returns the value of attribute options.
9 10 11 |
# File 'lib/tagelizer.rb', line 9
#
# Returns the value of attribute options.
#
# NOTE(review): the extracted listing read `def @options end`; the method
# name is restored from the attribute heading above it.
#
# @return [Object] the current value of @options
def options
  @options
end
Instance Method Details
#actual_options ⇒ Object
59 60 61 62 63 64 |
# File 'lib/tagelizer.rb', line 59
#
# Builds a new hash from #options in which every value has been converted
# with #to_s; keys are kept as they are.
#
# NOTE(review): the extracted listing had lost the method name and both
# `options` receivers; they are restored from the method heading — confirm
# against lib/tagelizer.rb.
#
# @return [Hash] option name => stringified option value
def actual_options
  options.each_with_object({}) do |(key, value), stringified|
    stringified[key] = value.to_s
  end
end
#build_speller ⇒ Object
24 25 26 27 28 29 30 31 32 |
# File 'lib/tagelizer.rb', line 24
#
# Creates an Aspell instance for #dictionary, sets its suggestion mode to
# 'normal' and applies each configured option to it.
#
# NOTE(review): the receiver of `.each` was lost in the extracted listing.
# `actual_options` is assumed because it yields String values — confirm
# against lib/tagelizer.rb.
#
# @return [Aspell] the configured speller
def build_speller
  # Named `checker` so the local does not shadow the #speller method.
  checker = Aspell.new(dictionary)
  checker.suggestion_mode = 'normal'
  actual_options.each do |key, value|
    checker.set_option key, value
  end
  checker
end
#build_stemmer ⇒ Object
38 39 40 |
# File 'lib/tagelizer.rb', line 38
#
# Creates a Lingua::Stemmer whose :language is the current #dictionary.
#
# @return [Lingua::Stemmer] a new stemmer
def build_stemmer
  Lingua::Stemmer.new(:language => dictionary)
end
#corrected_word(word) ⇒ Object
55 56 57 |
# File 'lib/tagelizer.rb', line 55
#
# Returns +word+ unchanged when the speller accepts it, otherwise the
# speller's first suggestion.
#
# @param word [String] the word to check
# @return [String] the accepted word, its first suggestion, or +word+ itself
#   when there is no suggestion
def corrected_word(word)
  return word if speller.check(word)

  # `first` is nil when the suggestion list is empty; keep the original word
  # so callers never receive nil.
  speller.suggest(word).first || word
end
#dictionaries ⇒ Object
51 52 53 |
# File 'lib/tagelizer.rb', line 51
#
# Lists the `code` of every dictionary reported by Aspell.list_dicts.
# The list is computed once and cached in @dictionaries.
#
# @return [Array] dictionary codes
def dictionaries
  @dictionaries ||= Aspell.list_dicts.map(&:code)
end
#parse(text) ⇒ Object
16 17 18 |
# File 'lib/tagelizer.rb', line 16
#
# Turns free text into a list of tags.
#
# The text is split on whitespace and each token is reduced to its first
# run of word characters. Words are kept only when longer than
# @minwordsize, then passed through #corrected_word and finally
# #remove_duplicates.
#
# @param text [String] the text to tokenize
# @return [Array] the value returned by #remove_duplicates
def parse(text)
  tokens = text.split(' ')
  # `/\w+/` skips leading punctuation ("(hello)" => "hello"); tokens with no
  # word characters yield nil and are removed by `compact`.
  words = tokens.map { |token| token[/\w+/] }.compact
  long_enough = words.select { |word| word.size > @minwordsize }
  remove_duplicates(long_enough.map { |word| corrected_word(word) })
end
#remove_duplicates(list) ⇒ Object
66 67 68 69 70 71 72 73 |
# File 'lib/tagelizer.rb', line 66
#
# Removes words whose stem (per #stemmer) duplicates that of another word.
#
# When several words share a stem, the one appearing last in +list+ is the
# one kept; the surviving words keep their relative order. The argument is
# not modified.
#
# @param list [Array] words to de-duplicate
# @return [Array] a new array with one word per stem
def remove_duplicates(list)
  seen_stems = {}
  # Walk from the end so the last occurrence of each stem wins, then restore
  # the original order.
  kept = list.reverse.select do |word|
    stem = stemmer.stem(word)
    next false if seen_stems.key?(stem)

    seen_stems[stem] = true
  end
  kept.reverse
end
#speller ⇒ Object
20 21 22 |
# File 'lib/tagelizer.rb', line 20
#
# Returns the speller, building it with #build_speller on first use and
# caching it in @speller.
#
# @return [Object] the cached result of #build_speller
def speller
  @speller ||= build_speller
end
#stemmer ⇒ Object
34 35 36 |
# File 'lib/tagelizer.rb', line 34
#
# Returns the stemmer, building it with #build_stemmer on first use and
# caching it in @stemmer.
#
# @return [Object] the cached result of #build_stemmer
def stemmer
  @stemmer ||= build_stemmer
end