Class: Yasc::SpellingCorrector
- Inherits:
-
Object
- Object
- Yasc::SpellingCorrector
- Defined in:
- lib/yasc/spelling_corrector.rb
Constant Summary collapse
- NWORDS =
NWORDS = train(words(open('norvig.com/big.txt') {|f| f.read }))
# File.read opens, reads and closes 'big.txt' in one call; the previous
# File.new('big.txt').read never closed the file handle explicitly.
train(words(File.read('big.txt')))
- LETTERS =
# The lowercase alphabet as one string; frozen so the shared constant cannot be mutated in place.
("a".."z").to_a.join.freeze
Class Method Summary collapse
- .correct(word) ⇒ Object
- .edits1(word) ⇒ Object
- .known(words) ⇒ Object
- .known_edits2(word) ⇒ Object
- .train(features) ⇒ Object
- .words(text) ⇒ Object
Class Method Details
.correct(word) ⇒ Object
38 39 40 41 |
# File 'lib/yasc/spelling_corrector.rb', line 38
#
# Picks the most frequent candidate correction for +word+.
#
# Candidate sets are tried in order and the first non-nil one is used:
# the word itself if it is in NWORDS, then known words among edits1(word),
# then the result of known_edits2(word), and finally the word unchanged.
# The chain relies on known / known_edits2 returning nil (not an empty
# array) when they find nothing.
#
# @param word [String] the word to correct
# @return [String] the candidate with the largest NWORDS value
def correct(word)
  candidates = known([word]) || known(edits1(word)) || known_edits2(word) || [word]
  candidates.max_by { |candidate| NWORDS[candidate] }
end
.edits1(word) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/yasc/spelling_corrector.rb', line 15
#
# Generates every string that is one simple edit away from +word+.
#
# The result is the concatenation, in this order, of:
# * deletions      - +word+ with one character removed
# * transpositions - +word+ with two adjacent characters swapped
# * alterations    - +word+ with one character replaced by each character of LETTERS
# * insertions     - +word+ with each character of LETTERS inserted at each position
#
# Duplicates are not removed (an alteration may reproduce +word+ itself).
#
# @param word [String] the word to edit
# @return [Array<String>, nil] the edits, or nil if none were generated
def edits1(word)
  n = word.length
  # each_char (rather than each_byte + chr) keeps every alphabet character
  # intact even if LETTERS ever contains multi-byte characters.
  letters = LETTERS.each_char.to_a

  deletion      = (0...n).map { |i| word[0...i] + word[i + 1..-1] }
  transposition = (0...n - 1).map { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
  alteration    = (0...n).flat_map { |i| letters.map { |l| word[0...i] + l + word[i + 1..-1] } }
  insertion     = (0..n).flat_map { |i| letters.map { |l| word[0...i] + l + word[i..-1] } }

  result = deletion + transposition + alteration + insertion
  result.empty? ? nil : result
end
.known(words) ⇒ Object
33 34 35 36 |
# File 'lib/yasc/spelling_corrector.rb', line 33
#
# Filters +words+ down to those present as keys in NWORDS.
#
# Order and duplicates of the input are preserved. nil (rather than an
# empty array) is returned when nothing matches, so callers can chain
# alternatives with ||.
#
# @param words [Array<String>] candidate words
# @return [Array<String>, nil] the known words, or nil if there are none
def known(words)
  found = words.select { |w| NWORDS.key?(w) }
  found unless found.empty?
end
.known_edits2(word) ⇒ Object
27 28 29 30 31 |
# File 'lib/yasc/spelling_corrector.rb', line 27
#
# Collects the words in NWORDS that are two edits away from +word+.
#
# For every first-level edit e1 of +word+, every edit e2 of e1 that is a
# key in NWORDS is kept, in generation order and without de-duplication.
#
# @param word [String] the word to edit
# @return [Array<String>, nil] known two-edit words, or nil if there are none
def known_edits2(word)
  # edits1 may return nil when it generates nothing; Array() turns that
  # into an empty list instead of raising NoMethodError.
  result = Array(edits1(word)).flat_map do |e1|
    Array(edits1(e1)).select { |e2| NWORDS.key?(e2) }
  end
  result unless result.empty?
end
.train(features) ⇒ Object
9 10 11 12 13 |
# File 'lib/yasc/spelling_corrector.rb', line 9
#
# Builds a frequency model from +features+.
#
# The hash has a default value of 1, so a key that was never seen reads
# as 1 and a feature seen k times is stored as k + 1.
#
# @param features [Enumerable] the items to count
# @return [Hash] feature => count, with default value 1
def train(features)
  features.each_with_object(Hash.new(1)) { |feature, model| model[feature] += 1 }
end
.words(text) ⇒ Object
5 6 7 |
# File 'lib/yasc/spelling_corrector.rb', line 5
#
# Splits +text+ into lowercase words.
#
# The text is downcased and every maximal run of the letters a-z is
# returned; any other character acts as a separator.
#
# @param text [String] the text to tokenize
# @return [Array<String>] the words in order of appearance
def words(text)
  text.downcase.scan(/[a-z]+/)
end