Class: Yasc::SpellingCorrector

Inherits:
Object
  • Object
show all
Defined in:
lib/yasc/spelling_corrector.rb

Constant Summary collapse

NWORDS =

NWORDS = train(words(open('norvig.com/big.txt') {|f| f.read }))

train(words(File.new('big.txt').read))
LETTERS =
("a".."z").to_a.join

Class Method Summary collapse

Class Method Details

.correct(word) ⇒ Object



38
39
40
41
# File 'lib/yasc/spelling_corrector.rb', line 38

# Picks the best correction for +word+.
#
# Candidate sets are tried in order and the first non-nil one wins:
# the word itself if it is known, then known words one edit away,
# then known words two edits away, and finally the word unchanged.
# From the winning set, the candidate with the highest NWORDS count
# is returned (the earliest one on a tie).
#
# @param word [String] the word to correct
# @return [String] the chosen candidate
def correct(word)
  candidates = known([word]) ||
               known(edits1(word)) ||
               known_edits2(word) ||
               [word]
  candidates.max_by { |candidate| NWORDS[candidate] }
end

.edits1(word) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
# File 'lib/yasc/spelling_corrector.rb', line 15

# Builds every string that is one edit away from +word+.
#
# The result lists, in this order: single-character deletions,
# adjacent transpositions, single-character replacements and
# single-character insertions, using the characters of LETTERS.
# Duplicates are not removed.
#
# @param word [String] the word to edit
# @return [Array<String>, nil] the candidate strings, or nil if none
#   were produced
def edits1(word)
  size = word.length
  # Same per-byte conversion as iterating LETTERS inline, done once.
  alphabet = LETTERS.each_byte.map(&:chr)

  deletes = (0...size).map do |pos|
    word[0...pos] + word[(pos + 1)..-1]
  end

  swaps = (0...(size - 1)).map do |pos|
    word[0...pos] + word[pos + 1, 1] + word[pos, 1] + word[(pos + 2)..-1]
  end

  replaces = (0...size).flat_map do |pos|
    alphabet.map { |ch| word[0...pos] + ch + word[(pos + 1)..-1] }
  end

  # One more slot than characters: a letter can go after the last one.
  inserts = (0..size).flat_map do |pos|
    alphabet.map { |ch| word[0...pos] + ch + word[pos..-1] }
  end

  candidates = deletes + swaps + replaces + inserts
  candidates unless candidates.empty?
end

.known(words) ⇒ Object



33
34
35
36
# File 'lib/yasc/spelling_corrector.rb', line 33

# Keeps only the entries of +words+ that are keys of NWORDS.
#
# @param words [Array<String>] candidate words
# @return [Array<String>, nil] the matching words in their original
#   order, or nil when none match
def known(words)
  hits = words.find_all { |candidate| NWORDS.key?(candidate) }
  hits unless hits.empty?
end

.known_edits2(word) ⇒ Object



27
28
29
30
31
# File 'lib/yasc/spelling_corrector.rb', line 27

# Collects the NWORDS keys reachable from +word+ by applying edits1
# twice.
#
# Matches are listed in generation order and repeated once for every
# edit path that reaches them.
#
# @param word [String] the word to edit
# @return [Array<String>, nil] the known two-edit candidates, or nil
#   when there are none
def known_edits2(word)
  matches = edits1(word).flat_map do |first_edit|
    edits1(first_edit).select { |second_edit| NWORDS.key?(second_edit) }
  end
  matches.empty? ? nil : matches
end

.train(features) ⇒ Object



9
10
11
12
13
# File 'lib/yasc/spelling_corrector.rb', line 9

# Builds a frequency table from +features+.
#
# The table's default value is 1, so a feature seen k times is stored
# as k + 1 and a lookup for an unseen feature returns 1 without
# adding a key.
#
# @param features [#each] the items to count
# @return [Hash] feature => count, with a default of 1
def train(features)
  counts = Hash.new(1)
  features.each do |feature|
    counts[feature] += 1
  end
  counts
end

.words(text) ⇒ Object



5
6
7
# File 'lib/yasc/spelling_corrector.rb', line 5

# Splits +text+ into lowercase words.
#
# The text is downcased, then every maximal run of the letters a-z is
# taken as one word; all other characters act as separators.
#
# @param text [String] the raw text
# @return [Array<String>] the words in order of appearance
def words(text)
  lowered = text.downcase
  lowered.scan(/[a-z]+/)
end