10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/taxamatch_rb/phonetizer.rb', line 10
# Builds a phonetic "near match" key for a word: a leading letter cluster is
# rewritten, vowel/consonant groups in the remainder are collapsed, and runs
# of identical characters are squeezed to a single character.
#
# @param a_word [String, nil] the word to phonetize; values that do not
#   respond to +strip+ (e.g. nil) and blank strings produce an empty key
# @param normalize_ending [Boolean] when true and the key is longer than four
#   characters, the key is additionally passed through +self.normalize_ending+
# @return [String] the phonetic key, or '' for blank/unusable input
def self.near_match(a_word, normalize_ending = false)
  # Explicit guard instead of an inline `rescue`, which would hide any error.
  return '' unless a_word.respond_to?(:strip)

  a_word = a_word.strip
  return '' if a_word.empty?

  # NOTE(review): the rules below only match upper-case ASCII letters, so the
  # normalizer is presumably expected to upcase/transliterate - confirm.
  a_word = Taxamatch::Normalizer.normalize a_word
  return '' if a_word.empty?

  # Rewrite the leading cluster. \A anchors at the start of the string; `^`
  # would also match after an embedded newline and then cut the wrong chars.
  # The prefixes are mutually exclusive, so the branch order is irrelevant.
  a_word =
    case a_word
    when /\A(?:AE|EA|OE)/    then 'E' + a_word[2..-1]
    when /\A(?:CN|GN|KN|MN)/ then 'N' + a_word[2..-1]
    when /\A(?:CT|PT)/       then 'T' + a_word[2..-1]
    when /\A(?:PS|TS)/       then 'S' + a_word[2..-1]
    when /\ACZ/              then 'C' + a_word[2..-1]
    when /\ADJ/              then 'J' + a_word[2..-1]
    when /\AEU/              then 'U' + a_word[2..-1]
    when /\AMC/              then 'MAC' + a_word[2..-1]
    when /\AQU/              then 'Q' + a_word[2..-1]
    when /\AWR/              then 'R' + a_word[2..-1]
    when /\AX/               then 'Z' + a_word[1..-1]
    else a_word
    end

  # The first character is kept as-is; only the remainder is transformed.
  first_char = a_word[0]

  # The substitutions are order-dependent (e.g. AE -> I can create a new IA),
  # so they are applied strictly in this sequence.
  rest_chars = a_word[1..-1]
               .gsub('AE', 'I')
               .gsub('IA', 'A')
               .gsub('OE', 'I')
               .gsub('OI', 'A')
               .gsub('SC', 'S')
               .delete('H')
               .tr('EOUYKZ', 'IAIICS')

  # Collapse every run of repeated characters, including across the boundary
  # between the first character and the remainder.
  a_word = (first_char + rest_chars).squeeze

  # `self.` is required: the boolean parameter shadows the method name.
  a_word = self.normalize_ending(a_word) if normalize_ending && a_word.size > 4
  a_word
end
|