Class: Phonetic::DMSoundex
- Defined in:
- lib/phonetic/dm_soundex.rb,
lib/phonetic/dm_soundex/map.rb,
lib/phonetic/dm_soundex/code.rb
Overview
Daitch–Mokotoff Soundex (D–M Soundex) is a phonetic algorithm invented in 1985 by Jewish genealogists Gary Mokotoff and Randy Daitch.
Defined Under Namespace
Classes: Code
Constant Summary
# Lookup trie of Daitch-Mokotoff letter sequences.
#
# Keys are single uppercase letters; nesting spells out a multi-letter
# sequence (the trailing comment on each entry names the sequence it
# encodes). A Hash node's 'self' key holds the codes for the sequence that
# ends at that node; a node without 'self' is only a prefix of longer
# sequences.
#
# Each leaf is a three-element Array. encode_word reads the elements of the
# value returned by find_code as:
#   [0] code used when the sequence starts the word,
#   [1] code used when the sequence is followed by one of A E I O U J Y,
#   [2] code used in every other position.
# NOTE(review): find_code is not visible here; presumably it returns the
# leaf with the number of extra letters consumed appended -- confirm.
# NOTE(review): a nested Array (e.g. 'J', 'C', 'RS') looks like a pair of
# alternative codes and '' like "not coded" -- confirm against Code#add.
MAP = {
  'A' => {
    'self' => ['0', '', ''], # A
    'I' => ['0', '1', ''], # AI
    'J' => ['0', '1', ''], # AJ
    'Y' => ['0', '1', ''], # AY
    'U' => ['0', '7', ''] # AU
  },
  'Ą' => ['', '', ['6', '']],
  'E' => {
    'self' => ['0', '', ''], # E
    'I' => ['0', '1', ''], # EI
    'Y' => ['0', '1', ''], # EY
    'J' => ['0', '1', ''], # EJ
    'U' => ['1', '1', ''] # EU
  },
  'O' => {
    'self' => ['0', '', ''], # O
    'I' => ['0', '1', ''], # OI
    'J' => ['0', '1', ''], # OJ
    'Y' => ['0', '1', ''] # OY
  },
  'U' => {
    'self' => ['0', '', ''], # U
    'I' => ['0', '1', ''], # UI
    'J' => ['0', '1', ''], # UJ
    'Y' => ['0', '1', ''], # UY
    'E' => ['0', '', ''] # UE
  },
  'I' => {
    'self' => ['0', '', ''], # I
    'A' => ['1', '', ''], # IA
    'E' => ['1', '', ''], # IE
    'O' => ['1', '', ''], # IO
    'U' => ['1', '', ''] # IU
  },
  'Y' => ['1', '', ''], # Y
  'J' => [['1', '4'], ['', '4'], ['', '4']], # J
  'B' => ['7', '7', '7'], # B
  'C' => {
    'self' => [['5', '4'], ['5', '4'], ['5', '4']], # C
    'H' => {
      'self' => [['5', '4'], ['5', '4'], ['5', '4']], # CH
      'S' => ['5', '54', '54'] # CHS
    },
    'K' => [['5', '45'], ['5', '45'], ['5', '45']], # CK
    'S' => {
      'self' => ['4', '4', '4'], # CS
      'Z' => ['4', '4', '4'] # CSZ
    },
    'Z' => {
      'self' => ['4', '4', '4'], # CZ
      'S' => ['4', '4', '4'] # CZS
    }
  },
  'D' => {
    'self' => ['3', '3', '3'], # D
    'R' => { # DR
      'S' => ['4', '4', '4'], # DRS
      'Z' => ['4', '4', '4'] # DRZ
    },
    'S' => {
      'self' => ['4', '4', '4'], # DS
      'H' => ['4', '4', '4'] # DSH
    },
    'T' => ['3', '3', '3'], # DT
    'Z' => {
      'self' => ['4', '4', '4'], # DZ
      'H' => ['4', '4', '4'], # DZH
      'S' => ['4', '4', '4'] # DZS
    }
  },
  'F' => {
    'self' => ['7', '7', '7'], # F
    'B' => ['7', '7', '7'] # FB
  },
  'G' => ['5', '5', '5'], # G
  'H' => ['5', '5', ''], # H
  'K' => {
    'self' => ['5', '5', '5'], # K
    'H' => ['5', '5', '5'], # KH
    'S' => ['5', '54', '54'] # KS
  },
  'L' => ['8', '8', '8'], # L
  'M' => {
    'self' => ['6', '6', '6'], # M
    'N' => ['', '66', '66'] # MN
  },
  'N' => {
    'self' => ['6', '6', '6'], # N
    'M' => ['', '66', '66'] # NM
  },
  'P' => {
    'self' => ['7', '7', '7'], # P
    'F' => ['7', '7', '7'], # PF
    'H' => ['7', '7', '7'] # PH
  },
  'R' => {
    'self' => ['9', '9', '9'], # R
    'S' => [['94', '4'], ['94', '4'], ['94', '4']], # RS
    'Z' => [['4', '94'], ['4', '94'], ['4', '94']] # RZ
  },
  'Q' => ['5', '5', '5'], # Q
  'S' => {
    'self' => ['4', '4', '4'], # S
    'C' => {
      'self' => ['2', '4', '4'], # SC
      'H' => {
        'self' => ['4', '4', '4'], # SCH
        # SCHD belongs on the S-C-H path; it was previously filed under
        # S-T-S (which spells STSD), so "SCHD" could never match this rule.
        'D' => ['2', '43', '43'], # SCHD
        'T' => {
          'self' => ['2', '43', '43'], # SCHT
          'S' => { # SCHTS
            'C' => { # SCHTSC
              'H' => ['2', '4', '4'] # SCHTSCH
            },
            'H' => ['2', '4', '4'] # SCHTSH
          },
          'C' => { # SCHTC
            'H' => ['2', '4', '4'] # SCHTCH
          }
        }
      }
    },
    'D' => ['2', '43', '43'], # SD
    'H' => {
      'self' => ['4', '4', '4'], # SH
      'C' => { # SHC
        'H' => ['2', '4', '4'] # SHCH
      },
      'D' => ['2', '43', '43'], # SHD
      'T' => {
        'self' => ['2', '43', '43'], # SHT
        'C' => { # SHTC
          'H' => ['2', '4', '4'] # SHTCH
        },
        'S' => { # SHTS
          'H' => ['2', '4', '4'] # SHTSH
        }
      }
    },
    'T' => {
      'self' => ['2', '43', '43'], # ST
      'C' => { # STC
        'H' => ['2', '4', '4'] # STCH
      },
      'S' => { # STS
        'C' => { # STSC
          'H' => ['2', '4', '4'] # STSCH
        },
        'H' => ['2', '4', '4'] # STSH
      },
      'R' => { # STR
        'S' => ['2', '4', '4'], # STRS
        'Z' => ['2', '4', '4'] # STRZ
      }
    },
    'Z' => {
      'self' => ['4', '4', '4'], # SZ
      'C' => { # SZC
        'S' => ['2', '4', '4'], # SZCS
        'Z' => ['2', '4', '4'] # SZCZ
      },
      'D' => ['2', '43', '43'], # SZD
      'T' => ['2', '43', '43'] # SZT
    }
  },
  'T' => {
    'self' => ['3', '3', '3'], # T
    'C' => {
      'self' => ['4', '4', '4'], # TC
      'H' => ['4', '4', '4'] # TCH
    },
    'H' => ['3', '3', '3'], # TH
    'R' => { # TR
      'C' => { # TRC
        'H' => ['4', '4', '4'] # TRCH
      },
      'S' => ['4', '4', '4'], # TRS
      'Z' => ['4', '4', '4'] # TRZ
    },
    'S' => {
      'self' => ['4', '4', '4'], # TS
      'H' => ['4', '4', '4'], # TSH
      'C' => { # TSC
        'H' => ['4', '4', '4'] # TSCH
      },
      'Z' => ['4', '4', '4'] # TSZ
    },
    'T' => { # TT
      'C' => { # TTC
        'H' => ['4', '4', '4'] # TTCH
      },
      'S' => {
        'self' => ['4', '4', '4'], # TTS
        'C' => { # TTSC
          'H' => ['4', '4', '4'] # TTSCH
        },
        'Z' => ['4', '4', '4'] # TTSZ
      },
      'Z' => ['4', '4', '4'] # TTZ
    },
    'Z' => {
      'self' => ['4', '4', '4'], # TZ
      'S' => ['4', '4', '4'] # TZS
    }
  },
  'X' => ['5', '54', '54'], # X
  'V' => ['7', '7', '7'], # V
  'W' => ['7', '7', '7'], # W
  'Z' => {
    'self' => ['4', '4', '4'], # Z
    'H' => {
      'self' => ['4', '4', '4'], # ZH
      'S' => { # ZHS
        'H' => ['4', '4', '4'] # ZHSH
      }
    },
    'S' => {
      'self' => ['4', '4', '4'], # ZS
      'C' => { # ZSC
        'H' => ['4', '4', '4'] # ZSCH
      }
    }
  }
}
Class Method Summary
- .encode(str, options = {}) ⇒ Object
-
.encode_word(word, options = {}) ⇒ Object
Encode word to its D-M Soundex codes.
Class Method Details
.encode(str, options = {}) ⇒ Object
15 16 17 |
# File 'lib/phonetic/dm_soundex.rb', line 15

# Encode a string to its D-M Soundex codes.
#
# Thin wrapper: the string and the options hash are passed unchanged to
# encode_word, and its return value is returned as-is.
#
# @param str [String] text to encode
# @param options [Hash] forwarded untouched to encode_word
# @return [Object] whatever encode_word returns
def self.encode(str, options = {})
  encode_word(str, options)
end
.encode_word(word, options = {}) ⇒ Object
Encode word to its D-M Soundex codes.
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/phonetic/dm_soundex.rb', line 20

# Encode word to its D-M Soundex codes.
#
# The word is stripped, upcased and reduced to the letters A-Z, then scanned
# left to right. At each position the letter sequence is looked up in MAP via
# find_code and one of its three codes is added to a Code accumulator,
# depending on where the sequence sits in the word.
#
# NOTE(review): the A-Z filter also removes non-ASCII letters such as 'Ą',
# although MAP defines a key for it -- confirm whether that is intended.
#
# @param word [String] the word to encode
# @param options [Hash] accepted for signature compatibility; not read here
# @return [Object] the value of Code#results (Code is defined elsewhere)
def self.encode_word(word, options = {})
  w = word.strip.upcase.gsub(/[^A-Z]+/, '')
  i = 0
  code = Code.new
  while i < w.size
    # A letter immediately followed by the same letter is skipped, so a run
    # of identical letters is looked up once, at its last occurrence.
    if w[i] != w[i + 1]
      c = find_code(MAP, w, i)
      if c
        # c[3] is used as the number of letters consumed beyond w[i], so
        # w[i + len] is the first letter after the matched sequence.
        len = c[3] + 1
        if i == 0
          code.add c[0] # sequence starts the word
        elsif w[i + len] =~ /[AEIOUJY]/
          code.add c[1] # sequence is followed by a vowel-like letter
        else
          code.add c[2] # any other position (including end of word)
        end
        i += c[3]
      end
    end
    i += 1
  end
  code.results
end