Class: Phonetic::DMSoundex

Inherits:
Algorithm show all
Defined in:
lib/phonetic/dm_soundex.rb,
lib/phonetic/dm_soundex/map.rb,
lib/phonetic/dm_soundex/code.rb

Overview

Daitch–Mokotoff Soundex (D–M Soundex) is a phonetic algorithm invented in 1985 by Jewish genealogists Gary Mokotoff and Randy Daitch.

Examples:

Phonetic::DMSoundex.encode('Anja') # => ['060000', '064000']
Phonetic::DMSoundex.encode('Schwarz') # => ['474000', '479400']
Phonetic::DMSoundex.encode('Schtolteheim') # => ['283560']

Defined Under Namespace

Classes: Code

Constant Summary collapse

# Lookup table for D-M Soundex letter sequences, organised as a nested Hash
# (a trie) keyed by single upper-case letters.
#
# * A Hash value means longer sequences exist that start with the letters on
#   the path so far; its 'self' entry, when present, holds the codes for the
#   path itself (e.g. MAP['C']['H']['self'] is the entry for "CH").
#   Some inner nodes (e.g. "DR", "TR", "TT") have no 'self' entry.
# * An Array value is a leaf holding three slots. encode_word reads slots
#   0, 1 and 2 of the value returned by find_code for, respectively: the first
#   coded letter of the word, a sequence followed by one of A E I O U J Y, and
#   every other position. find_code is not shown here -- presumably it returns
#   one of these arrays with a length appended; confirm against its source.
# * A slot that is itself an Array (e.g. ['5', '4']) presumably lists
#   alternative codes that yield several results, and '' presumably means
#   "not coded" -- both interpretations depend on Code#add; TODO confirm.
#
# The trailing comment on each entry spells out the letter sequence it encodes.
MAP =
{
  'A' => {
    'self' => ['0', '', ''], # A
    'I' => ['0', '1', ''],   # AI
    'J' => ['0', '1', ''],   # AJ
    'Y' => ['0', '1', ''],   # AY
    'U' => ['0', '7', '']    # AU
  },
  # NOTE(review): encode_word strips every character outside A-Z before the
  # lookup, so this key is not reachable through encode_word as written.
  'Ą' => ['', '', ['6', '']],
  'E' => {
    'self' => ['0', '', ''], # E
    'I' => ['0', '1', ''],   # EI
    'Y' => ['0', '1', ''],   # EY
    'J' => ['0', '1', ''],   # EJ
    'U' => ['1', '1', '']    # EU
  },
  'O' => {
    'self' => ['0', '', ''], # O
    'I' => ['0', '1', ''],   # OI
    'J' => ['0', '1', ''],   # OJ
    'Y' => ['0', '1', '']    # OY
  },
  'U' => {
    'self' => ['0', '', ''], # U
    'I' => ['0', '1', ''],   # UI
    'J' => ['0', '1', ''],   # UJ
    'Y' => ['0', '1', ''],   # UY
    'E' => ['0', '', '']     # UE
  },
  'I' => {
    'self' => ['0', '', ''], # I
    'A' => ['1', '', ''],    # IA
    'E' => ['1', '', ''],    # IE
    'O' => ['1', '', ''],    # IO
    'U' => ['1', '', '']     # IU
  },
  'Y' => ['1', '', ''],                               # Y
  'J' => [['1', '4'], ['', '4'], ['', '4']],          # J
  'B' => ['7', '7', '7'],                             # B
  'C' => {
    'self' => [['5', '4'], ['5', '4'], ['5', '4']],   # C
    'H' => {
      'self' => [['5', '4'], ['5', '4'], ['5', '4']], # CH
      'S' => ['5', '54', '54']                        # CHS
    },
    'K' => [['5', '45'], ['5', '45'], ['5', '45']],   # CK
    'S' => {
      'self' => ['4', '4', '4'], # CS
      'Z' => ['4', '4', '4']     # CSZ
    },
    'Z' => {
      'self' => ['4', '4', '4'], # CZ
      'S' => ['4', '4', '4']     # CZS
    }
  },
  'D' => {
    'self' => ['3', '3', '3'],   # D
    'R' => {                     # DR
      'S' => ['4', '4', '4'],    # DRS
      'Z' => ['4', '4', '4']     # DRZ
    },
    'S' => {
      'self' => ['4', '4', '4'], # DS
      'H' => ['4', '4', '4']     # DSH
    },
    'T' => ['3', '3', '3'],      # DT
    'Z' => {
      'self' =>['4', '4', '4'],  # DZ
      'H' => ['4', '4', '4'],    # DZH
      'S' => ['4', '4', '4']     # DZS
    }
  },
  'F' => {
    'self' => ['7', '7', '7'],  # F
    'B' => ['7', '7', '7']      # FB
  },
  'G' => ['5', '5', '5'],       # G
  'H' => ['5', '5', ''],        # H
  'K' => {
    'self' => ['5', '5', '5'],  # K
    'H' => ['5', '5', '5'],     # KH
    'S' => ['5', '54', '54']    # KS
  },
  'L' => ['8', '8', '8'],       # L
  'M' => {
    'self' => ['6', '6', '6'],  # M
    'N' => ['', '66', '66']     # MN
  },
  'N' => {
    'self' => ['6', '6', '6'],  # N
    'M' => ['', '66', '66']     # NM
  },
  'P' => {
    'self' => ['7', '7', '7'],  # P
    'F' => ['7', '7', '7'],     # PF
    'H' => ['7', '7', '7']      # PH
  },
  'R' => {
    'self' => ['9', '9', '9'],  # R
    'S' => [['94', '4'], ['94', '4'], ['94', '4']], # RS
    'Z' => [['4', '94'], ['4', '94'], ['4', '94']]  # RZ
  },
  'Q' => ['5', '5', '5'],               # Q
  'S' => {
    'self' => ['4', '4', '4'],          # S
    'C' => {
      'self' => ['2', '4', '4'],        # SC
      'H' => {
        'self' => ['4', '4', '4'],      # SCH
        'T' => {
          'self' => ['2', '43', '43'],  # SCHT
          'S' => {                      # SCHTS
            'C' => {                    # SCHTSC
              'H' => ['2', '4', '4']    # SCHTSCH
            },
            'H' => ['2', '4', '4']      # SCHTSH
          },
          'C' => {                      # SCHTC
            'H' => ['2', '4', '4']      # SCHTCH
          }
        }
      }
    },
    'D' => ['2', '43', '43'],           # SD
    'H' => {
      'self' => ['4', '4', '4'],        # SH
      'C' => {                          # SHC
        'H' => ['2', '4', '4']          # SHCH
      },
      'D' => ['2', '43', '43'],         # SHD
      'T' => {
        'self' => ['2', '43', '43'],    # SHT
        'C' => {                        # SHTC
          'H' => ['2', '4', '4']        # SHTCH
        },
        'S' => {                        # SHTS
          'H' => ['2', '4', '4']        # SHTSH
        }
      }
    },
    'T' => {
      'self' => ['2', '43', '43'],      # ST
      'C' => {                          # STC
        'H' => ['2', '4', '4']          # STCH
      },
      'S' => {                          # STS
        'C' => {                        # STSC
          'H' => ['2', '4', '4']        # STSCH
        },
        # NOTE(review): this key path spells S-T-S-D, but the entry was
        # labelled "SCHD". There is no 'D' key under S -> C -> H, so "SCHD"
        # may have been the intended location -- confirm against the D-M table.
        'D' => ['2', '43', '43'],       # STSD (originally labelled SCHD)
        'H' => ['2', '4', '4']          # STSH
      },
      'R' => {                          # STR
        'S' => ['2', '4', '4'],         # STRS
        'Z' => ['2', '4', '4']          # STRZ
      }
    },
    'Z' => {
      'self' => ['4', '4', '4'],        # SZ
      'C' => {                          # SZC
        'S' => ['2', '4', '4'],         # SZCS
        'Z' => ['2', '4', '4']          # SZCZ
      },
      'D' => ['2', '43', '43'],         # SZD
      'T' => ['2', '43', '43']          # SZT
    }
  },
  'T' => {
    'self' => ['3', '3', '3'],          # T
    'C' => {
      'self' => ['4', '4', '4'],        # TC
      'H' => ['4', '4', '4']            # TCH
    },
    'H' => ['3', '3', '3'],             # TH
    'R' => {                            # TR
      'C' => {                          # TRC
        'H' => ['4', '4', '4']          # TRCH
      },
      'S' => ['4', '4', '4'],           # TRS
      'Z' => ['4', '4', '4']            # TRZ
    },
    'S' => {
      'self' => ['4', '4', '4'],        # TS
      'H' => ['4', '4', '4'],           # TSH
      'C' => {                          # TSC
        'H' => ['4', '4', '4']          # TSCH
      },
      'Z' => ['4', '4', '4']            # TSZ
    },
    'T' => {                            # TT
      'C' => {                          # TTC
        'H' => ['4', '4', '4']          # TTCH
      },
      'S' => {
        'self' => ['4', '4', '4'],      # TTS
        'C' => {                        # TTSC
          'H' => ['4', '4', '4']        # TTSCH
        },
        'Z' => ['4', '4', '4']          # TTSZ
      },
      'Z' => ['4', '4', '4']            # TTZ
    },
    'Z' => {
      'self' => ['4', '4', '4'],        # TZ
      'S' => ['4', '4', '4']            # TZS
    }
  },
  'X' => ['5', '54', '54'],             # X
  'V' => ['7', '7', '7'],               # V
  'W' => ['7', '7', '7'],               # W
  'Z' => {
    'self' => ['4', '4', '4'],          # Z
    'H' => {
      'self' => ['4', '4', '4'],        # ZH
      'S' => {                          # ZHS
        'H' => ['4', '4', '4']          # ZHSH
      }
    },
    'S' => {
      'self' => ['4', '4', '4'],        # ZS
      'C' => {                          # ZSC
        'H' => ['4', '4', '4']          # ZSCH
      }
    }
  }
}

Class Method Summary collapse

Class Method Details

.encode(str, options = {}) ⇒ Object



15
16
17
# File 'lib/phonetic/dm_soundex.rb', line 15

# Encode +str+ to its D-M Soundex codes.
#
# Delegates directly to encode_word, passing both arguments through unchanged.
#
# @param str [String] text to encode
# @param options [Hash] forwarded to encode_word as-is
# @return [Object] whatever encode_word returns
def self.encode(str, options = {})
  encode_word(str, options)
end

.encode_word(word, options = {}) ⇒ Object

Encodes a single word to its D–M Soundex codes.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/phonetic/dm_soundex.rb', line 20

# Encode a single word to its D-M Soundex codes.
#
# The word is upper-cased and every character outside A-Z is removed. The
# remaining letters are scanned left to right; at each examined position
# find_code looks up the letter sequence starting there in MAP, and one of
# the three code slots of the match is handed to a Code accumulator:
#
# * slot 0 for the first coded letter of the word,
# * slot 1 when the sequence is followed by one of A E I O U J Y,
# * slot 2 otherwise.
#
# @param word [String] the word to encode
# @param options [Hash] accepted for interface compatibility; not read here
# @return [Object] the value of Code#results (the class examples show an
#   Array of six-digit Strings, e.g. ['060000', '064000'])
def self.encode_word(word, options = {})
  w = word.strip.upcase.gsub(/[^A-Z]+/, '')
  code = Code.new
  # True until the first position has been examined. A plain `i == 0` test is
  # not enough: the first letter of a doubled pair is skipped below, so for a
  # word such as "AARON" index 0 is never examined and the leading letter
  # would otherwise be coded with a non-initial slot.
  at_start = true
  i = 0
  while i < w.size
    # Doubled letters are coded once: skip the first letter of the pair.
    if w[i] != w[i + 1]
      c = find_code(MAP, w, i)
      if c
        # c[3] is the number of extra letters the match consumed after w[i],
        # so this is the letter right after the sequence (nil at end of word).
        following = w[i + c[3] + 1]
        if at_start
          code.add c[0]
        elsif following =~ /[AEIOUJY]/
          code.add c[1]
        else
          code.add c[2]
        end
        i += c[3]
      end
      at_start = false
    end
    i += 1
  end
  code.results
end