Class: Phonetic::DoubleMetaphone

Inherits:
Algorithm show all
Defined in:
lib/phonetic/double_metaphone.rb,
lib/phonetic/double_metaphone/code.rb

Overview

The Double Metaphone phonetic encoding algorithm is the second generation of the Metaphone algorithm. Its original implementation was described by Lawrence Philips in the June 2000 issue of C/C++ Users Journal.

This implementation is based on the PHP implementation by Stephen Woodbridge and contains modifications to the algorithm by Kevin Atkinson.

Examples:

Phonetic::DoubleMetaphone.encode('czerny') # => ['SRN', 'XRN']
Phonetic::DoubleMetaphone.encode('dumb')   # => ['TM', 'TM']
Phonetic::DoubleMetaphone.encode('edgar')  # => ['ATKR', 'ATKR']
# or use alias:
Phonetic::Metaphone2.encode('czerny') # => ['SRN', 'XRN']
Phonetic::Metaphone2.encode('dumb')   # => ['TM', 'TM']
Phonetic::Metaphone2.encode('edgar')  # => ['ATKR', 'ATKR']

See Also:

Defined Under Namespace

Classes: Code

Constant Summary collapse

# Rules for the beginning of a word. Each key is a pattern tested against the
# upcased word; each value is a three-element array that appears to be
# [primary code, secondary code, number of characters to advance]
# (NOTE(review): confirm against encode_start_of_word, which consumes it).
#
# Entry order matters wherever patterns overlap (e.g. GHI is listed before
# GH) -- presumably the first matching entry wins.
#
# Patterns are anchored with \A rather than ^ so that they can only match at
# the real start of the string, never after an embedded newline.
# The hash itself is frozen so the rule table cannot be altered at runtime.
START_OF_WORD_MAP = {
  # skip these when at start of word
  /\A([GKP]N|WR|PS)/ => ['', '', 1],
  # initial 'X' is pronounced 'Z' e.g. 'Xavier'
  /\AX/ => ['S', 'S', 1],
  # all init vowels now map to 'A'
  /\A[AEIOUY]/ => ['A', 'A', 1],
  # special case 'caesar'
  /\ACAESAR/ => ['S', 'S', 1],
  # special case 'sugar-'
  /\ASUGAR/ => ['X', 'S', 1],
  # -ges-, -gep-, -gel-, -gie- at beginning
  /\AG(Y|E[SPBLYIR]|I[BLNE])/ => ['K', 'J', 2],
  # keep H if first & before vowel
  /\AH[AEIOUY]/ => ['H', 'H', 2],
  # german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
  /\AS[MNLW]/ => ['S', 'X', 1],
  # ghislane, ghiradelli
  /\AGHI/ => ['J', 'J', 2],
  /\AGH/ => ['K', 'K', 2],
  # greek roots e.g. 'chemistry', 'chorus'
  /\ACH(ARAC|ARIS|OR[^E]|YM|EM)/ => ['K', 'K', 2],
  # Wasserman should match Vasserman
  /\AW[AEIOUY]/ => ['A', 'F', 0],
  # need Uomo to match Womo
  /\AWH/ => ['A', 'A', 0]
}.freeze

Class Method Summary collapse

Class Method Details

.encode(str, options = { size: 4 }) ⇒ Object



101
102
103
# File 'lib/phonetic/double_metaphone.rb', line 101

# Encodes +str+ to its Double Metaphone code.
#
# Thin wrapper: both arguments are forwarded unchanged to encode_word, so the
# whole string is treated as a single word.
#
# @param str [String] text to encode
# @param options [Hash] passed as-is to encode_word (which reads :size)
# @return [Object] whatever encode_word returns
def self.encode(str, options = { size: 4 })
  encode_word(str, options)
end

.encode_word(word, options = { size: 4 }) ⇒ Object

Encode word to its Double Metaphone code.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/phonetic/double_metaphone.rb', line 55

# Encodes a single word to its Double Metaphone code.
#
# The word is stripped and upcased, the start-of-word rules are applied via
# encode_start_of_word, and the remaining characters are then consumed left
# to right. Scanning stops at the end of the word, or as soon as both codes
# held by the Code object have reached the requested size.
#
# @param word [String] word to encode
# @param options [Hash] :size is the target code length (4 when absent or nil)
# @return [Object] the value of Code#results for the requested size
def self.encode_word(word, options = { size: 4 })
  code_size = options[:size] || 4
  w = word.strip.upcase
  code = Code.new
  # length of the real word, captured before padding
  len = w.size
  # pad the original string so that we can index beyond the edge of the world
  w += ' ' * 5
  i = encode_start_of_word(w, code)
  while i < len && (code.first.size < code_size || code.last.size < code_size)
    # each branch yields how many characters to advance
    step =
      case w[i]
      when 'B'
        # "-mb", e.g. "dumb", already skipped over...
        gen_encode(w, i, 'P', 'P', code)
      when 'Ç', 'ç'
        code.add 'S', 'S'
        1
      when 'C', 'D', 'G', 'H', 'J', 'L', 'M', 'R', 'S', 'T', 'W', 'X', 'Z'
        char_encode(w, i, len, code)
      when 'F', 'K', 'N'
        gen_encode(w, i, w[i], w[i], code)
      when 'Ñ', 'ñ'
        code.add 'N', 'N'
        1
      when 'P'
        encode_p(w, i, len, code)
      when 'Q'
        gen_encode(w, i, 'K', 'K', code)
      when 'V'
        gen_encode(w, i, 'F', 'F', code)
      else
        # vowels (A, E, I, O, U, Y) and any unrecognised character add no
        # code at this point; just move on
        1
      end
    i += step
  end
  code.results(code_size)
end