Class: Phonetic::DoubleMetaphone

Inherits:

Object
Algorithm
Phonetic::DoubleMetaphone

Defined in: lib/phonetic/double_metaphone.rb,
lib/phonetic/double_metaphone/code.rb

Overview

The Double Metaphone phonetic encoding algorithm is the second generation of the Metaphone algorithm. Its original implementation was described by Lawrence Philips in the June 2000 issue of C/C++ Users Journal.

This implementation is based on the PHP implementation by Stephen Woodbridge and includes modifications to the algorithm by Kevin Atkinson.

Examples:

Phonetic::DoubleMetaphone.encode('czerny') # => ['SRN', 'XRN']
Phonetic::DoubleMetaphone.encode('dumb')   # => ['TM', 'TM']
Phonetic::DoubleMetaphone.encode('edgar')  # => ['ATKR', 'ATKR']
# or use alias:
Phonetic::Metaphone2.encode('czerny') # => ['SRN', 'XRN']
Phonetic::Metaphone2.encode('dumb')   # => ['TM', 'TM']
Phonetic::Metaphone2.encode('edgar')  # => ['ATKR', 'ATKR']

Defined Under Namespace

Classes: Code

Constant Summary collapse

# Special-case rules for the beginning of a word.
#
# Each key is a pattern anchored with \A, so it can only match at the very
# start of the string (unlike ^, which also matches after an embedded newline).
# Each value is a triple [primary, secondary, n].
# NOTE(review): n is presumably the number of leading characters the rule
# consumes -- confirm against encode_start_of_word.
#
# The hash is frozen so the shared rule table cannot be mutated at runtime.
# Insertion order is kept as-is: the more specific 'GHI' rule precedes 'GH'.
START_OF_WORD_MAP = {
  # skip these when at start of word
  /\A([GKP]N|WR|PS)/ => ['', '', 1],
  # initial 'X' is pronounced 'Z' e.g. 'Xavier'
  /\AX/ => ['S', 'S', 1],
  # all init vowels now map to 'A'
  /\A[AEIOUY]/ => ['A', 'A', 1],
  # special case 'caesar'
  /\ACAESAR/ => ['S', 'S', 1],
  # special case 'sugar-'
  /\ASUGAR/ => ['X', 'S', 1],
  # -ges-, -gep-, -gel-, -gie- at beginning
  /\AG(Y|E[SPBLYIR]|I[BLNE])/ => ['K', 'J', 2],
  # keep H if first & before vowel
  /\AH[AEIOUY]/ => ['H', 'H', 2],
  # german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
  /\AS[MNLW]/ => ['S', 'X', 1],
  # ghislane, ghiradelli
  /\AGHI/ => ['J', 'J', 2],
  /\AGH/ => ['K', 'K', 2],
  # greek roots e.g. 'chemistry', 'chorus'
  /\ACH(ARAC|ARIS|OR[^E]|YM|EM)/ => ['K', 'K', 2],
  # Wasserman should match Vasserman
  /\AW[AEIOUY]/ => ['A', 'F', 0],
  # need Uomo to match Womo
  /\AWH/ => ['A', 'A', 0]
}.freeze

Class Method Summary collapse

.encode(str, options = { size: 4 }) ⇒ Object
.encode_word(word, options = { size: 4 }) ⇒ Object

Encode word to its Double Metaphone code.

Class Method Details

.encode(str, options = { size: 4 }) ⇒ `Object`



101
102
103

# File 'lib/phonetic/double_metaphone.rb', line 101

# Encode a string to its Double Metaphone code.
#
# Thin wrapper around encode_word: both arguments are forwarded unchanged,
# so the whole string is encoded as a single word.
#
# @param str [String] text to encode
# @param options [Hash] encoding options, passed through as-is
# @option options [Integer] :size (4) code size used by encode_word
# @return [Object] whatever encode_word returns for the same arguments
def self.encode(str, options = { size: 4 })
  encode_word(str, options)
end

.encode_word(word, options = { size: 4 }) ⇒ `Object`

Encode word to its Double Metaphone code.

# File 'lib/phonetic/double_metaphone.rb', line 55

# Encode word to its Double Metaphone code.
#
# The word is stripped and upcased, then scanned from left to right. Every
# character is dispatched to a helper that appends to the code and returns
# how far the scan position should advance. Scanning stops at the end of the
# word, or as soon as both halves of the code have reached the requested size.
#
# @param word [String] the word to encode
# @param options [Hash] encoding options
# @option options [Integer] :size (4) requested code size
# @return [Object] the value of Code#results for the requested size
def self.encode_word(word, options = { size: 4 })
  max_size = options[:size] || 4
  text = word.strip.upcase
  code = Code.new
  length = text.size
  # Pad with blanks so the helpers may look ahead past the end of the word.
  padded = text + ' ' * 5
  pos = encode_start_of_word(padded, code)

  until pos >= length || (code.first.size >= max_size && code.last.size >= max_size)
    step =
      case padded[pos]
      when 'A', 'E', 'I', 'O', 'U', 'Y'
        # Vowels past the start of the word contribute nothing.
        1
      when 'B'
        # "-mb" (e.g. "dumb") has already been skipped over.
        gen_encode(padded, pos, 'P', 'P', code)
      when 'F', 'K', 'N'
        # These letters encode as themselves.
        gen_encode(padded, pos, padded[pos], padded[pos], code)
      when 'Q'
        gen_encode(padded, pos, 'K', 'K', code)
      when 'V'
        gen_encode(padded, pos, 'F', 'F', code)
      when 'P'
        encode_p(padded, pos, length, code)
      when 'C', 'D', 'G', 'H', 'J', 'L', 'M', 'R', 'S', 'T', 'W', 'X', 'Z'
        # Letters with context-dependent rules share one dispatcher.
        char_encode(padded, pos, length, code)
      when 'Ç', 'ç'
        code.add 'S', 'S'
        1
      when 'Ñ', 'ñ'
        code.add 'N', 'N'
        1
      else
        # Anything unrecognised is skipped.
        1
      end
    pos += step
  end

  code.results(max_size)
end