Module: FuzzyTools::Helpers

Extended by:
Helpers
Included in:
Helpers
Defined in:
lib/fuzzy_tools/helpers.rb

Constant Summary collapse

# Maps each uppercase ASCII letter to the Soundex digit used by #soundex.
# Code 0 marks letters that never emit a digit themselves (the vowels plus
# H, W and Y). Frozen so the shared lookup table cannot be mutated at runtime.
SOUNDEX_LETTERS_TO_CODES = {
  'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 0, 'F' => 1,
  'G' => 2, 'H' => 0, 'I' => 0, 'J' => 2, 'K' => 2,
  'L' => 4, 'M' => 5, 'N' => 5, 'O' => 0, 'P' => 1,
  'Q' => 2, 'R' => 6, 'S' => 2, 'T' => 3, 'U' => 0,
  'V' => 1, 'W' => 0, 'X' => 2, 'Y' => 0, 'Z' => 2
}.freeze

Instance Method Summary collapse

Instance Method Details

#bigrams(str) ⇒ Object



14
15
16
# File 'lib/fuzzy_tools/helpers.rb', line 14

# Splits +str+ into overlapping 2-character grams.
#
# Thin wrapper that delegates to #ngrams with a window of 2.
#
# @param str [String] text to split
# @return [Object] whatever #ngrams returns for a window of 2
def bigrams(str)
  window = 2
  ngrams(str, window)
end

#ngrams(str, n) ⇒ Object



26
27
28
29
30
31
# File 'lib/fuzzy_tools/helpers.rb', line 26

# Returns every run of +n+ consecutive characters in +str+, in order.
#
# Before windowing, +str+ is padded on both sides with n - 1 underscores, so
# the first and last characters each appear in +n+ different grams.
#
# @param str [String] text to split (interpolated, so +to_s+ is applied)
# @param n [Integer] gram width
# @return [Array<String>] the grams, left to right
def ngrams(str, n)
  pad    = "_" * (n - 1)
  padded = "#{pad}#{str}#{pad}"

  # Slide an n-wide window over the characters and glue each window back
  # into a string.
  padded.chars.each_cons(n).map(&:join)
end

#soundex(str) ⇒ Object

Ruby port of the C below



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/fuzzy_tools/helpers.rb', line 44

# Computes a four-character Soundex-style code for +str+.
#
# The code is the first A-Z letter of the upcased input followed by up to
# three digits looked up in SOUNDEX_LETTERS_TO_CODES, right-padded with "0".
# Characters that are not in the table are skipped entirely. A digit is only
# emitted when it differs from the code of the previous letter; letters with
# code 0 (vowels, H, W, Y) emit nothing but do reset that "previous" code, so
# the same digit may repeat across them.
#
# @param str [String] text to encode
# @return [String] a new 4-character code; "Z000" when +str+ has no A-Z letter
def soundex(str)
  # Build the result from a copy: the default is edited in place below, and
  # editing a string literal directly fails when literals are frozen.
  code  = "Z000".dup
  chars = str.upcase.chars.to_a

  # Anything before the first recognised letter is ignored.
  start = chars.index { |char| SOUNDEX_LETTERS_TO_CODES.key?(char) }
  return code unless start

  first_letter = chars[start]
  code[0]      = first_letter
  last_numeral = SOUNDEX_LETTERS_TO_CODES[first_letter]
  position     = 1

  chars.drop(start + 1).each do |char|
    break if position >= 4 # all three digit slots are filled

    numeral = SOUNDEX_LETTERS_TO_CODES[char]
    # Skip non-letters and immediate repeats of the previous code.
    next if numeral.nil? || numeral == last_numeral

    last_numeral = numeral
    next if numeral.zero? # code 0 letters separate digits but add none

    code[position] = numeral.to_s
    position += 1
  end

  code
end

#term_counts(enumerator) ⇒ Object



5
6
7
8
9
10
11
12
# File 'lib/fuzzy_tools/helpers.rb', line 5

# Counts how many times each element is yielded by +enumerator+.
#
# @param enumerator [#each] any object responding to +each+
# @return [Hash] element => number of occurrences, keyed in first-seen order;
#   a plain Hash with no default value, so unseen elements look up as nil
def term_counts(enumerator)
  tally = {}
  enumerator.each do |term|
    tally[term] = tally.fetch(term, 0) + 1
  end
  tally
end

#tetragrams(str) ⇒ Object



22
23
24
# File 'lib/fuzzy_tools/helpers.rb', line 22

# Splits +str+ into overlapping 4-character grams.
#
# Thin wrapper that delegates to #ngrams with a window of 4.
#
# @param str [String] text to split
# @return [Object] whatever #ngrams returns for a window of 4
def tetragrams(str)
  window = 4
  ngrams(str, window)
end

#trigrams(str) ⇒ Object



18
19
20
# File 'lib/fuzzy_tools/helpers.rb', line 18

# Splits +str+ into overlapping 3-character grams.
#
# Thin wrapper that delegates to #ngrams with a window of 3.
#
# @param str [String] text to split
# @return [Object] whatever #ngrams returns for a window of 3
def trigrams(str)
  window = 3
  ngrams(str, window)
end