Module: FuzzyTools::Helpers
Constant Summary collapse
# Maps each uppercase ASCII letter to the digit #soundex uses for it.
# Letters coded 0 (A, E, I, O, U, H, W, Y) never appear in the output;
# letters sharing a non-zero digit are treated as the same sound.
# Frozen so the shared lookup table cannot be modified at runtime.
SOUNDEX_LETTERS_TO_CODES = {
  'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 0, 'F' => 1, 'G' => 2,
  'H' => 0, 'I' => 0, 'J' => 2, 'K' => 2, 'L' => 4, 'M' => 5, 'N' => 5,
  'O' => 0, 'P' => 1, 'Q' => 2, 'R' => 6, 'S' => 2, 'T' => 3, 'U' => 0,
  'V' => 1, 'W' => 0, 'X' => 2, 'Y' => 0, 'Z' => 2
}.freeze
Instance Method Summary collapse
- #bigrams(str) ⇒ Object
- #ngrams(str, n) ⇒ Object
- #soundex(str) ⇒ Object
  Ruby port of a C Soundex implementation (the C source it refers to is not shown in this section).
- #term_counts(enumerator) ⇒ Object
- #tetragrams(str) ⇒ Object
- #trigrams(str) ⇒ Object
Instance Method Details
#bigrams(str) ⇒ Object
14 15 16 |
# File 'lib/fuzzy_tools/helpers.rb', line 14
# Returns the 2-character n-grams of +str+ by delegating to #ngrams.
#
# @param str [Object] value handed to #ngrams unchanged
# @return [Object] whatever #ngrams returns for a gram size of 2
def bigrams(str)
  gram_size = 2
  ngrams(str, gram_size)
end
#ngrams(str, n) ⇒ Object
26 27 28 29 30 31 |
# File 'lib/fuzzy_tools/helpers.rb', line 26
# Splits +str+ into every run of +n+ consecutive characters, after padding
# both ends with n - 1 underscores so the first and last characters each
# appear in n grams.
#
#   ngrams("ab", 2) # => ["_a", "ab", "b_"]
#
# @param str [#to_s] text to split; it is interpolated into the padded
#   string, so nil behaves like the empty string
# @param n [Integer] gram length, at least 1
# @return [Array<String>] the grams in left-to-right order
# @raise [ArgumentError] if +n+ is less than 1
def ngrams(str, n)
  # Without this check "_" * (n - 1) fails with an opaque "negative argument".
  raise ArgumentError, "n must be at least 1 (got #{n.inspect})" if n < 1

  pad = "_" * (n - 1)
  # each_cons walks the characters once; slicing with str[i, n] has to rescan
  # from the start of the string for every gram when the text is multibyte.
  "#{pad}#{str}#{pad}".chars.each_cons(n).map(&:join)
end
#soundex(str) ⇒ Object
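Computes a four-character Soundex code for str. Ruby port of a C implementation (the C source it refers to is not shown in this section).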
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/fuzzy_tools/helpers.rb', line 44
# Computes a four-character Soundex-style code for +str+.
#
# The result is the first character of the upcased input that has an entry
# in SOUNDEX_LETTERS_TO_CODES, followed by up to three digits and padded
# with "0". Characters without an entry (digits, punctuation, lowercase-only
# or non-ASCII letters after upcasing) are skipped entirely. Letters coded 0
# are never emitted, but they do end a run: the same non-zero code on either
# side of one is emitted twice.
#
#   soundex("Robert") # => "R163"
#
# @param str [#to_s] text to encode; nil is treated as the empty string
# @return [String] a new 4-character code, or "Z000" when no character of
#   the input has a code
def soundex(str)
  # dup so the in-place writes below never touch a (possibly frozen) literal.
  code = "Z000".dup
  chars = str.to_s.upcase.chars.to_a

  # Drop leading characters that have no Soundex code.
  chars.shift until chars.empty? || SOUNDEX_LETTERS_TO_CODES.key?(chars.first)
  return code if chars.empty?

  first_letter = chars.shift
  code[0] = first_letter
  last_numeral = SOUNDEX_LETTERS_TO_CODES[first_letter]
  filled = 1

  while filled < 4 && !chars.empty?
    numeral = SOUNDEX_LETTERS_TO_CODES[chars.shift]
    # Uncoded characters and repeats of the previous code contribute nothing
    # and leave the previous code in place.
    next if numeral.nil? || numeral == last_numeral

    last_numeral = numeral
    next if numeral.zero? # a 0 only resets the run; it is never written

    code[filled] = numeral.to_s
    filled += 1
  end

  code
end
#term_counts(enumerator) ⇒ Object
5 6 7 8 9 10 11 12 |
# File 'lib/fuzzy_tools/helpers.rb', line 5
# Counts how many times each element is yielded by +enumerator+.
#
# @param enumerator [#each] any object whose #each yields the terms
# @return [Hash] term => number of occurrences, keyed in first-seen order;
#   a plain Hash, so looking up an unseen term gives nil
def term_counts(enumerator)
  tallies = {}
  enumerator.each do |term|
    tallies[term] = tallies.fetch(term, 0) + 1
  end
  tallies
end
#tetragrams(str) ⇒ Object
22 23 24 |
# File 'lib/fuzzy_tools/helpers.rb', line 22
# Returns the 4-character n-grams of +str+ by delegating to #ngrams.
#
# @param str [Object] value handed to #ngrams unchanged
# @return [Object] whatever #ngrams returns for a gram size of 4
def tetragrams(str)
  gram_size = 4
  ngrams(str, gram_size)
end
#trigrams(str) ⇒ Object
18 19 20 |
# File 'lib/fuzzy_tools/helpers.rb', line 18
# Returns the 3-character n-grams of +str+ by delegating to #ngrams.
#
# @param str [Object] value handed to #ngrams unchanged
# @return [Object] whatever #ngrams returns for a gram size of 3
def trigrams(str)
  gram_size = 3
  ngrams(str, gram_size)
end