Module: Hotwater

Defined in:
lib/hotwater.rb,
lib/hotwater/version.rb,
lib/hotwater/jaro_ffi.rb,
lib/hotwater/ngram_ffi.rb,
lib/hotwater/levenshtein_ffi.rb,
lib/hotwater/damerau_levenshtein_ffi.rb

Defined Under Namespace

Modules: C

Constant Summary collapse

VERSION =
"0.1.2"

Class Method Summary collapse

Class Method Details

.damerau_levenshtein_distance(s1, s2) ⇒ Integer

compute Damerau Levenshtein edit distance between 2 strings



14
15
16
17
18
# File 'lib/hotwater/damerau_levenshtein_ffi.rb', line 14

# Computes the Damerau-Levenshtein edit distance between two strings by
# delegating to C.damerau_levenshtein_distance.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @return [Integer] the value returned by the C binding
# @raise [RuntimeError] "memory allocation error" when the binding returns -1
def damerau_levenshtein_distance(s1, s2)
  C.damerau_levenshtein_distance(s1, s2).tap do |distance|
    # The wrapper treats -1 from the binding as an allocation failure.
    raise "memory allocation error" if distance == -1
  end
end

.jaro_distance(s1, s2) ⇒ Float

compute Jaro edit distance between 2 strings



17
18
19
20
21
# File 'lib/hotwater/jaro_ffi.rb', line 17

# Computes the Jaro distance between two strings by delegating to
# C.jaro_distance.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @return [Float] the value returned by the C binding
# @raise [RuntimeError] "memory allocation error" when the binding returns
#   a value below 0.0
def jaro_distance(s1, s2)
  C.jaro_distance(s1, s2).tap do |score|
    # The wrapper treats any negative value from the binding as an allocation failure.
    raise "memory allocation error" if score < 0.0
  end
end

.jaro_winkler_distance(s1, s2, long_tolerance = false) ⇒ Float

compute Jaro-Winkler edit distance between 2 strings

Setting `long_tolerance = true` increases the probability of a match when the number of matched characters is large. This option allows for a little more tolerance when the strings are large. It is not an appropriate test when comparing fixed-length fields such as phone and social security numbers.



34
35
36
37
38
# File 'lib/hotwater/jaro_ffi.rb', line 34

# Computes the Jaro-Winkler distance between two strings by delegating to
# C.jaro_winkler_distance.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @param long_tolerance [Boolean] forwarded unchanged to the C binding as
#   its third argument (defaults to false)
# @return [Float] the value returned by the C binding
# @raise [RuntimeError] "memory allocation error" when the binding returns
#   a value below 0.0
def jaro_winkler_distance(s1, s2, long_tolerance = false)
  C.jaro_winkler_distance(s1, s2, long_tolerance).tap do |score|
    # The wrapper treats any negative value from the binding as an allocation failure.
    raise "memory allocation error" if score < 0.0
  end
end

.levenshtein_distance(s1, s2) ⇒ Integer

compute Levenshtein edit distance between 2 strings



14
15
16
17
18
# File 'lib/hotwater/levenshtein_ffi.rb', line 14

# Computes the Levenshtein edit distance between two strings by delegating
# to C.levenshtein_distance.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @return [Integer] the value returned by the C binding
# @raise [RuntimeError] "memory allocation error" when the binding returns -1
def levenshtein_distance(s1, s2)
  C.levenshtein_distance(s1, s2).tap do |distance|
    # The wrapper treats -1 from the binding as an allocation failure.
    raise "memory allocation error" if distance == -1
  end
end

.ngram_distance(s1, s2, n = 2) ⇒ Float

compute N-Gram distance between 2 strings



15
16
17
18
19
# File 'lib/hotwater/ngram_ffi.rb', line 15

# Computes the N-Gram distance between two strings by delegating to
# C.ngram_distance.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @param n [Integer] forwarded unchanged to the C binding as its third
#   argument (defaults to 2)
# @return [Float] the value returned by the C binding
# @raise [RuntimeError] "memory allocation error" when the binding returns -1
def ngram_distance(s1, s2, n = 2)
  C.ngram_distance(s1, s2, n).tap do |score|
    # The wrapper treats -1 from the binding as an allocation failure.
    raise "memory allocation error" if score == -1
  end
end

.normalized_damerau_levenshtein_distance(s1, s2) ⇒ Float

Compute the normalized Damerau-Levenshtein edit distance between 2 strings. Normalization weights the edit distance by the string lengths, so that an edit on a short string has more impact than an edit on a longer string.



26
27
28
29
30
31
32
# File 'lib/hotwater/damerau_levenshtein_ffi.rb', line 26

# Computes a length-normalized Damerau-Levenshtein score for two strings.
#
# The raw distance comes from C.damerau_levenshtein_distance. A raw distance
# of zero yields 0.0; any other distance d yields (max - d) / max, where max
# is the larger of s1.size and s2.size.
#
# NOTE(review): a zero distance maps to 0.0 while every other result follows
# (max - d) / max, so identical strings and maximally distant strings both
# produce 0.0 - confirm this is the intended contract.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @return [Float] the normalized score described above
# @raise [RuntimeError] "memory allocation error" when the binding returns -1
def normalized_damerau_levenshtein_distance(s1, s2)
  distance = C.damerau_levenshtein_distance(s1, s2)
  raise "memory allocation error" if distance == -1
  return 0.0 if distance == 0.0

  longest = [s1, s2].map(&:size).max
  (longest - distance.to_f) / longest
end

.normalized_levenshtein_distance(s1, s2) ⇒ Float

Compute the normalized Levenshtein edit distance between 2 strings. Normalization weights the edit distance by the string lengths, so that an edit on a short string has more impact than an edit on a longer string.



26
27
28
29
30
31
32
# File 'lib/hotwater/levenshtein_ffi.rb', line 26

# Computes a length-normalized Levenshtein score for two strings.
#
# The raw distance comes from C.levenshtein_distance. A raw distance of zero
# yields 0.0; any other distance d yields (max - d) / max, where max is the
# larger of s1.size and s2.size.
#
# NOTE(review): a zero distance maps to 0.0 while every other result follows
# (max - d) / max, so identical strings and maximally distant strings both
# produce 0.0 - confirm this is the intended contract.
#
# @param s1 [String] first string to compare
# @param s2 [String] second string to compare
# @return [Float] the normalized score described above
# @raise [RuntimeError] "memory allocation error" when the binding returns -1
def normalized_levenshtein_distance(s1, s2)
  distance = C.levenshtein_distance(s1, s2)
  raise "memory allocation error" if distance == -1
  return 0.0 if distance == 0.0

  longest = [s1, s2].map(&:size).max
  (longest - distance.to_f) / longest
end