Class: FuzzyMatch::Score::PureRuby

Inherits:
FuzzyMatch::Score show all
Defined in:
lib/fuzzy_match/score/pure_ruby.rb

Constant Summary collapse

SPACE =
' '

Instance Attribute Summary

Attributes inherited from FuzzyMatch::Score

#str1, #str2

Instance Method Summary collapse

Methods inherited from FuzzyMatch::Score

#<=>, #initialize, #inspect

Constructor Details

This class inherits a constructor from FuzzyMatch::Score

Instance Method Details

#dices_coefficient_similar ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/fuzzy_match/score/pure_ruby.rb', line 8

# Dice's coefficient computed over the character bigrams of str1 and str2.
#
# Each string is split into overlapping two-character pairs, and any pair
# containing SPACE is discarded. The score is twice the number of pairs the
# two strings share (counted with multiplicity) divided by the total number
# of pairs. The result is memoized in @dices_coefficient_similar.
#
# @return [Float] 1.0 when the strings are equal; 0.0 when both are a single
#   character or when neither string yields a usable pair; otherwise a value
#   between 0.0 and 1.0
def dices_coefficient_similar
  @dices_coefficient_similar ||= begin
    if str1 == str2
      1.0
    elsif str1.length == 1 && str2.length == 1
      0.0
    else
      # Overlapping two-character windows, minus those touching a space.
      bigrams = lambda do |str|
        (0..str.length - 2).map { |i| str[i, 2] }.reject { |pair| pair.include?(SPACE) }
      end
      pairs1 = bigrams.call(str1)
      pairs2 = bigrams.call(str2)
      total = pairs1.size + pairs2.size

      if total.zero?
        # No usable pairs on either side: dividing by zero would give NaN.
        0.0
      else
        # Count how many copies of each pair str2 still has available, so a
        # pair in str1 can only be matched as many times as it occurs in str2.
        unmatched = Hash.new(0)
        pairs2.each { |pair| unmatched[pair] += 1 }

        shared = 0
        pairs1.each do |pair|
          next unless unmatched[pair] > 0

          unmatched[pair] -= 1
          shared += 1
        end

        (2.0 * shared) / total
      end
    end
  end
end

#levenshtein_similar ⇒ Object

Extracted/adapted from the text gem, version 1.0.2 (lib/text/levenshtein.rb); normalization added for UTF-8 strings.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/fuzzy_match/score/pure_ruby.rb', line 44

# Levenshtein edit distance between str1 and str2, normalized to a
# similarity score. The result is memoized in @levenshtein_similar.
#
# Both strings are unpacked into integer arrays: as Unicode code points
# ('U*') when utf8? is true, otherwise as bytes ('C*'). The distance is
# computed with a single rolling row and then divided by the length of the
# longer sequence, following the normalization used by amatch:
# https://github.com/flori/amatch/blob/master/ext/amatch_ext.c#L301
#
# @return [Float] 0.0 when either string unpacks to nothing, otherwise
#   1.0 - distance / longer_length
def levenshtein_similar
  @levenshtein_similar ||= begin
    format = utf8? ? 'U*' : 'C*'
    source = str1.unpack(format)
    target = str2.unpack(format)
    source_size = source.length
    target_size = target.length

    if source_size.zero? || target_size.zero?
      0.0
    else
      # row[j] holds the distance between the source prefix processed so far
      # and the first j elements of target.
      row = (0..target_size).to_a
      distance = nil

      source.each_with_index do |source_unit, i|
        # Cell to the left of the one being computed, in the current row.
        left = i + 1

        target.each_with_index do |target_unit, j|
          substitution = row[j] + (source_unit == target_unit ? 0 : 1)
          insertion = row[j + 1] + 1
          deletion = left + 1

          distance = [insertion, deletion, substitution].min
          row[j] = left
          left = distance
        end

        row[target_size] = distance
      end

      1.0 - distance.to_f / [source_size, target_size].max
    end
  end
end