Module: FuzzyString

Defined in:
ext/fuzzy-string/fuzzy-string.c

Constant Summary

VERSION =
rb_str_new2(RUBY_FUZZY_VERSION)

Class Method Summary

Class Method Details

.jaro_winkler_distance(s1, s2) ⇒ Object



144
145
146
# File 'ext/fuzzy-string/fuzzy-string.c', line 144

/*
 * FuzzyString.jaro_winkler_distance(s1, s2)
 *
 * Converts both arguments with the CSTRING() macro, hands them to
 * c_jaro_winkler_distance() and boxes the numeric result as a Ruby Float.
 * (CSTRING and c_jaro_winkler_distance are defined elsewhere in this file.)
 */
VALUE fuzzy_jaro_winkler_distance(VALUE self, VALUE s1, VALUE s2) {
    double distance;

    (void)self; /* module function: the receiver is not used */

    distance = c_jaro_winkler_distance(CSTRING(s1), CSTRING(s2));
    return DBL2NUM(distance);
}

.levenstein_distance(s1, s2) ⇒ Object



148
149
150
# File 'ext/fuzzy-string/fuzzy-string.c', line 148

/*
 * FuzzyString.levenstein_distance(s1, s2)
 *
 * Converts both arguments with the CSTRING() macro, hands them to
 * c_levenstein_distance() and boxes the result as a Ruby Integer.
 * (CSTRING and c_levenstein_distance are defined elsewhere in this file.)
 */
VALUE fuzzy_levenstein_distance(VALUE self, VALUE s1, VALUE s2) {
    int distance;

    (void)self; /* module function: the receiver is not used */

    distance = c_levenstein_distance(CSTRING(s1), CSTRING(s2));
    return INT2NUM(distance);
}

.soundex(string) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'ext/fuzzy-string/fuzzy-string.c', line 187

/*
 * Map an ASCII letter to its 0-based alphabet position (a/A => 0 ... z/Z => 25).
 * Returns -1 for every other byte, including bytes >= 0x80, so callers can
 * never index the Soundex table out of range. Assumes an ASCII-compatible
 * execution character set (contiguous 'a'..'z' and 'A'..'Z').
 */
static int soundex_letter_index(char c) {
    if (c >= 'a' && c <= 'z')
        return c - 'a';
    if (c >= 'A' && c <= 'Z')
        return c - 'A';
    return -1;
}

/*
 * FuzzyString.soundex(string)
 *
 * Builds a four-character Soundex-style key: the first ASCII letter of
 * +string+ (uppercased) followed by up to three digits, right-padded
 * with '0'.
 *
 * string - a Ruby String; anything else raises ArgumentError.
 *
 * Returns a new Ruby String. If the input contains no ASCII letter the
 * default key "Z000" is returned.
 *
 * Notes:
 *  - Non-letters (including non-ASCII bytes) are skipped entirely.
 *  - Letters are classified without <ctype.h>, so the result does not depend
 *    on the current locale and negative `char` values are never passed to
 *    isalpha()/toupper()/tolower().
 *  - The key is built in a local buffer, so the function keeps no state
 *    between calls.
 *  - Scanning stops at the first NUL byte or at RSTRING_LEN(string),
 *    whichever comes first.
 */
VALUE fuzzy_soundex(VALUE self, VALUE string) {
    /* Soundex digit per letter; 0 marks letters that emit no digit. */
    static const int code[] = { 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2 };
                             /* a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z */
    /* Default key, complete with trailing '0's. */
    char key[] = "Z000";
    const char *cstring;
    const char *end;
    int index;
    int last;
    int count;

    (void)self; /* module function: the receiver is not used */

    if (TYPE(string) != T_STRING)
        rb_raise(rb_eArgError, "invalid argument, expect string");

    cstring = RSTRING_PTR(string);
    end = cstring + RSTRING_LEN(string);

    /* Advance to the first letter. If none is present, return the default key. */
    index = -1;
    while (cstring < end && *cstring != '\0') {
        index = soundex_letter_index(*cstring);
        if (index >= 0)
            break;
        ++cstring;
    }
    if (index < 0)
        return rb_str_new2(key);

    /* The first letter is kept verbatim (uppercased); its code seeds `last`
       so that an immediately following letter with the same code is folded. */
    key[0] = (char)('A' + index);
    last = code[index];
    ++cstring;

    /* Scan the rest of the string; stop at its end or when the key is full. */
    for (count = 1; count < 4 && cstring < end && *cstring != '\0'; ++cstring) {
        index = soundex_letter_index(*cstring);
        /* Non-letters are ignored altogether: they do not even act as separators. */
        if (index < 0)
            continue;

        /* Fold together adjacent letters sharing the same code. */
        if (code[index] != last) {
            last = code[index];
            /* Code-0 letters emit nothing, but having reset `last` they
               still separate two letters with equal codes. */
            if (last != 0)
                key[count++] = (char)('0' + last);
        }
    }

    return rb_str_new2(key);
}

.stem(*args) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'ext/fuzzy-string/fuzzy-string.c', line 152

/*
 * FuzzyString.stem(word, language = nil)
 *
 * Stems +word+ with the Snowball (libstemmer) stemmer for +language+.
 *
 * word     - required; must be a Ruby String, otherwise ArgumentError is raised.
 * language - optional; when omitted or nil, fuzzy_default_language is used.
 *            It is converted with the CSTRING() macro (defined elsewhere in
 *            this file) before being passed to sb_stemmer_new().
 *
 * Returns a new String holding the stem, tagged with the encoding of +word+,
 * or nil when sb_stemmer_new() returns NULL (libstemmer documents this for an
 * unrecognised algorithm name).
 *
 * Raises NoMemoryError (via rb_memerror) if sb_stemmer_stem() returns NULL,
 * which libstemmer documents as its out-of-memory result.
 *
 * NOTE(review): the stemmer is always created for "UTF_8"; the bytes of
 * +word+ are passed through unchanged, so callers presumably must supply
 * UTF-8 input -- confirm.
 */
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;
    struct sb_stemmer *stemmer;

    (void)self; /* module function: the receiver is not used */

    /* "11": one mandatory argument (word), one optional argument (language). */
    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        /* libstemmer works on unsigned bytes and an int length; the casts make
           the conversions from Ruby's char pointer / long length explicit. */
        const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                                (const sb_symbol *)RSTRING_PTR(word),
                                                (int)RSTRING_LEN(word));
        if (!stem) {
            /* Release the stemmer before raising: rb_memerror() does not return. */
            sb_stemmer_delete(stemmer);
            rb_memerror();
        }

        /* `stem` points into memory owned by the stemmer, so copy it into a
           Ruby string before the stemmer is deleted. */
        result = rb_enc_str_new((const char *)stem, sb_stemmer_length(stemmer), rb_enc_get(word));
        sb_stemmer_delete(stemmer);
    }

    return result;
}

.stem_languages ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
# File 'ext/fuzzy-string/fuzzy-string.c', line 173

/*
 * FuzzyString.stem_languages
 *
 * Returns a new Ruby Array of Strings, one per entry of the NULL-terminated
 * list returned by sb_stemmer_list(), in list order, with the "porter" entry
 * left out.
 */
VALUE fuzzy_snowball_languages(VALUE self) {
    VALUE names = rb_ary_new();
    const char **entry;

    for (entry = sb_stemmer_list(); *entry; ++entry) {
        /* "porter" is kept by libstemmer only for backwards compatibility. */
        if (strcmp(*entry, "porter") == 0)
            continue;
        rb_ary_push(names, rb_str_new2(*entry));
    }

    return names;
}