Module: FuzzyString
- Defined in:
- ext/fuzzy-string/fuzzy-string.c
Constant Summary collapse
- VERSION =
rb_str_new2(RUBY_FUZZY_VERSION)
Class Method Summary collapse
- .jaro_winkler_distance(s1, s2) ⇒ Object
- .levenstein_distance(s1, s2) ⇒ Object
-
.soundex(string) ⇒ Object
Returns the four-character Soundex key of a string; adapted from en.literateprograms.org/Soundex_(C).
- .stem(*args) ⇒ Object
- .stem_languages ⇒ Object
Class Method Details
.jaro_winkler_distance(s1, s2) ⇒ Object
144 145 146 |
# File 'ext/fuzzy-string/fuzzy-string.c', line 144
/*
 * FuzzyString.jaro_winkler_distance(s1, s2)
 *
 * Converts both arguments with CSTRING, hands them to
 * c_jaro_winkler_distance and wraps the numeric result with DBL2NUM.
 */
VALUE fuzzy_jaro_winkler_distance(VALUE self, VALUE s1, VALUE s2) {
    double distance = c_jaro_winkler_distance(CSTRING(s1), CSTRING(s2));

    return DBL2NUM(distance);
}
|
.levenstein_distance(s1, s2) ⇒ Object
148 149 150 |
# File 'ext/fuzzy-string/fuzzy-string.c', line 148
/*
 * FuzzyString.levenstein_distance(s1, s2)
 *
 * Converts both arguments with CSTRING, hands them to
 * c_levenstein_distance and wraps the integer result with INT2NUM.
 */
VALUE fuzzy_levenstein_distance(VALUE self, VALUE s1, VALUE s2) {
    int distance = c_levenstein_distance(CSTRING(s1), CSTRING(s2));

    return INT2NUM(distance);
}
|
.soundex(string) ⇒ Object
Returns the four-character Soundex key of a string; adapted from en.literateprograms.org/Soundex_(C).
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
# File 'ext/fuzzy-string/fuzzy-string.c', line 187
/*
 * Map an ASCII letter (either case) to its position 0..25 in the alphabet.
 * Returns -1 for every other byte, including bytes >= 0x80.
 *
 * Done with plain range checks instead of <ctype.h>: the ctype functions are
 * locale-dependent and have undefined behaviour for negative char values, and
 * a non-ASCII "letter" would index past the end of the 26-entry code table.
 */
static int soundex_letter_index(char c) {
    if (c >= 'a' && c <= 'z')
        return c - 'a';
    if (c >= 'A' && c <= 'Z')
        return c - 'A';
    return -1;
}

/*
 * FuzzyString.soundex(string)
 *
 * Builds a four-character key: the first ASCII letter of the input,
 * uppercased, followed by up to three digits taken from the code table below
 * and padded with '0'. Non-letters are skipped entirely; consecutive letters
 * with the same code produce a single digit; letters with code 0 emit nothing
 * but do reset the "last code" so the same digit may appear again after them.
 *
 * Returns a new Ruby string holding the key, or "Z000" when the input
 * contains no ASCII letter.
 * Raises ArgumentError when the argument is not a String.
 *
 * adapted from en.literateprograms.org/Soundex_(C)
 */
VALUE fuzzy_soundex(VALUE self, VALUE string) {
    static const int code[] = { 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2 };
    /*                          a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z */

    /* Default key, complete with trailing '0's. A local buffer is enough:
       rb_str_new2 copies the bytes into the new Ruby string. */
    char key[] = "Z000";
    const char *cursor;
    const char *end;
    int index;
    int last;
    int count;

    if (TYPE(string) != T_STRING)
        rb_raise(rb_eArgError, "invalid argument, expect string");

    cursor = RSTRING_PTR(string);
    end    = cursor + RSTRING_LEN(string);

    /* Advance to the first letter. The scan is bounded by the string length
       and, as before, also stops at an embedded NUL byte. */
    while (cursor < end && *cursor && soundex_letter_index(*cursor) < 0)
        ++cursor;

    /* No letter present: return the default key. */
    if (cursor == end || *cursor == 0)
        return rb_str_new2(key);

    /* The first letter is kept verbatim (uppercased); its code seeds `last`
       so that an immediately following letter with the same code is folded. */
    index  = soundex_letter_index(*cursor);
    key[0] = (char)('A' + index);
    last   = code[index];
    ++cursor;

    /* Scan the rest, stopping at end of input or when the key is full. */
    for (count = 1; count < 4 && cursor < end && *cursor; ++cursor) {
        index = soundex_letter_index(*cursor);

        /* Non-letters are ignored altogether. */
        if (index < 0)
            continue;

        /* Fold together adjacent letters sharing the same code. */
        if (last != code[index]) {
            last = code[index];
            /* Code 0 letters act only as separators. */
            if (last != 0)
                key[count++] = (char)('0' + last);
        }
    }

    return rb_str_new2(key);
}
|
.stem(*args) ⇒ Object
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'ext/fuzzy-string/fuzzy-string.c', line 152
/*
 * FuzzyString.stem(word, language = nil)
 *
 * Stems `word` with a Snowball stemmer created for `language` and the
 * "UTF_8" character encoding. When `language` is omitted or nil,
 * fuzzy_default_language is used.
 *
 * Returns a new string tagged with the same encoding as `word`, or nil when
 * no stemmer could be created for the language or the stemmer failed to
 * produce a result.
 * Raises ArgumentError when `word` is not a String.
 */
VALUE fuzzy_snowball(int argc, VALUE *argv, VALUE self) {
    VALUE word, language, result = Qnil;

    /* "11": one required argument (word), one optional (language). */
    rb_scan_args(argc, argv, "11", &word, &language);

    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    struct sb_stemmer *stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (!stemmer)
        return result;

    /* libstemmer works on sb_symbol (unsigned char) buffers and int lengths;
       the casts make the conversions from Ruby's char * / long explicit. */
    const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                            (const sb_symbol *)RSTRING_PTR(word),
                                            (int)RSTRING_LEN(word));

    /* sb_stemmer_stem returns NULL on failure; never pass that on to
       rb_enc_str_new. The stem buffer is owned by the stemmer, so it must be
       copied into the Ruby string before the stemmer is deleted. */
    if (stem) {
        int stem_len = sb_stemmer_length(stemmer);
        result = rb_enc_str_new((const char *)stem, stem_len, rb_enc_get(word));
    }

    sb_stemmer_delete(stemmer);
    return result;
}
|
.stem_languages ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 |
# File 'ext/fuzzy-string/fuzzy-string.c', line 173
/*
 * FuzzyString.stem_languages
 *
 * Walks the NULL-terminated list returned by sb_stemmer_list() and returns
 * a new array with one string per entry, leaving out "porter".
 */
VALUE fuzzy_snowball_languages(VALUE self) {
    VALUE names = rb_ary_new();
    const char **entry;

    for (entry = sb_stemmer_list(); *entry; ++entry) {
        /* 'porter' is only kept for backwards compatibility - skip it. */
        if (strcmp(*entry, "porter") == 0)
            continue;

        rb_ary_push(names, rb_str_new2(*entry));
    }

    return names;
}
|