Module: Redis::Autosuggest

Extended by:
ActiveSupport::Concern
Defined in:
lib/redis/autosuggest.rb,
lib/redis/autosuggest/file.rb,
lib/redis/autosuggest/fuzzy.rb,
lib/redis/autosuggest/config.rb,
lib/redis/autosuggest/version.rb,
lib/redis/autosuggest/rails/railtie.rb,
lib/redis/autosuggest/rails/sources.rb

Defined Under Namespace

Modules: ClassMethods, SuggestRails Classes: Railtie

Constant Summary collapse

VERSION =
"0.3.2"

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.dbObject

Returns the value of attribute db.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for the module-level @db instance variable.
# Other methods in this module call hmget, hdel, zincrby and zrevrange on it.
# NOTE(review): presumably a (namespaced) Redis client -- confirm in config.rb.
def db
  @db
end

.fuzzy_matchObject

Returns the value of attribute fuzzy_match.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for the @fuzzy_match flag.
# When truthy, suggest falls back to suggest_fuzzy if substring matching finds
# nothing, and remove also calls remove_fuzzy.
def fuzzy_match
  @fuzzy_match
end

.itemidsObject

Returns the value of attribute itemids.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @itemids, the hash key that get_id passes to @db.hmget to look up
# the id stored for a normalized item.
def itemids
  @itemids
end

.itemsObject

Returns the value of attribute items.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @items, the hash key that get_item, get_leaderboard and suggest
# pass to @db.hmget to turn ids back into items.
def items
  @items
end

.leaderboardObject

Returns the value of attribute leaderboard.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @leaderboard, the sorted-set key used by get_leaderboard
# (zrevrange), increment (zincrby) and remove (zrem).
def leaderboard
  @leaderboard
end

.max_per_substringObject

Returns the value of attribute max_per_substring.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @max_per_substring.
# NOTE(review): not referenced by the methods shown on this page; presumably
# caps how many ids are kept per substring set -- confirm in config.rb.
def max_per_substring
  @max_per_substring
end

.max_resultsObject

Returns the value of attribute max_results.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @max_results, the default for the +results+ parameter of
# suggest, suggest_fuzzy and get_leaderboard.
def max_results
  @max_results
end

.max_str_sizeObject

Returns the value of attribute max_str_size.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @max_str_size.
# add and add_with_score skip any item whose size exceeds this value.
def max_str_size
  @max_str_size
end

.namespaceObject

Returns the value of attribute namespace.



64
65
66
# File 'lib/redis/autosuggest/config.rb', line 64

# Reader for @namespace.
# NOTE(review): not referenced by the methods shown on this page; presumably
# the key prefix used for this library's Redis keys -- confirm in config.rb.
def namespace
  @namespace
end

.ngram_item_limitObject

Returns the value of attribute ngram_item_limit.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @ngram_item_limit.
# add_fuzzy only adds to an n-gram set while its cardinality is <= this value.
def ngram_item_limit
  @ngram_item_limit
end

.ngram_sizeObject

Returns the value of attribute ngram_size.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @ngram_size, the default n-gram length used by ngram_list and
# yield_ngrams.
def ngram_size
  @ngram_size
end

.ngramsObject

Returns the value of attribute ngrams.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @ngrams, the object the fuzzy-matching methods call scard, sadd,
# srem and sunion on (one set per n-gram).
def ngrams
  @ngrams
end

.rails_source_sizesObject

Returns the value of attribute rails_source_sizes.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @rails_source_sizes.
# NOTE(review): not referenced by the methods shown on this page; presumably
# used by the Rails integration (rails/sources.rb) -- confirm there.
def rails_source_sizes
  @rails_source_sizes
end

.rails_sourcesObject

Returns the value of attribute rails_sources.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @rails_sources.
# NOTE(review): not referenced by the methods shown on this page; presumably
# used by the Rails integration (rails/sources.rb) -- confirm there.
def rails_sources
  @rails_sources
end

.redisObject

Returns the value of attribute redis.



64
65
66
# File 'lib/redis/autosuggest/config.rb', line 64

# Reader for @redis.
# On this page it is only used by remove (zrem on the leaderboard key).
# NOTE(review): presumably the raw Redis connection that @db wraps -- confirm
# in config.rb.
def redis
  @redis
end

.strict_fuzzy_matchingObject

Returns the value of attribute strict_fuzzy_matching.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @strict_fuzzy_matching, the default for the +strict+ parameter of
# suggest_fuzzy (when truthy, low-scoring fuzzy candidates are dropped).
def strict_fuzzy_matching
  @strict_fuzzy_matching
end

.substringsObject

Returns the value of attribute substrings.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for @substrings, the object suggest, get_score and increment call
# zrevrange, zscore and zincrby on (one sorted set of ids per substring).
def substrings
  @substrings
end

.use_leaderboardObject

Returns the value of attribute use_leaderboard.



65
66
67
# File 'lib/redis/autosuggest/config.rb', line 65

# Reader for the @use_leaderboard flag.
# When truthy, increment and remove also update the @leaderboard sorted set.
def use_leaderboard
  @use_leaderboard
end

Class Method Details

.add(*items) ⇒ Object

Add item(s) to the pool of items to autosuggest from. Each item’s initial rank is 0. Returns true if all items added were new, false otherwise.



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/redis/autosuggest.rb', line 8

# Add item(s) to the pool of items to autosuggest from.
#
# An item longer than @max_str_size is skipped, and an item that already
# exists is left untouched; either case makes the result false.
#
# @param items [Array<String>] the items to add
# @return [Boolean] true only if every given item was new and was added
def add(*items)
  every_item_new = true
  items.each do |raw|
    if raw.size <= @max_str_size
      candidate = normalize(raw)
      if item_exists?(candidate)
        every_item_new = false
      else
        add_item(candidate)
      end
    else
      # Oversized items are never stored.
      every_item_new = false
    end
  end
  every_item_new
end

.add_from_file(file) ⇒ Object

Add items to the autosuggest database from a file. Each line should be a string representing one item.



8
9
10
11
12
13
# File 'lib/redis/autosuggest/file.rb', line 8

# Add items to the autosuggest database from a file, one item per line.
#
# Each line is echoed to stdout and then passed to add with surrounding
# whitespace stripped. The file is opened with the block form of File.open so
# the handle is closed when reading finishes or an error is raised.
#
# @param file [String] path of the file to read
# @return [File] the (now closed) file object
def add_from_file(file)
  File.open(file, "r") do |f|
    f.each_line do |l|
      puts "Adding #{l}"
      add(l.strip)
    end
  end
end

.add_fuzzy(item) ⇒ Object

Add an item’s n-grams to the redis db. The n-grams will be used as candidates for autocompletions when Redis::Autosuggest.fuzzy_match is set to true.



9
10
11
12
13
14
15
# File 'lib/redis/autosuggest/fuzzy.rb', line 9

# Add an item's n-grams to the n-gram sets used for fuzzy matching.
#
# For every n-gram of +item+, the member "item:soundex_code" is added to that
# n-gram's set, unless the set already holds more than @ngram_item_limit
# members.
# NOTE(review): the check is inclusive, so a set can grow to
# @ngram_item_limit + 1 members -- confirm that this is intended.
#
# @param item [String] the item to index
def add_fuzzy(item)
  yield_ngrams(item) do |gram|
    next if @ngrams.scard(gram).to_i > @ngram_item_limit
    member = "#{item}:#{compute_soundex_code(item)}"
    @ngrams.sadd(gram, member)
  end
end

.add_with_score(*fields) ⇒ Object

Add item(s) along with their initial scores. Returns true if all items added were new, false otherwise. Example: add_with_score("item1", 4, "item2", 1, "item3", 0)



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/redis/autosuggest.rb', line 24

# Add item(s) together with their initial scores.
#
# +fields+ is read two at a time as (item, score) pairs, e.g.
#   add_with_score("item1", 4, "item2", 1)
# A trailing item with no score is handed to add_item on its own. An item
# longer than @max_str_size is skipped, and an existing item is left untouched.
#
# @param fields [Array] alternating items and scores
# @return [Boolean] true only if every item was new and was added
def add_with_score(*fields)
  every_item_new = true
  fields.each_slice(2) do |raw, *score|
    if raw.size > @max_str_size
      every_item_new = false
      next
    end
    name = normalize(raw)
    if item_exists?(name)
      every_item_new = false
    else
      # +score+ is empty when the last item came without one.
      add_item(name, *score)
    end
  end
  every_item_new
end

.add_with_score_from_file(file) ⇒ Object

Add items and their scores to the autosuggest database from a file. Each line should be a string representing the item followed by its score. Example: item1 0.4 item2 2.1 item3 5.2



21
22
23
# File 'lib/redis/autosuggest/file.rb', line 21

# Add items and their scores to the autosuggest database from a file.
#
# Every line is split on whitespace and all resulting tokens are passed, in
# order, to add_with_score as alternating item/score fields (scores are
# passed through as strings). File.foreach is used so the file handle is
# closed once the lines have been read.
#
# @param file [String] path of the file to read
# @return [Boolean] whatever add_with_score returns
def add_with_score_from_file(file)
  fields = File.foreach(file).flat_map { |l| l.split(" ") }
  add_with_score(*fields)
end

.alphabet_only(str) ⇒ Object

Remove all characters not in the range ‘a-z’ from a string



109
110
111
# File 'lib/redis/autosuggest/fuzzy.rb', line 109

# Strip every character outside lowercase 'a'..'z' from a string
# (upper-case letters, digits, spaces and punctuation are all removed).
#
# @param str [String]
# @return [String] a new string containing only the characters a-z
def alphabet_only(str)
  str.gsub(/[^a-z]/, '')
end

.compute_soundex_code(str) ⇒ Object

Compute the soundex code of a string (only works for single words so we have to merge multi-word strings)



26
27
28
# File 'lib/redis/autosuggest/fuzzy.rb', line 26

# Compute the soundex code of a string.
#
# Soundex only works on single words, so the string is first reduced to its
# a-z characters (which merges multi-word strings into one word).
#
# @param str [String]
# @return the result of Text::Soundex.soundex for the merged string
def compute_soundex_code(str)
  merged = alphabet_only(str)
  Text::Soundex.soundex(merged)
end

.get_id(item) ⇒ Object

Get the id associated with an item in the db



92
93
94
# File 'lib/redis/autosuggest.rb', line 92

# Get the id associated with an item in the db.
#
# The item is normalized before it is looked up in the @itemids hash.
#
# @param item [String]
# @return the stored id, or nil when the lookup yields nothing
def get_id(item)
  lookup_key = normalize(item)
  @db.hmget(@itemids, lookup_key).first
end

.get_item(id) ⇒ Object



96
97
98
# File 'lib/redis/autosuggest.rb', line 96

# Get the item stored under an id in the @items hash.
#
# @param id the item's id (as returned by get_id)
# @return the stored item, or nil when the lookup yields nothing
def get_item(id)
  found = @db.hmget(@items, id)
  found.first
end

.get_leaderboard(results = @max_results) ⇒ Object

Gets the items with the highest scores from the autosuggest db



75
76
77
78
# File 'lib/redis/autosuggest.rb', line 75

# Get the items with the highest scores from the autosuggest db.
#
# Reads the top ids from the @leaderboard sorted set (highest first) and
# resolves them to items through the @items hash.
#
# @param results [Integer] how many items to return (defaults to @max_results)
# @return [Array] the top items, or [] when the leaderboard is empty
def get_leaderboard(results=@max_results)
  last_index = results - 1
  leader_ids = @db.zrevrange(@leaderboard, 0, last_index)
  return [] if leader_ids.empty?
  @db.hmget(@items, leader_ids)
end

.get_score(item) ⇒ Object

Get the score of an item



81
82
83
84
# File 'lib/redis/autosuggest.rb', line 81

# Get the score of an item.
#
# The score is read from the substring set keyed by the full normalized item,
# using the item's id as the member.
#
# @param item [String]
# @return whatever @substrings.zscore returns for that key and id
def get_score(item)
  key = normalize(item)
  member_id = get_id(key)
  @substrings.zscore(key, member_id)
end

.increment(item, incr = 1) ⇒ Object

Increment the score (by 1 by default) of an item.

Pass in a negative value to decrement the score.



53
54
55
56
57
58
# File 'lib/redis/autosuggest.rb', line 53

# Increment the score of an item (by 1 by default).
# Pass in a negative value to decrement the score.
#
# The score is bumped in the sorted set of every substring of the item and,
# when @use_leaderboard is set, in the @leaderboard set as well.
#
# An item that is not in the db is ignored: without this guard a nil id would
# be written into every substring set and the leaderboard. This mirrors the
# nil-id guard in remove.
#
# @param item [String] the item whose score changes
# @param incr [Numeric] amount to add to the score
# @return the result of the leaderboard zincrby, or nil when the item is
#   unknown or the leaderboard is disabled
def increment(item, incr=1)
  item = normalize(item)
  id = get_id(item)
  return if id.nil?
  each_substring(item) { |sub| @substrings.zincrby(sub, incr, id) }
  @db.zincrby(@leaderboard, incr, id) if @use_leaderboard
end

.item_exists?(item) ⇒ Boolean

Returns whether or not an item is already stored in the db

Returns:

  • (Boolean)


87
88
89
# File 'lib/redis/autosuggest.rb', line 87

# Tell whether an item is already stored in the db, i.e. whether an id is
# registered for its normalized form.
#
# @param item [String]
# @return [Boolean]
def item_exists?(item)
  stored_id = get_id(normalize(item))
  !stored_id.nil?
end

.ngram_list(str, ngram_size = @ngram_size) ⇒ Object

Returns a list containing all of the n-grams of a specified size of a string. The list is ordered by the position of the n-gram in the string (duplicates included).



97
98
99
100
101
102
103
104
105
106
# File 'lib/redis/autosuggest/fuzzy.rb', line 97

# List all n-grams of a given size in a string.
#
# Only the a-z characters of +str+ are considered. The n-grams are returned in
# order of their position in the string, duplicates included. A string with
# fewer than +ngram_size+ letters yields an empty list.
#
# @param str [String]
# @param ngram_size [Integer] n-gram length (defaults to @ngram_size)
# @return [Array<String>]
def ngram_list(str, ngram_size=@ngram_size)
  letters = alphabet_only(str).chars
  last_start = letters.size - ngram_size
  (0..last_start).map do |start|
    # to_a keeps degenerate sizes (<= 0) producing empty strings, not nil.
    letters[start, ngram_size].to_a.join
  end
end

.remove(item) ⇒ Object

Remove an item from the pool of items to autosuggest from. Returns true if an item was indeed removed, false otherwise.



39
40
41
42
43
44
45
46
47
48
49
# File 'lib/redis/autosuggest.rb', line 39

# Remove an item from the pool of items to autosuggest from.
#
# Deletes the item's entries from the @items and @itemids hashes and from its
# substring sets, and also from the leaderboard (when @use_leaderboard is set)
# and the fuzzy n-gram sets (when @fuzzy_match is set).
#
# The item is normalized exactly as in add/increment so that the keys removed
# here are the keys that were written. The leaderboard entry is removed
# through @db, the same handle increment and get_leaderboard use for
# @leaderboard.
#
# @param item [String] the item to remove
# @return [Boolean] true if an item was removed, false if it was not in the db
def remove(item)
  item = normalize(item)
  id = get_id(item)
  return false if id.nil?
  @db.hdel(@items, id)
  @db.hdel(@itemids, item)
  remove_substrings(item, id)
  @db.zrem(@leaderboard, id) if @use_leaderboard
  remove_fuzzy(item) if @fuzzy_match
  true
end

.remove_fuzzy(item) ⇒ Object

Remove an item’s n-grams from the Redis db



18
19
20
21
22
# File 'lib/redis/autosuggest/fuzzy.rb', line 18

# Remove an item's n-grams from the Redis db.
#
# For every n-gram of +item+, the member "item:soundex_code" (the same form
# add_fuzzy writes) is removed from that n-gram's set.
#
# @param item [String] the item to un-index
def remove_fuzzy(item)
  yield_ngrams(item) do |gram|
    member = "#{item}:#{compute_soundex_code(item)}"
    @ngrams.srem(gram, member)
  end
end

.suggest(str, results = @max_results) ⇒ Object

Suggest items from the database that most closely match the queried string. Returns an array of suggestion items (an empty array if nothing found). Fuzzy matching will only occur when both of these conditions are met:

- Redis::Autosuggest.fuzzy_match == true
- The simple suggestion method (matching substrings) yields no results


65
66
67
68
69
70
71
72
# File 'lib/redis/autosuggest.rb', line 65

# Suggest items from the database that most closely match the queried string.
#
# The normalized query is looked up as a key in the substring sets and the
# best-ranked ids are resolved to items. Fuzzy matching is used only when
# @fuzzy_match is set AND the substring lookup finds nothing.
#
# @param str [String] the query
# @param results [Integer] maximum number of suggestions (defaults to @max_results)
# @return [Array] suggested items, or [] if nothing was found
def suggest(str, results=@max_results)
  query = normalize(str)
  matched_ids = @substrings.zrevrange(query, 0, results - 1)
  return @db.hmget(@items, matched_ids) unless matched_ids.empty?

  # No substring match: fall back to fuzzy matching when enabled.
  if @fuzzy_match
    suggest_fuzzy(query, results)
  else
    []
  end
end

.suggest_fuzzy(str, results = @max_results, strict = @strict_fuzzy_matching) ⇒ Object

Build a candidate pool for all suitable fuzzy matches for a string by taking the union of all items in the Redis db that share an n-gram with the string. Use levenshtein distance, soundex code similarity, and the number of matching 2-grams to compute a score for each candidate. Then return the highest-scoring candidates.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/redis/autosuggest/fuzzy.rb', line 35

# Suggest items for +str+ by fuzzy matching against the stored n-grams.
#
# The candidate pool is the union of the @ngrams sets of every n-gram of
# +str+. Each candidate gets a multiplicative score built from:
# * its Levenshtein distance to +str+ (closer is better),
# * soundex similarity (full match, or match ignoring the first character),
# * the number of 2-grams it shares with +str+.
# Candidates whose score does not exceed 1 are dropped.
#
# @param str [String] the query (suggest passes it already normalized)
# @param results [Integer] maximum number of suggestions (defaults to @max_results)
# @param strict [Boolean] when truthy, candidates scoring below
#   Math.exp(str.size) are dropped as well (defaults to @strict_fuzzy_matching)
# @return [Array<String>] the best-scoring candidate strings, highest first
def suggest_fuzzy(str, results=@max_results, strict=@strict_fuzzy_matching)
  # A query with no n-grams has no candidate sets to union.
  # NOTE(review): Redis presumably rejects SUNION without keys -- bail out early.
  query_ngrams = ngram_list(str)
  return [] if query_ngrams.empty?

  str_mul = alphabet_only(str).size
  str_soundex_code = compute_soundex_code(str)
  str_2grams = ngram_list(str, 2)
  candidates = []

  @ngrams.sunion(*query_ngrams).each do |member|
    # Members are stored as "item:soundex"; split on the LAST colon so items
    # that themselves contain ":" are kept intact.
    candidate_str, _sep, candidate_soundex_code = member.rpartition(":")
    candidate_score = 1.0

    # Levenshtein distance
    lev_dist = Levenshtein.distance(str, candidate_str)
    candidate_score *= Math.exp([str_mul - lev_dist, 1].max)

    # Soundex
    if str_soundex_code == candidate_soundex_code
      candidate_score *= str_mul
    elsif str_soundex_code[1..-1] == candidate_soundex_code[1..-1]
      # Float division: with integer division the ceil was a no-op and a
      # one-letter query multiplied the score by zero.
      candidate_score *= (str_mul / 2.0).ceil
    end

    # Compute n-grams of size 2 shared between the two strings
    same_2grams = str_2grams & ngram_list(candidate_str, 2)
    candidate_score *= Math.exp(same_2grams.size)

    if candidate_score > 1
      candidates << { str: candidate_str, score: candidate_score }
    end
  end

  # Sort results by score, highest first
  ranked = candidates.sort { |a, b| b[:score] <=> a[:score] }

  # If strict fuzzy matching is used, only suggestion items with scores
  # at or above the threshold are kept. The list is sorted, so everything
  # after the first miss can be discarded.
  if strict
    threshold = Math.exp(str.size)
    ranked = ranked.take_while { |cand| cand[:score] >= threshold }
  end

  # Never return more than +results+ suggestions.
  ranked.take(results).map { |cand| cand[:str] }
end

.yield_ngrams(str, ngram_size = @ngram_size) ⇒ Object

Yield the n-grams of a specified size for a string one at a time



89
90
91
92
# File 'lib/redis/autosuggest/fuzzy.rb', line 89

# Yield the n-grams of a string to the given block, one at a time and in
# the order ngram_list returns them.
#
# @param str [String]
# @param ngram_size [Integer] n-gram length (defaults to @ngram_size)
# @yieldparam gram [String] each n-gram in turn
# @return [Array<String>] the list of n-grams that was iterated
def yield_ngrams(str, ngram_size=@ngram_size)
  ngram_list(str, ngram_size).each { |gram| yield gram }
end