Class: Mongoid::Haystack::Token

Inherits:
Object
  • Object
show all
Includes:
Document
Defined in:
lib/mongoid-haystack/token.rb

Defined Under Namespace

Classes: Error

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.add(value) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/mongoid-haystack/token.rb', line 13

# Make sure a token exists for every value given, then increment each
# token's count by the number of times its value occurs in the input.
#
# @param value [Object, Array] a single value or a (possibly nested) list of
#   values; nils are dropped and duplicates are allowed. The argument is
#   never modified.
# @return [Object, Array, nil] the array of matching tokens when +value+ is an
#   Array, otherwise the first matching token (nil when there is none). The
#   returned tokens are loaded before the counts are incremented.
# @raise [Error] if a token for some value still cannot be found after all
#   creation attempts
def add(value)
# handle a value or array of values - which may contain dups
#
# Array(value) hands back the caller's own array when given one, so use the
# non-bang forms to avoid mutating the argument
#
  values = Array(value).flatten.compact
  uniques = values.uniq
  max_attempts = 42

# try very hard to create tokens efficiently in bulk
#
# only distinct values are sent, so no sequence ids or insert docs are spent
# on repeats of the same value
#
  max_attempts.times do
    missing = uniques - where(:value.in => uniques).map(&:value)
    break if missing.empty?

    docs = missing.map{|v| {:_id => Token.next_hex_id, :value => v}}
    collection = mongo_session.with(:safe => false)[collection_name]
    collection.insert(docs, [:continue_on_error])
  end

# fill in gaps individually iff needed... (another process may be racing to create tokens)
#
  max_attempts.times do
    missing = uniques - where(:value.in => uniques).map(&:value)
    break if missing.empty?

    missing.each do |v|
      begin
        Token.new.tap do |t|
          t.id = Token.next_hex_id
          t.value = v
          t.save!
        end
      rescue StandardError
        # best effort: a failed save is retried on the next pass. only
        # StandardError is swallowed so Interrupt/SystemExit still propagate
        next
      end
    end
  end

# now we should have one token per uniq value
#
  tokens = where(:value.in => uniques).to_a

# sigh - go boom if we failed to ensure the creation of all required
# tokens...
#
  missing = uniques - tokens.map(&:value)
  unless missing.empty?
    raise(Error, "missing tokens (#{ missing.inspect })")
  end

# batch update the counts on the tokens by the number of times each
# value was seen in the list
#
#   'dog dog' #=> increment the 'dog' token's count by 2
#
  token_index = tokens.inject({}){|hash, token| hash[token.value] = token; hash}
  occurrences = values.inject(Hash.new(0)){|hash, v| hash[v] += 1; hash}

# group token ids by increment size so each distinct increment is one update
#
  ids_by_count = {}
  occurrences.each do |v, n|
    ids_by_count[n] ||= []
    ids_by_count[n].push(token_index[v].id)
  end

  ids_by_count.each do |n, token_ids|
    Token.where(:id.in => token_ids).inc(:count, n)
  end

# return an array or single token depending on whether a list or
# single value was added
#
  value.is_a?(Array) ? tokens : tokens.first
end

.next_hex_id ⇒ Object



107
108
109
# File 'lib/mongoid-haystack/token.rb', line 107

# Build the next token id: the next number from +sequence+, rendered in
# base 16 and prefixed with "0x".
#
# @return [String] e.g. "0xff"
def next_hex_id
  "0x#{ sequence.next.to_s(16) }"
end

.sequence ⇒ Object



103
104
105
# File 'lib/mongoid-haystack/token.rb', line 103

# Look up the Sequence keyed by the Token class name, with the namespace
# separators replaced by dots and the whole key lower-cased.
#
# @return [Object] whatever Sequence.for returns for that key
def sequence
  segments = Token.name.scan(/[^:]+/)
  key = segments.join('.').downcase
  Sequence.for(key)
end

.subtract(tokens) ⇒ Object



100
101
# File 'lib/mongoid-haystack/token.rb', line 100

# Placeholder: accepts +tokens+ but currently does nothing with them.
#
# @param tokens [Object] ignored
# @return [nil]
def subtract(tokens)
  nil
end

.total ⇒ Object



111
112
113
# File 'lib/mongoid-haystack/token.rb', line 111

# Sum of the :count field, as computed by +sum+.
#
# @return [Object] the result of sum(:count)
def total
  field = :count
  sum(field)
end

.values_for(*args) ⇒ Object



9
10
11
# File 'lib/mongoid-haystack/token.rb', line 9

# Convenience delegator: hands every argument to Haystack.tokens_for and
# returns its result unchanged.
#
# @param args [Array] forwarded as-is
# @return [Object] whatever Haystack.tokens_for returns
def values_for(*args)
  extracted = Haystack.tokens_for(*args)
  extracted
end

Instance Method Details

#frequency(n_tokens = Token.total.value.to_f) ⇒ Object



123
124
125
126
127
128
129
# File 'lib/mongoid-haystack/token.rb', line 123

# Fraction of the total token count that this token's count represents,
# rounded to two decimal places.
#
# NOTE(review): the default argument calls .value on Token.total, which as
# shown returns sum(:count) - confirm that result responds to #value.
#
# @param n_tokens [Numeric] total count to divide by
# @return [Float] count / n_tokens rounded to 2 places, or Float::INFINITY
#   when n_tokens is zero
def frequency(n_tokens = Token.total.value.to_f)
  # Float::INFINITY is the core constant; Float::Infinity is not defined by Ruby
  return Float::INFINITY if n_tokens.zero?

  # to_f keeps an Integer total from triggering integer division
  (count / n_tokens.to_f).round(2)
end

#frequency_bin(n_tokens = Token.total.value.to_f) ⇒ Object



131
132
133
# File 'lib/mongoid-haystack/token.rb', line 131

# Bucket the token's frequency into an integer bin: frequency times ten,
# truncated.
#
# @param n_tokens [Numeric] total count handed on to +frequency+
# @return [Integer, Float] the bin, or Float::INFINITY when n_tokens is zero
#   (same convention as rarity_bin)
def frequency_bin(n_tokens = Token.total.value.to_f)
  # guard first: truncating an infinite frequency raises FloatDomainError
  return Float::INFINITY if n_tokens.zero?

  (frequency(n_tokens) * 10).truncate
end

#rarity(n_tokens = Token.total.value.to_f) ⇒ Object



135
136
137
# File 'lib/mongoid-haystack/token.rb', line 135

# Share of the total token count NOT taken up by this token's count,
# rounded to two decimal places.
#
# @param n_tokens [Numeric] total count
# @return [Float] (n_tokens - count) / n_tokens rounded to 2 places, or
#   Float::INFINITY when n_tokens is zero (same convention as frequency and
#   rarity_bin)
def rarity(n_tokens = Token.total.value.to_f)
  # without this guard a zero total divides by zero and yields NaN or -Infinity
  return Float::INFINITY if n_tokens.zero?

  # to_f keeps an Integer total from triggering integer division
  total = n_tokens.to_f
  ((total - count) / total).round(2)
end

#rarity_bin(n_tokens = Token.total.value.to_f) ⇒ Object



139
140
141
142
143
144
145
# File 'lib/mongoid-haystack/token.rb', line 139

# Bucket the token's rarity into an integer bin: rarity times ten,
# truncated.
#
# @param n_tokens [Numeric] total count handed on to +rarity+
# @return [Integer, Float] the bin, or Float::INFINITY when n_tokens is zero
def rarity_bin(n_tokens = Token.total.value.to_f)
  # Float::INFINITY is the core constant; Float::Infinity is not defined by Ruby
  return Float::INFINITY if n_tokens.zero?

  (rarity(n_tokens) * 10).truncate
end