Class: IndexedSearch::Word

Inherits:
ActiveRecord::Base
  • Object
show all
Extended by:
Collision, ResetTable
Defined in:
app/models/indexed_search/word.rb

Constant Summary collapse

GROUP_BY_AMOUNT =
1_000

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Collision

retrying_on_collision

Methods included from ResetTable

reset_auto_increment, truncate_table

Class Method Details

.create_word(word) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'app/models/indexed_search/word.rb', line 70

# Builds the attribute hash for a new word row and persists it with create!.
#
# The hash starts as {:word => word}. For every type in index_match_types
# whose match class accepts the word (match_against_term?), the class's
# make_index_value result is stored under its matcher_attribute.
#
# word - the term to store (also handed to each match class).
#
# Returns the id of the record returned by create!.
def self.create_word(word)
  row = {:word => word}
  index_match_types.each do |match_type|
    matcher = IndexedSearch::Match.match_class(match_type)
    next unless matcher.match_against_term?(word)

    index_value = matcher.make_index_value(word)
    columns = matcher.matcher_attribute
    if columns.kind_of?(Array)
      # TODO: isn't this logic duplicated somewhere else?
      # Several columns: values are paired with columns by position.
      columns.each_index { |pos| row.merge!(columns[pos] => index_value[pos]) }
    else
      row.merge!(columns => index_value)
    end
  end
  # import would be faster but it doesn't return the id
  #import(row.keys, [row.values], :validate => false)
  create!(row, :without_protection => true).id
end

.decr_counts_by_ids(ids, offset = 1) ⇒ Object

quickly decrement entries_count column for certain word ids (can be used when removing entries)



96
97
98
# File 'app/models/indexed_search/word.rb', line 96

# Quickly decrements the entries_count column for the given word ids
# (can be used when removing entries).
#
# The offset is handed to update_all as a bound value rather than being
# interpolated into the SQL fragment, so ActiveRecord quotes it.
#
# ids    - word id(s) used for the :id condition.
# offset - amount to subtract from entries_count (default 1).
#
# Returns whatever update_all returns.
def self.decr_counts_by_ids(ids, offset=1)
  # NOTE(review): ordering by id presumably keeps concurrent updates locking
  # rows in a consistent order — confirm.
  where(:id => ids).order('id').update_all(['entries_count = entries_count - ?', offset])
end

.delete_empty ⇒ Object

faster version of delete_orphaned that depends on the entries_count column being up to date



186
187
188
189
190
# File 'app/models/indexed_search/word.rb', line 186

# Faster alternative to delete_orphaned: deletes every word whose
# entries_count is 0, so it depends on that column being up to date.
# reset_auto_increment is called after the delete.
#
# Returns the value returned by delete_all.
def self.delete_empty
  where(:entries_count => 0).delete_all.tap { reset_auto_increment }
end

.delete_orphaned ⇒ Object

Cleanup after reindexing/deleting from the main index. It doesn’t hurt the index for extra words to hang around; it just wastes space. Also resets auto increment if the entire database is purged.



176
177
178
179
180
# File 'app/models/indexed_search/word.rb', line 176

# Cleanup after reindexing/deleting from the main index: deletes the words
# selected by the empty_entry scope, then calls reset_auto_increment.
#
# Returns the value returned by delete_all.
def self.delete_orphaned
  empty_entry.delete_all.tap { reset_auto_increment }
end

.existing_word_id_map(words) ⇒ Object



65
66
67
68
69
# File 'app/models/indexed_search/word.rb', line 65

# Looks up the words that already have a row.
#
# words - word strings used for the :word condition.
#
# Returns a Hash mapping each found word to its id; words without a row
# are simply absent from the hash.
def self.existing_word_id_map(words)
  id_word_pairs = where(:word => words).values_of(:id, :word)
  id_word_pairs.each_with_object({}) { |(id, text), lookup| lookup[text] = id }
end

.find_or_create_word_ids(words, retry_count = 0) ⇒ Object

When indexing, the words may or may not exist in the model yet. Param: array of word strings. Returns: array of word model ids (some may be previously existing, some may be brand new).



51
52
53
54
55
56
# File 'app/models/indexed_search/word.rb', line 51

# When indexing, the words may or may not exist in the model yet; this
# resolves every word to an id, creating rows for the missing ones.
#
# A newly created id is remembered in the lookup, so a word that appears
# more than once in `words` is created only once instead of once per
# occurrence.
#
# words       - array of word strings.
# retry_count - not used by this method; kept so existing callers that pass
#               it keep working.
#
# Returns an array of word ids, in the same order as `words` (some may be
# previously existing, some brand new).
def self.find_or_create_word_ids(words, retry_count=0)
  # NOTE(review): retrying_on_collision comes from Collision; presumably it
  # re-runs the block when a concurrent insert collides — confirm.
  retrying_on_collision do
    id_map = existing_word_id_map(words)
    words.collect { |w| id_map.fetch(w) { id_map[w] = create_word(w) } }
  end
end

.fix_counts_orphans_and_ranks ⇒ Object

update entries_count, remove orphan words no longer used, and rank_limit all at once



193
194
195
# File 'app/models/indexed_search/word.rb', line 193

# Runs the three maintenance steps in order: update entries_count, delete
# words that are no longer used, then update rank_limit.
#
# Returns the sum of the three steps' return values.
def self.fix_counts_orphans_and_ranks
  counts_fixed = update_counts
  words_removed = delete_empty
  ranks_fixed = update_ranks
  counts_fixed + words_removed + ranks_fixed
end

.incr_counts_by_ids(ids, offset = 1) ⇒ Object

quickly increment entries_count column for certain word ids (can be used when adding entries)



91
92
93
# File 'app/models/indexed_search/word.rb', line 91

# Quickly increments the entries_count column for the given word ids
# (can be used when adding entries).
#
# The offset is handed to update_all as a bound value rather than being
# interpolated into the SQL fragment, so ActiveRecord quotes it.
#
# ids    - word id(s) used for the :id condition.
# offset - amount to add to entries_count (default 1).
#
# Returns whatever update_all returns.
def self.incr_counts_by_ids(ids, offset=1)
  # NOTE(review): ordering by id presumably keeps concurrent updates locking
  # rows in a consistent order — confirm.
  where(:id => ids).order('id').update_all(['entries_count = entries_count + ?', offset])
end

.update_counts ⇒ Object

update entries_count column for words



102
103
104
105
106
107
108
109
110
111
112
113
# File 'app/models/indexed_search/word.rb', line 102

# Recomputes the entries_count column for the words in the current scope.
#
# Word ids are processed in batches of GROUP_BY_AMOUNT. For each batch the
# actual number of IndexedSearch::Entry rows per word is compared with the
# stored entries_count, and only words whose count differs are updated.
#
# A word with no entries left does not appear in the grouped count at all,
# so its new count is taken as 0; otherwise a stale non-zero entries_count
# would never be cleared.
#
# Returns the sum of the update_all return values.
def self.update_counts
  cnt = 0
  old_counts = Hash[scoped.values_of(:id, :entries_count)]
  old_counts.keys.in_groups_of(GROUP_BY_AMOUNT, false) do |old_id_group|
    new_counts = IndexedSearch::Entry.where(:word_id => old_id_group).group(:word_id).count
    updates = {}
    old_id_group.each do |id|
      new_count = new_counts.fetch(id, 0)
      updates[id] = new_count if old_counts[id] != new_count
    end
    # invert_multi is used here as count => [ids], so every distinct new
    # count is written with a single UPDATE.
    updates.invert_multi.each do |new_count, up_ids|
      cnt += scoped.where(:id => up_ids).order('id').update_all(:entries_count => new_count)
    end
  end
  cnt
end

.update_ranks ⇒ Object

update rank_limit column for words



136
137
138
# File 'app/models/indexed_search/word.rb', line 136

# Updates the rank_limit column for words: first update_zeroed_ranks, then
# update_rank_limits with the result of rank_limit_updates.
#
# Returns the sum of both steps' return values.
def self.update_ranks
  zeroed = update_zeroed_ranks
  limited = update_rank_limits(rank_limit_updates)
  zeroed + limited
end

.update_ranks_by_ids(ids) ⇒ Object

optimized update of rank_limit column for certain word ids



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'app/models/indexed_search/word.rb', line 116

# Optimized update of the rank_limit column for certain word ids.
#
# With exactly one id, only that row is read and touched: when its
# entries_count is above rank_reduction_factor, the rank limit is
# recalculated and written only if it changed; otherwise a positive
# rank_limit is zeroed out via zero_out_ranks_by_id.
#
# With any other number of ids, update_zeroed_ranks runs first, then the
# rank limit updates are collected in batches of GROUP_BY_AMOUNT and applied
# with update_rank_limits.
#
# ids - array of word ids.
#
# Returns the accumulated total of the update calls' return values.
def self.update_ranks_by_ids(ids)
  unless ids.length == 1
    total = update_zeroed_ranks
    pending = {}
    ids.in_groups_of(GROUP_BY_AMOUNT, false).each do |batch|
      pending.merge!(where(:id => batch).rank_limit_updates)
    end
    total += update_rank_limits(pending)
    return total
  end

  changed = 0
  word_id = ids.first
  # A missing row yields nil here, leaving both values nil and nothing to do.
  entries, current_limit = where(:id => word_id).values_of(:entries_count, :rank_limit).first
  if !entries.nil? && entries > rank_reduction_factor
    fresh_limit = calculate_rank_limit_for_id(word_id)
    changed += where(:id => word_id).update_all(:rank_limit => fresh_limit) unless fresh_limit == current_limit
  elsif !current_limit.nil? && current_limit > 0
    changed += zero_out_ranks_by_id(word_id)
  end
  changed
end

.word_id_map(words, retry_count = 0) ⇒ Object

another version that returns a word->id map hash, instead of just an ids array



58
59
60
61
62
63
64
# File 'app/models/indexed_search/word.rb', line 58

# Variant of find_or_create_word_ids that returns a word => id hash instead
# of an array of ids.
#
# The lookup is checked as it is being filled, so a missing word that
# appears more than once in `words` is created only once.
#
# words       - array of word strings.
# retry_count - not used by this method; kept so existing callers that pass
#               it keep working.
#
# Returns a Hash mapping every given word to its id (existing or new).
def self.word_id_map(words, retry_count=0)
  # NOTE(review): retrying_on_collision comes from Collision; presumably it
  # re-runs the block when a concurrent insert collides — confirm.
  retrying_on_collision do
    id_map = existing_word_id_map(words)
    words.each { |w| id_map[w] = create_word(w) unless id_map.has_key?(w) }
    id_map
  end
end

Instance Method Details

#to_s ⇒ Object



197
198
199
# File 'app/models/indexed_search/word.rb', line 197

# String form of the record: the word itself.
#
# Always returns a String; a nil word (for example on a record whose word
# has not been set) becomes "" rather than nil.
def to_s
  word.to_s
end