Module: IndexedSearch::Index::ClassMethods

Defined in:
lib/indexed_search/index.rb

Instance Method Summary collapse

Instance Method Details

#collect_search_ranks(scope = nil) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/indexed_search/index.rb', line 174

# Builds the word ranks for every row in +scope+.
#
# Each row's +search_index_info+ is walked as (text, amount) pairs. The text is
# split with IndexedSearch::Query.split_into_words and the amount is added to
# that row's running total once per word occurrence.
#
# scope - the rows to process; falls back to +scoped+ when nil.
#
# Returns a hash keyed by +row.id_for_index+ whose values map word ids (looked
# up through IndexedSearch::Word.word_id_map) to the summed rank. Rows that
# produced no words have no key in the result.
def collect_search_ranks(scope = nil)
  seen_words = Set.new
  ranks_by_row = Hash.of { Hash.new(0) }
  (scope || scoped).each do |row|
    row_id = row.id_for_index
    row.search_index_info.each do |text, amount|
      words = IndexedSearch::Query.split_into_words(text)
      # merge grows the set in place; `+=` would copy the whole set per text
      seen_words.merge(words)
      words.each { |word| ranks_by_row[row_id][word] += amount }
    end
  end
  # resolve all words to ids in one call, then re-key the ranks by word id
  word_ids = IndexedSearch::Word.word_id_map(seen_words.to_a)
  ranks_by_word_id = Hash.of { {} }
  ranks_by_row.each do |id, word_ranks|
    word_ranks.each { |word, rank| ranks_by_word_id[id][word_ids[word]] = rank }
  end
  ranks_by_word_id
end

#create_search_index ⇒ Object

Main entry points for indexing a whole model in one go. These use alternate, more complicated/fragile queries to avoid loading the entire table into memory. Beware that you shouldn’t add/remove rows concurrently while updating…



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/indexed_search/index.rb', line 90

# Indexes every row of search_index_scope from scratch.
#
# Rows are visited in batches of 1,000 ordered by id_for_index_attr. Each
# batch's entries are bulk-inserted inside its own IndexedSearch::Entry
# transaction. Word counts and ranks are updated once, after all batches.
def create_search_index
  word_count_incrs = Hash.new(0)
  search_index_scope.order(id_for_index_attr).batches_by_ids(1_000, id_for_index_attr) do |group_scope|
    IndexedSearch::Entry.transaction do
      rank_data = collect_search_ranks(group_scope)
      search_insertion_data = []
      group_scope.each do |row|
        row_ranks = rank_data[row.id_for_index]
        # concat appends in place; `+=` would re-copy the array for every row
        search_insertion_data.concat(row.make_search_insertion_data(row_ranks))
        row_ranks.each_key { |word_id| word_count_incrs[word_id] += 1 }
      end
      # skip the bulk insert when the batch produced nothing, as update_search_index does
      unless search_insertion_data.empty?
        IndexedSearch::Entry.import(search_insertion_headings, search_insertion_data, :validate => false)
      end
    end
  end
  # word ids grouped by how many entries were added for them
  word_count_incrs.invert_multi.each { |amount, ids| IndexedSearch::Word.incr_counts_by_ids(ids, amount) }
  IndexedSearch::Word.update_ranks_by_ids(word_count_incrs.keys) unless word_count_incrs.empty?
end

#delete_search_index ⇒ Object



157
158
159
160
161
# File 'lib/indexed_search/index.rb', line 157

# Removes this model's entries from the search index, then lets the Entry and
# Word models clean up after the deletion. The three steps run in this order.
def delete_search_index
  # delete every index entry belonging to this model (see search_entries)
  search_entries.delete_all
  # NOTE(review): presumably resets the entries table's auto-increment counter — confirm in IndexedSearch::Entry
  IndexedSearch::Entry.reset_auto_increment
  # NOTE(review): going by its name, repairs word counts, orphaned words and ranks — confirm in IndexedSearch::Word
  IndexedSearch::Word.fix_counts_orphans_and_ranks
end

#id_for_index_attr ⇒ Object

The column from your indexed model that will be stored in the Entry model’s modelrowid attribute.

Override this in your model if you’re using a different column than what is returned by model.primary_key (usually ‘id’ unless you’ve set self.primary_key = ‘something_else’ in your model).

This column must be a unique integer key in your table.

If your table’s primary key is a composite primary key, then you must have another unique key (not composite) defined in your table and override this method to tell indexed_search which column to use.

Hint: To define an auto-increment column that is not your primary key in MySQL, use:

alter table #{@table_name} add column #{column} int(11) NOT NULL AUTO_INCREMENT UNIQUE KEY

(id would return an array in that case (if using composite_primary_keys gem, anyway), which is hard to store in a single column in the Entry model.)



207
208
209
# File 'lib/indexed_search/index.rb', line 207

# Column of the indexed model whose value is stored in the Entry model's
# modelrowid attribute. Defaults to :id; override it in a model that has to be
# indexed by another column, which must be a unique, non-composite integer key.
def id_for_index_attr
  :id
end

#model_id ⇒ Object



166
167
168
169
170
171
172
173
# File 'lib/indexed_search/index.rb', line 166

# Looks up the id this model is registered under in
# IndexedSearch::Index.models_by_id.
#
# A registered class matches when this class is that class or descends from
# it, so both STI and regular Ruby subclasses resolve to their parent's entry.
#
# Returns the key of the first matching models_by_id entry.
# Raises BadModelException when nothing matches or the lookup itself fails.
def model_id
  # Module#<= answers the same ancestry question as self.new.kind_of? did, but
  # without instantiating the model (which fails when `new` needs arguments)
  # name.constantize allows rails class reloading to work in development
  # todo: still re-resolves every registered constant on each call
  IndexedSearch::Index.models_by_id.detect { |_id, model| self <= model.name.constantize }.first
rescue
  raise BadModelException.new("#{self.name} does not appear to be an indexed model, see IndexedSearch::Index.models_by_id in config/initializers/indexed_search.rb")
end

#search_entries ⇒ Object



163
164
165
# File 'lib/indexed_search/index.rb', line 163

# The IndexedSearch::Entry rows whose modelid equals this model's model_id,
# i.e. every index entry that belongs to this model.
def search_entries
  entries = IndexedSearch::Entry
  entries.where(:modelid => model_id)
end

#search_entry_data_headings ⇒ Object



213
214
215
# File 'lib/indexed_search/index.rb', line 213

# Columns read back from existing IndexedSearch::Entry rows, in the order
# update_search_index hands them to values_of. update_search_index groups the
# fetched tuples by their :modelrowid value, so that column has to keep its
# place in this list.
def search_entry_data_headings
  [
    :id,
    :word_id,
    :rank,
    :row_priority,
    :modelrowid
  ]
end

#search_insertion_headings ⇒ Object



210
211
212
# File 'lib/indexed_search/index.rb', line 210

# Column names passed to IndexedSearch::Entry.import as the headings for the
# rows being bulk-inserted.
# NOTE(review): rows built by make_search_insertion_data presumably list their
# values in this same order — confirm there before reordering.
def search_insertion_headings
  [
    :word_id,
    :rowidx,
    :modelid,
    :modelrowid,
    :rank,
    :row_priority
  ]
end

#update_search_index ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/indexed_search/index.rb', line 106

# Brings the search index in line with the current rows of search_index_scope.
#
# Existing rows are re-indexed in batches of 1,000 ordered by
# id_for_index_attr, each batch inside its own IndexedSearch::Entry
# transaction: new entries are inserted, changed ones updated and stale ones
# deleted. Afterwards, entries whose model row no longer exists are deleted,
# word counts are adjusted, empty words are removed and the ranks of the
# affected words are updated.
def update_search_index
  word_count_incrs = Hash.new(0)
  word_count_decrs = Hash.new(0)
  word_rank_changes = Set.new
  # resolved once up front instead of once per batch; it cannot change in between
  indexed_model_id = model_id
  # position of :modelrowid inside each tuple fetched with search_entry_data_headings
  modelrowid_idx = search_entry_data_headings.index(:modelrowid)
  # reindex existing rows
  search_index_scope.order(id_for_index_attr).batches_by_ids(1_000, id_for_index_attr) do |group_scope, group_ids|
    IndexedSearch::Entry.transaction do
      # pre-cache entire group of existing index entries by model row id
      entry_data_cache = Hash.of { [] }
      IndexedSearch::Entry.where(:modelid => indexed_model_id, :modelrowid => group_ids).values_of(*search_entry_data_headings).each do |entry|
        entry_data_cache[entry[modelrowid_idx]] << entry
      end
      rank_data = collect_search_ranks(group_scope)
      # figure out what to add, update, and delete from each
      search_insertion_data = []
      inverted_update_data = Hash.of { [] }
      search_deletion_data = []
      group_scope.each do |row|
        row_id = row.id_for_index
        (inserts, updates, deletions, count_decrs, rank_changes) = row.make_search_update_data(rank_data[row_id], entry_data_cache[row_id])
        # concat/merge append in place; `+=` would re-copy each collection for every row
        search_insertion_data.concat(row.make_search_insertion_data(inserts))
        # group entry ids by identical new values so each distinct change is one update_all
        updates.each { |id, vals| inverted_update_data[vals] << id }
        search_deletion_data.concat(deletions)
        inserts.each_key { |word_id| word_count_incrs[word_id] += 1 }
        count_decrs.each { |word_id| word_count_decrs[word_id] += 1 }
        word_rank_changes.merge(rank_changes)
      end
      # add, update, and delete index entries for this group of models
      IndexedSearch::Entry.import(search_insertion_headings, search_insertion_data, :validate => false) unless search_insertion_data.blank?
      inverted_update_data.each { |vals, ids| IndexedSearch::Entry.where(:id => ids).update_all(vals) }
      IndexedSearch::Entry.where(:id => search_deletion_data).delete_all unless search_deletion_data.blank?
    end
  end
  # delete indexes for model rows that no longer exist:
  # the correlated subquery yields NULL for entries with no matching model row
  entry_table = IndexedSearch::Entry.arel_table
  subrelation = unscoped.select(arel_table[id_for_index_attr]).
    where(entry_table[:modelid].eq(indexed_model_id).and(entry_table[:modelrowid].eq(arel_table[id_for_index_attr])))
  orphaned_entry_ids = []
  search_entries.where("(#{subrelation.to_sql}) IS NULL").values_of(:id, :word_id).each do |id, word_id|
    orphaned_entry_ids << id
    word_count_decrs[word_id] += 1
    word_rank_changes << word_id
  end
  IndexedSearch::Entry.where(:id => orphaned_entry_ids).delete_all unless orphaned_entry_ids.blank?
  # increment/decrement counts for added/removed words
  word_count_incrs.invert_multi.each { |amount, ids| IndexedSearch::Word.incr_counts_by_ids(ids, amount) }
  word_count_decrs.invert_multi.each { |amount, ids| IndexedSearch::Word.decr_counts_by_ids(ids, amount) }
  # delete orphaned words no longer used anywhere
  IndexedSearch::Word.delete_empty unless word_count_decrs.blank?
  # update word ranks
  IndexedSearch::Word.update_ranks_by_ids(word_rank_changes.to_a) unless word_rank_changes.blank?
end