Module: IndexedSearch::Index::ClassMethods
- Defined in:
- lib/indexed_search/index.rb
Instance Method Summary collapse
- #collect_search_ranks(scope = nil) ⇒ Object
-
#create_search_index ⇒ Object
Main entry points for indexing a whole model in one go. These use some alternate, more complicated/fragile queries to avoid loading the entire table into memory. Beware: you shouldn’t add/remove rows concurrently while updating…
- #delete_search_index ⇒ Object
-
#id_for_index_attr ⇒ Object
The column from your indexed model that will be stored in the Entry model’s modelrowid attribute.
- #model_id ⇒ Object
- #search_entries ⇒ Object
- #search_entry_data_headings ⇒ Object
- #search_insertion_headings ⇒ Object
- #update_search_index ⇒ Object
Instance Method Details
#collect_search_ranks(scope = nil) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/indexed_search/index.rb', line 174
#
# Collects, for every row in the given scope, how much each word contributes
# to that row.
#
# Each row's +search_index_info+ is walked as (text, amount) pairs.  The text
# is split with IndexedSearch::Query.split_into_words and the amount is added
# to the row's running total for every word occurrence.  Words are then
# translated to ids via IndexedSearch::Word.word_id_map.
#
# scope - rows to process; when nil, +scoped+ is used instead.
#
# Returns a hash keyed by +row.id_for_index+, whose values are hashes of
# word id => summed amount.
#
# NOTE(review): Hash.of is a helper defined outside this listing; presumably it
# builds a hash whose missing keys default to the block's value -- confirm.
def collect_search_ranks(scope=nil)
  word_list = Set.new
  wrd_rnk_map = Hash.of { Hash.new(0) }
  (scope || scoped).each do |row|
    row.search_index_info.each do |txt, amnt|
      words = IndexedSearch::Query.split_into_words(txt)
      # merge grows the set in place; `+=` would copy the whole set on every pass
      word_list.merge(words)
      words.each { |word| wrd_rnk_map[row.id_for_index][word] += amnt }
    end
  end
  # one lookup for all distinct words seen across the whole scope
  wrd_id_map = IndexedSearch::Word.word_id_map(word_list.to_a)
  srch_rnks = Hash.of { {} }
  # re-key the per-row totals from word text to word id
  wrd_rnk_map.each { |id, data| data.each { |wrd, rnk| srch_rnks[id][wrd_id_map[wrd]] = rnk } }
  srch_rnks
end
#create_search_index ⇒ Object
Main entry points for indexing a whole model in one go. These use some alternate, more complicated/fragile queries to avoid loading the entire table into memory. Beware: you shouldn’t add/remove rows concurrently while updating…
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/indexed_search/index.rb', line 90
#
# Builds the search index for the whole model in one go.
#
# Rows are walked in batches of 1,000 (ordered by +id_for_index_attr+) so the
# entire table is never held in memory; each batch is written inside its own
# IndexedSearch::Entry transaction.  Beware that rows should not be added or
# removed concurrently while this runs.
#
# After all batches, word counts are incremented and word ranks refreshed for
# every word id that received at least one new entry.
def create_search_index
  # word id => number of rows that gained an entry for that word
  word_count_incrs = Hash.new(0)
  search_index_scope.order(id_for_index_attr).batches_by_ids(1_000, id_for_index_attr) do |group_scope|
    IndexedSearch::Entry.transaction do
      rank_data = collect_search_ranks(group_scope)
      search_insertion_data = []
      group_scope.each do |row|
        # concat appends in place; `+=` would reallocate the array for every row
        search_insertion_data.concat(row.make_search_insertion_data(rank_data[row.id_for_index]))
        rank_data[row.id_for_index].keys.each { |word_id| word_count_incrs[word_id] += 1 }
      end
      # skip the bulk insert when the batch produced nothing, as update_search_index does
      IndexedSearch::Entry.import(search_insertion_headings, search_insertion_data, :validate => false) unless search_insertion_data.blank?
    end
  end
  # invert_multi is defined outside this listing; presumably it groups word ids by
  # their shared increment so each amount is applied in one call -- confirm
  word_count_incrs.invert_multi.each { |amount, ids| IndexedSearch::Word.incr_counts_by_ids(ids, amount) }
  # nothing to re-rank when no word was touched, as update_search_index does
  IndexedSearch::Word.update_ranks_by_ids(word_count_incrs.keys) unless word_count_incrs.blank?
end
#delete_search_index ⇒ Object
157 158 159 160 161 |
# File 'lib/indexed_search/index.rb', line 157
#
# Drops every index entry that belongs to this model, then asks the Entry and
# Word models to clean up after the removal.
def delete_search_index
  # remove this model's entries first; the follow-up calls run after the delete
  search_entries.delete_all

  IndexedSearch::Entry.reset_auto_increment
  IndexedSearch::Word.fix_counts_orphans_and_ranks
end
#id_for_index_attr ⇒ Object
The column from your indexed model that will be stored in the Entry model’s modelrowid attribute.
Override this in your model if you’re using a different column than what is returned by model.primary_key (usually ‘id’ unless you’ve set self.primary_key = ‘something_else’ in your model).
This column must be a unique integer key in your table.
If your table’s primary key is a composite primary key, then you must have another unique key (not composite) defined in your table and override this method to tell indexed_search which column to use.
Hint: To define an auto-increment column that is not your primary key in MySQL, use:
alter table #{@table_name} add column #{column} int(11) NOT NULL AUTO_INCREMENT UNIQUE KEY
(id would return an array in that case (if using composite_primary_keys gem, anyway), which is hard to store in a single column in the Entry model.)
207 208 209 |
# File 'lib/indexed_search/index.rb', line 207
#
# Name of the column on the indexed model whose value is stored in the Entry
# model's modelrowid attribute.
#
# Override this in a model that uses a different unique integer key column
# (for example when the table has a composite primary key).
#
# Returns the Symbol :id by default.
def id_for_index_attr
  :id
end
#model_id ⇒ Object
166 167 168 169 170 171 172 173 |
# File 'lib/indexed_search/index.rb', line 166
#
# Looks up the id under which this model is registered in
# IndexedSearch::Index.models_by_id.
#
# Returns the key of the first registry entry whose class is this class or one
# of its ancestors.
# Raises BadModelException when no entry matches, or when the lookup itself
# fails for any reason.
def model_id
  # Module#<= is true for the class itself and for any subclass, so both STI and
  # regular Ruby subclasses work -- without instantiating a model for every entry
  # name.constantize allows rails class reloading to work in development
  # todo: still a linear scan with a constantize per entry; needs rethinking
  IndexedSearch::Index.models_by_id.detect {|k,v| self <= v.name.constantize }.first
rescue
  raise BadModelException.new("#{self.name} does not appear to be an indexed model, see IndexedSearch::Index.models_by_id in config/initializers/indexed_search.rb")
end
#search_entries ⇒ Object
163 164 165 |
# File 'lib/indexed_search/index.rb', line 163
#
# Index entries that belong to this model.
#
# Returns the IndexedSearch::Entry query restricted to rows whose modelid
# equals this model's +model_id+.
def search_entries
  own_model_id = model_id
  IndexedSearch::Entry.where(:modelid => own_model_id)
end
#search_entry_data_headings ⇒ Object
213 214 215 |
# File 'lib/indexed_search/index.rb', line 213
#
# Entry columns fetched when caching existing index entries.
#
# The order matters: update_search_index reads position 4 (modelrowid) of each
# fetched entry to group entries by model row.
#
# Returns an Array of Symbols.
def search_entry_data_headings
  [
    :id,
    :word_id,
    :rank,
    :row_priority,
    :modelrowid
  ]
end
#search_insertion_headings ⇒ Object
210 211 212 |
# File 'lib/indexed_search/index.rb', line 210
#
# Entry columns, in order, handed to IndexedSearch::Entry.import as the
# headings for bulk-inserted index rows.
#
# Returns an Array of Symbols.
def search_insertion_headings
  [
    :word_id,
    :rowidx,
    :modelid,
    :modelrowid,
    :rank,
    :row_priority
  ]
end
#update_search_index ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/indexed_search/index.rb', line 106
#
# Brings the search index for the whole model up to date in one go.
#
# Works in three phases:
# 1. Walks existing rows in batches of 1,000 (ordered by +id_for_index_attr+),
#    and for each batch inserts, updates and deletes index entries inside one
#    IndexedSearch::Entry transaction.
# 2. Deletes index entries whose model row no longer exists.
# 3. Adjusts word counts, removes words left with no entries, and refreshes
#    the ranks of every word that changed.
#
# As with create_search_index, rows should not be added or removed
# concurrently while this runs.
def update_search_index
  word_count_incrs = Hash.new(0)
  word_count_decrs = Hash.new(0)
  word_rank_changes = Set.new

  # reindex existing rows
  search_index_scope.order(id_for_index_attr).batches_by_ids(1_000, id_for_index_attr) do |group_scope, group_ids|
    IndexedSearch::Entry.transaction do
      # pre-cache entire group of existing index entries by model id
      entry_data_cache = Hash.of { [] }
      IndexedSearch::Entry.where(:modelid => model_id, :modelrowid => group_ids).values_of(*search_entry_data_headings).each do |entry|
        # index 4 is :modelrowid in search_entry_data_headings
        entry_data_cache[entry[4]] << entry
      end
      rank_data = collect_search_ranks(group_scope)

      # figure out what to add, update, and delete from each
      search_insertion_data = []
      inverted_update_data = Hash.of { [] }
      search_deletion_data = []
      group_scope.each do |row|
        (inserts, updates, deletions, count_decrs, rank_changes) = row.make_search_update_data(rank_data[row.id_for_index], entry_data_cache[row.id_for_index])
        # concat/merge grow the accumulators in place; `+=` would copy them for every row
        search_insertion_data.concat(row.make_search_insertion_data(inserts))
        # group entry ids by identical new values so each distinct change is one UPDATE
        updates.each { |id, vals| inverted_update_data[vals] << id }
        search_deletion_data.concat(deletions)
        inserts.keys.each { |word_id| word_count_incrs[word_id] += 1 }
        count_decrs.each { |word_id| word_count_decrs[word_id] += 1 }
        word_rank_changes.merge(rank_changes)
      end

      # add, update, and delete index entries for this group of models
      IndexedSearch::Entry.import(search_insertion_headings, search_insertion_data, :validate => false) unless search_insertion_data.blank?
      inverted_update_data.each { |vals, ids| IndexedSearch::Entry.where(:id => ids).update_all(vals) }
      IndexedSearch::Entry.where(:id => search_deletion_data).delete_all unless search_deletion_data.blank?
    end
  end

  # delete indexes for model rows that no longer exist
  entry_table = IndexedSearch::Entry.arel_table
  # correlated subquery: selects the model row matching an entry's modelrowid
  subrelation = unscoped.select(arel_table[id_for_index_attr]).
    where(entry_table[:modelid].eq(model_id).and(entry_table[:modelrowid].eq(arel_table[id_for_index_attr])))
  search_deletion_data = []
  # the subquery yields NULL when no model row matches, i.e. the entry is stale
  search_entries.where("(#{subrelation.to_sql}) IS NULL").values_of(:id, :word_id).each do |id, word_id|
    search_deletion_data << id
    word_count_decrs[word_id] += 1
    word_rank_changes << word_id
  end
  IndexedSearch::Entry.where(:id => search_deletion_data).delete_all unless search_deletion_data.blank?

  # increment/decrement counts for added/removed words
  word_count_incrs.invert_multi.each { |amount, ids| IndexedSearch::Word.incr_counts_by_ids(ids, amount) }
  word_count_decrs.invert_multi.each { |amount, ids| IndexedSearch::Word.decr_counts_by_ids(ids, amount) }

  # delete orphaned words no longer used anywhere
  IndexedSearch::Word.delete_empty unless word_count_decrs.blank?

  # update word ranks
  IndexedSearch::Word.update_ranks_by_ids(word_rank_changes.to_a) unless word_rank_changes.blank?
end