Module: ActsAsFerret
- Defined in:
- lib/act_methods.rb,
lib/index.rb,
lib/without_ar.rb,
lib/blank_slate.rb,
lib/local_index.rb,
lib/multi_index.rb,
lib/unix_daemon.rb,
lib/bulk_indexer.rb,
lib/rdig_adapter.rb,
lib/remote_index.rb,
lib/class_methods.rb,
lib/ferret_result.rb,
lib/ferret_server.rb,
lib/acts_as_ferret.rb,
lib/more_like_this.rb,
lib/search_results.rb,
lib/instance_methods.rb,
lib/remote_functions.rb,
lib/remote_multi_index.rb,
lib/ferret_find_methods.rb
Overview
:nodoc:
Defined Under Namespace
Modules: ActMethods, ClassMethods, FerretFindMethods, InstanceMethods, MoreLikeThis, RdigAdapter, Remote, RemoteFunctions, ResultAttributes, WithoutAR Classes: AbstractIndex, ActsAsFerretError, BlankSlate, BulkIndexer, FerretResult, IndexAlreadyDefined, IndexLogger, IndexNotDefined, LocalIndex, MultiIndex, MultiIndexBase, RemoteIndex, RemoteMultiIndex, SearchResults
Constant Summary collapse
- DEFAULT_FIELD_OPTIONS =
Default ferret configuration for index fields
{ :store => :no, :highlight => :yes, :index => :yes, :term_vector => :with_positions_offsets, :boost => 1.0 }
- @@multi_indexes =
Global Hash containing all multi indexes created by all classes using the plugin. The key is the comma-separated concatenation of the alphabetically sorted names of the indexes the searcher searches.
Hash.new
- @@ferret_indexes =
Global Hash containing the ferret indexes of all classes using the plugin. The key is the index name.
Hash.new
- @@index_using_classes =
mapping from class name to index name
{}
- @@logger =
Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log"
- @@raise_drb_errors =
false
- @@remote =
nil
Class Method Summary collapse
- .append_features(base) ⇒ Object
- .build_field_config(fields) ⇒ Object
- .change_index_dir(name, new_dir) ⇒ Object
- .close_multi_indexes ⇒ Object
-
.combine_conditions(conditions, additional_conditions = []) ⇒ Object
combine our conditions with those given by user, if any.
-
.conditions_for_model(model, conditions = {}) ⇒ Object
check for per-model conditions and return these if provided.
-
.create_index_instance(definition) ⇒ Object
creates a new Index instance.
-
.define_index(name, options = {}) ⇒ Object
Declares an index.
- .ensure_directory(dir) ⇒ Object
- .ferret_indexes ⇒ Object
-
.field_infos(index_definition) ⇒ Object
builds a FieldInfos instance for creation of an index.
- .filter_include_list_for_model(model, include_options) ⇒ Object
-
.find(query, models_or_index_name, options = {}, ar_options = {}) ⇒ Object
models_or_index_name may be an index name as declared in config/aaf.rb, a single class or an array of classes to limit search to these classes.
-
.find_ids(query, models_or_index_name, options = {}, &block) ⇒ Object
find ids of records.
-
.find_index(models_or_index_name) ⇒ Object
returns an index instance suitable for searching/updating the named index.
-
.find_last_index_version(basedir) ⇒ Object
find the most recent version of an index.
-
.get_index(name) ⇒ Object
returns the index with the given name.
-
.get_index_for(*classes) ⇒ Object
returns the index used by the given class.
- .index_using_classes ⇒ Object
-
.init_index_basedir ⇒ Object
make sure the default index base dir exists.
- .load_config ⇒ Object
-
.multi_index(indexes) ⇒ Object
returns a MultiIndex instance operating on a MultiReader.
- .multi_indexes ⇒ Object
- .raise_drb_errors? ⇒ Boolean
- .rebuild_index(name) ⇒ Object
-
.register_class_with_index(clazz, index_name, options = {}) ⇒ Object
called internally by the acts_as_ferret method.
- .remote? ⇒ Boolean
-
.retrieve_records(id_arrays, find_options = {}) ⇒ Object
retrieves search result records from a data structure like this: { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }.
-
.total_hits(query, models_or_index_name, options = {}) ⇒ Object
count hits for a query.
Class Method Details
.append_features(base) ⇒ Object
533 534 535 536 |
# File 'lib/acts_as_ferret.rb', line 533

# Module inclusion hook: performs the default inclusion behaviour via
# +super+ and then extends +base+ with ClassMethods.
def self.append_features(base)
  super
  base.extend ClassMethods
end
.build_field_config(fields) ⇒ Object
506 507 508 509 510 511 512 513 514 515 516 |
# File 'lib/acts_as_ferret.rb', line 506

# Builds the per-field configuration hash used by an index definition.
#
# fields - nil, an Array of field names, or a Hash mapping field names to
#          per-field option hashes.
#
# Returns a Hash mapping each field name to the result of field_config_for.
# An empty Hash is returned when +fields+ is nil or false.
# Raises ArgumentError when +fields+ is neither an Array nor a Hash.
def self.build_field_config(fields)
  field_config = {}
  return field_config unless fields

  case fields
  when Array
    fields.each { |name| field_config[name] = field_config_for(name) }
  when Hash
    fields.each { |name, options| field_config[name] = field_config_for(name, options) }
  else
    # The listing raised InvalidArgumentError, a constant that Ruby does not
    # define and that is not listed under this namespace, so the raise itself
    # would have failed with a NameError.
    raise ArgumentError, ":fields option must be Hash or Array"
  end
  field_config
end
.change_index_dir(name, new_dir) ⇒ Object
394 395 396 |
# File 'lib/acts_as_ferret.rb', line 394

# Looks up the index called +name+ and forwards +new_dir+ to its
# change_index_dir method; returns whatever that call returns.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end
.close_multi_indexes ⇒ Object
562 563 564 565 566 567 568 569 570 571 572 573 |
# File 'lib/acts_as_ferret.rb', line 562

# Closes every cached multi index and then empties the cache.
#
# close combined index readers, just in case
# this seems to fix a strange test failure that seems to relate to a
# multi_index looking at an old version of the content_base index.
def self.close_multi_indexes
  # TODO only close those where necessary (watch inheritance, where
  # self.name is base class of a class where key is made from),
  # i.e. something like: index.close if key =~ /#{self.name}/
  multi_indexes.each_value { |index| index.close }
  multi_indexes.clear
end
.combine_conditions(conditions, additional_conditions = []) ⇒ Object
combine our conditions with those given by user, if any
496 497 498 499 500 501 502 503 504 |
# File 'lib/acts_as_ferret.rb', line 496

# Combines our own conditions with those given by the user, if any.
#
# conditions            - Array in ActiveRecord conditions form: the SQL
#                         fragment first, followed by its bind values. The
#                         array is modified in place and returned.
# additional_conditions - nil, a SQL String, or an Array (SQL fragment
#                         followed by bind values). Blank values are ignored.
#
# Returns +conditions+.
def self.combine_conditions(conditions, additional_conditions = [])
  extra =
    if Array === additional_conditions
      additional_conditions.dup
    elsif !additional_conditions ||
          (additional_conditions.respond_to?(:empty?) && additional_conditions.empty?)
      []
    else
      # A bare String has no #any? on Ruby >= 1.9, so wrap it before testing.
      [ additional_conditions ]
    end
  return conditions unless extra.any?

  logger.debug "cust_opts: #{extra.inspect}"
  # Parenthesise the user's fragment so that an "or" inside it cannot escape
  # the restriction already present in conditions.first. String#+ builds a
  # new string (the caller's string is left untouched) and still raises a
  # TypeError for non-String fragments.
  conditions[0] = conditions.first + " and (" + extra.shift + ")"
  conditions.concat(extra)
  conditions
end
.conditions_for_model(model, conditions = {}) ⇒ Object
check for per-model conditions and return these if provided
428 429 430 431 432 433 434 |
# File 'lib/acts_as_ferret.rb', line 428

# Checks for per-model conditions and returns these if provided.
#
# When +conditions+ is a Hash it is treated as a per-model lookup table keyed
# by the underscored model name as a Symbol; the entry for +model+ (or nil)
# is returned. Any other value is returned unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless Hash === conditions

  conditions[model.name.underscore.to_sym]
end
.create_index_instance(definition) ⇒ Object
creates a new Index instance.
386 387 388 |
# File 'lib/acts_as_ferret.rb', line 386

# Creates a new Index instance for +definition+: a RemoteIndex when remote?
# is truthy, a LocalIndex otherwise.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end
.define_index(name, options = {}) ⇒ Object
Declares an index.
Use this method to define your indexes in a global initializer (i.e. config/initializers/aaf.rb). This is especially useful if you want to have multiple classes share the same index for cross-model searching as you only need a single call to declare the index for all models.
This method is also used internally to declare an index when you use the acts_as_ferret call inside your class (which in turn can be omitted if the initializer is used). Returns the created index instance.
Options are:
models - Hash of model classes and their per-class option hashes which should use this index. Any models mentioned here will automatically use the index; there is no need to explicitly call acts_as_ferret in the model class definition.
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/acts_as_ferret.rb', line 164

# Declares an index and returns the created index instance.
#
# name    - index name; converted to a Symbol.
# options - Hash merged over the default index definition below. Keys read
#           explicitly here: :ferret (merged over the default ferret
#           configuration), :fields and :additional_fields (passed to
#           build_field_config) and :models (Hash of class => per-class
#           options; each class gets acts_as_ferret called with :index set
#           to this index).
#
# If an index of the same name is already registered it is closed and
# replaced, and the classes that were registered with it are registered
# again with the new instance.
def self.define_index(name, options = {})
  name = name.to_sym
  pending_classes = nil
  if ferret_indexes.has_key?(name)
    # seems models have been already loaded. remove that index for now,
    # re-register any already loaded classes later on.
    idx = get_index(name)
    pending_classes = idx.index_definition[:registered_models]
    pending_classes_configs = idx.registered_models_config
    idx.close
    ferret_indexes.delete(name)
  end

  index_definition = {
    :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
    :name => name,
    :single_index => false,
    :reindex_batch_size => 1000,
    :ferret => {},
    :ferret_fields => {},              # list of indexed fields that will be filled later
    :enabled => true,                  # used for class-wide disabling of Ferret
    :mysql_fast_batches => true,       # turn off to disable the faster, id based batching mechanism for MySQL
    :raise_drb_errors => false         # handle DRb connection errors by default
  }.update( options )

  index_definition[:registered_models] = []

  # build ferret configuration
  index_definition[:ferret] = {
    :or_default          => false,
    :handle_parse_errors => true,
    :default_field       => nil              # will be set later on
    #:max_clauses => 512,
    #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
    # :wild_card_downcase => true
  }.update( options[:ferret] || {} )

  index_definition[:user_default_field] = index_definition[:ferret][:default_field]

  unless remote?
    ActsAsFerret::ensure_directory index_definition[:index_dir]
    index_definition[:index_base_dir] = index_definition[:index_dir]
    index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
    logger.debug "using index in #{index_definition[:index_dir]}"
  end

  # these properties are somewhat vital to the plugin and shouldn't
  # be overwritten by the user:
  index_definition[:ferret].update(
    :key               => :key,
    :path              => index_definition[:index_dir],
    :auto_flush        => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
    :create_if_missing => true
  )

  # field config
  index_definition[:ferret_fields] = build_field_config( options[:fields] )
  index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )

  idx = ferret_indexes[name] = create_index_instance( index_definition )

  # re-register early loaded classes
  if pending_classes
    pending_classes.each do |clazz|
      idx.register_class(clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]))
    end
  end

  if models = options[:models]
    models.each do |clazz, config|
      clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
      clazz.acts_as_ferret config.merge(:index => name)
    end
  end

  return idx
end
.ensure_directory(dir) ⇒ Object
518 519 520 |
# File 'lib/acts_as_ferret.rb', line 518

# Creates +dir+ (including missing parent directories) unless it already
# exists as a directory or as a symlink. Returns nil when nothing had to be
# created.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)

  FileUtils.mkdir_p(dir)
end
.ferret_indexes ⇒ Object
104 |
# File 'lib/acts_as_ferret.rb', line 104

# Accessor for the global Hash of ferret indexes (@@ferret_indexes).
def self.ferret_indexes
  @@ferret_indexes
end
.field_infos(index_definition) ⇒ Object
builds a FieldInfos instance for creation of an index
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 |
# File 'lib/acts_as_ferret.rb', line 539

# Builds a FieldInfos instance for creation of an index.
#
# index_definition - Hash whose :ferret_fields entry maps field names to
#                    per-field option hashes.
#
# The fields :key, :id and :class_name are always added. For every
# configured field a copy of its options is passed to add_field, with :via
# removed and :boost removed when it is a Symbol.
#
# Returns the Ferret::Index::FieldInfos instance.
def self.field_infos(index_definition)
  # default attributes for fields
  fi = Ferret::Index::FieldInfos.new(:store => :no,
                                     :index => :yes,
                                     :term_vector => :no,
                                     :boost => 1.0)
  # unique key composed of classname and id
  fi.add_field(:key, :store => :no, :index => :untokenized)
  # primary key
  fi.add_field(:id, :store => :yes, :index => :untokenized)
  # class_name
  fi.add_field(:class_name, :store => :yes, :index => :untokenized)

  # other fields
  index_definition[:ferret_fields].each_pair do |field, options|
    options = options.dup # leave the stored field configuration untouched
    options.delete :via
    options.delete :boost if options[:boost].is_a?(Symbol) # dynamic boost
    fi.add_field(field, options)
  end
  return fi
end
.filter_include_list_for_model(model, include_options) ⇒ Object
353 354 355 356 357 358 359 360 |
# File 'lib/acts_as_ferret.rb', line 353

# Reduces an :include list to the entries +model+ has a reflection for.
#
# model           - object responding to +reflections+ with a Hash keyed by
#                   association name Symbols.
# include_options - nil, a single association name, a Hash (nested include)
#                   or an Array of names / Hashes.
#
# For a Hash entry only its first key is checked against the reflections.
#
# Returns a new Array with the entries that apply to +model+.
def self.filter_include_list_for_model(model, include_options)
  # Kernel#Array would turn a Hash into key/value pair arrays, which do not
  # respond to to_sym; treat a single Hash as one include entry instead.
  candidates = include_options.is_a?(Hash) ? [ include_options ] : Array(include_options)

  candidates.select do |include_option|
    association = include_option.is_a?(Hash) ? include_option.keys[0] : include_option
    model.reflections.has_key?(association.to_sym)
  end
end
.find(query, models_or_index_name, options = {}, ar_options = {}) ⇒ Object
models_or_index_name may be an index name as declared in config/aaf.rb, a single class or an array of classes to limit search to these classes.
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
# File 'lib/acts_as_ferret.rb', line 306

# Runs +query+ against the index found for +models_or_index_name+ and
# returns the hits wrapped in a SearchResults instance.
#
# models_or_index_name - an index name, a single class, or an Array of
#                        classes to limit the search to these classes.
# options              - ferret-side options; :per_page, :page, :limit,
#                        :offset and :sort are read here. Modified in place.
# ar_options           - options for the ActiveRecord call; :limit, :offset,
#                        :conditions and :order are read here. Modified in
#                        place.
#
# NOTE(review): the listing this was restored from had the +options+ and
# +ar_options+ identifiers stripped. Which hash each access belongs to was
# inferred from the debug message near the end and from the in-line
# comments - confirm against lib/acts_as_ferret.rb.
def self.find(query, models_or_index_name, options = {}, ar_options = {})
  models = case models_or_index_name
  when Array
    models_or_index_name
  when Class
    [ models_or_index_name ]
  else
    nil
  end
  index = find_index(models_or_index_name)
  multi = (MultiIndexBase === index or index.shared?)

  # without :per_page, fall back to :limit / :offset given in the AR options
  unless options[:per_page]
    options[:limit]  ||= ar_options.delete :limit
    options[:offset] ||= ar_options.delete :offset
  end

  if options[:limit] || options[:per_page]
    # need pagination
    options[:page] = if options[:per_page]
      options[:page] ? options[:page].to_i : 1
    else
      nil
    end
    limit = options[:limit] || options[:per_page]
    offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
    options.delete :offset
    options[:limit] = :all

    if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
      # do pagination as the last step after everything has been fetched
      options[:late_pagination] = { :limit => limit, :offset => offset }
    elsif ar_options[:conditions] or ar_options[:order]
      # late limiting in AR call
      unless limit == :all
        ar_options[:limit] = limit
        ar_options[:offset] = offset
      end
    else
      options[:limit] = limit
      options[:offset] = offset
    end
  end
  ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
  total_hits, result = index.find_records query, options.merge(:models => models), ar_options
  ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end
.find_ids(query, models_or_index_name, options = {}, &block) ⇒ Object
find ids of records
285 286 287 288 |
# File 'lib/acts_as_ferret.rb', line 285

# Finds ids of records: delegates to find_ids on the index found for
# +models_or_index_name+, forwarding +query+, the prepared options and the
# given block.
#
# NOTE(review): in the listing this was restored from, every identifier
# containing "options" was stripped, leaving "= , models_or_index_name".
# The helper name add_models_to_options_if_necessary is therefore an
# assumption - confirm against lib/acts_as_ferret.rb.
def self.find_ids(query, models_or_index_name, options = {}, &block)
  options = add_models_to_options_if_necessary options, models_or_index_name
  find_index(models_or_index_name).find_ids query, options, &block
end
.find_index(models_or_index_name) ⇒ Object
returns an index instance suitable for searching/updating the named index. Will return a read only MultiIndex when multiple model classes are given that do not share the same physical index.
293 294 295 296 297 298 299 300 301 302 |
# File 'lib/acts_as_ferret.rb', line 293

# Returns an index instance suitable for searching/updating.
#
# A Symbol or String is treated as an index name and resolved via get_index;
# anything else is handed to get_index_for.
def self.find_index(models_or_index_name)
  case models_or_index_name
  when Symbol, String
    get_index(models_or_index_name.to_sym)
  else
    get_index_for(models_or_index_name)
  end
end
.find_last_index_version(basedir) ⇒ Object
find the most recent version of an index
399 400 401 402 403 404 405 406 407 408 409 410 411 412 |
# File 'lib/acts_as_ferret.rb', line 399

# Finds the most recent version of an index.
#
# A versioned index is a sub-directory of +basedir+ whose name consists of
# digits, optionally followed by "_" and more digits, and which contains a
# file named "segments".
#
# basedir - path of an existing directory.
#
# Returns the path of the highest version directory, or +basedir+ itself
# when no versioned index exists.
def self.find_last_index_version(basedir)
  # check for versioned index
  versions = Dir.entries(basedir).select do |entry|
    dir = File.join(basedir, entry)
    entry =~ /\A\d+(_\d+)?\z/ &&
      File.directory?(dir) &&
      File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # Compare the numeric parts as integers: a plain string sort would rank
  # "9" above "10". The name itself is only a tie breaker (e.g. "09" vs "9").
  latest = versions.max_by { |version| [ version.split('_').map { |part| part.to_i }, version ] }
  File.join(basedir, latest)
end
.get_index(name) ⇒ Object
returns the index with the given name.
266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/acts_as_ferret.rb', line 266

# Returns the index with the given name.
#
# If the index is unknown and the configuration has not been loaded yet,
# load_config is called once and the lookup is retried when it returns a
# truthy value. Once the configuration has been loaded, an unknown name
# raises IndexNotDefined.
def self.get_index(name)
  name = name.to_sym rescue nil
  return ferret_indexes[name] if ferret_indexes.has_key?(name)

  raise IndexNotDefined.new(name.to_s) if @aaf_config_loaded
  return get_index(name) if load_config

  # load_config returned a falsy value: plain lookup, no retry
  ferret_indexes[name]
end
.get_index_for(*classes) ⇒ Object
returns the index used by the given class.
If multiple classes are given, either the single index shared by these classes, or a multi index (to be used for search only) across the indexes of all models, is returned.
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 |
# File 'lib/acts_as_ferret.rb', line 367

# Returns the index used by the given class.
#
# classes - one or more classes (or Arrays of them). When the first entry is
#           not a Class, constantize is called on every entry.
#
# For a single class the class itself and then its superclasses are checked
# against index_using_classes. If multiple classes are given, either the
# single index shared by these classes, or a multi index (to be used for
# search only) across the indexes of all models, is returned.
#
# Raises ArgumentError when no class is given.
# Raises IndexNotDefined when no index is found.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"

  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    clazz = classes.first
    clazz = clazz.superclass while clazz && !index_using_classes.has_key?(clazz.name)
    # clazz is nil here when neither the class nor any of its ancestors is
    # registered; calling clazz.name would then fail with NoMethodError
    # instead of reaching the IndexNotDefined error below.
    clazz && get_index(index_using_classes[clazz.name])
  end

  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  index
end
.index_using_classes ⇒ Object
108 |
# File 'lib/acts_as_ferret.rb', line 108

# Accessor for the global class name => index name mapping
# (@@index_using_classes).
def self.index_using_classes
  @@index_using_classes
end
.init_index_basedir ⇒ Object
make sure the default index base dir exists. by default, all indexes are created under RAILS_ROOT/index/RAILS_ENV
525 526 527 528 |
# File 'lib/acts_as_ferret.rb', line 525

# Sets the default index base dir (@@index_dir) to
# RAILS_ROOT/index/RAILS_ENV and returns that path. No directory is created
# here.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end
.load_config ⇒ Object
256 257 258 259 260 261 262 263 |
# File 'lib/acts_as_ferret.rb', line 256

# Loads RAILS_ROOT/config/aaf.rb and logs that it was loaded.
#
# A LoadError is swallowed, in which case nil is returned; otherwise the
# result of the logger call is returned. @aaf_config_loaded is set to true
# in every case.
def self.load_config
  begin
    # using require_dependency to make the reloading in dev mode work.
    require_dependency("#{RAILS_ROOT}/config/aaf.rb")
    ActsAsFerret::logger.info("loaded configuration file aaf.rb")
  rescue LoadError
    nil
  ensure
    @aaf_config_loaded = true
  end
end
.multi_index(indexes) ⇒ Object
returns a MultiIndex instance operating on a MultiReader
415 416 417 418 419 420 421 422 423 424 425 |
# File 'lib/acts_as_ferret.rb', line 415

# Returns a cached multi index for +indexes+, creating it on first use.
#
# indexes - Array of index names (Symbols or Strings) or of index objects
#           responding to index_name.
#
# The cache key is the sorted, comma-joined list of index names. A
# RemoteMultiIndex is created when remote? is truthy, a MultiIndex otherwise.
def self.multi_index(indexes)
  names = indexes.dup
  names = names.map { |name| name.to_s } if Symbol === names.first

  if String === names.first
    # names were given: resolve them to index instances
    indexes = names.map { |name| get_index(name) }
  else
    # index instances were given: derive their names
    names = names.map { |index| index.index_name.to_s }
  end

  cache_key = names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    index_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    index_class.new(indexes)
  end
end
.multi_indexes ⇒ Object
99 |
# File 'lib/acts_as_ferret.rb', line 99

# Accessor for the global Hash of multi indexes (@@multi_indexes).
def self.multi_indexes
  @@multi_indexes
end
.raise_drb_errors? ⇒ Boolean
126 |
# File 'lib/acts_as_ferret.rb', line 126

# Returns the current value of the @@raise_drb_errors setting.
def self.raise_drb_errors?
  @@raise_drb_errors
end
.rebuild_index(name) ⇒ Object
390 391 392 |
# File 'lib/acts_as_ferret.rb', line 390

# Looks up the index called +name+ and calls rebuild_index on it; returns
# whatever that call returns.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end
.register_class_with_index(clazz, index_name, options = {}) ⇒ Object
called internally by the acts_as_ferret method
returns the index
243 244 245 246 247 248 249 250 251 252 253 254 |
# File 'lib/acts_as_ferret.rb', line 243

# Called internally by the acts_as_ferret method.
#
# Records +clazz+ as a user of the index +index_name+ and registers it with
# that index. If the index does not exist yet it is defined on the fly via
# define_index; in that case options[:fields] defaults to all attribute
# names of a new +clazz+ instance (this modifies +options+ in place).
#
# Returns the index.
def self.register_class_with_index(clazz, index_name, options = {})
  index_name = index_name.to_sym
  @@index_using_classes[clazz.name] = index_name
  unless index = ferret_indexes[index_name]
    # index definition on the fly
    # default to all attributes of this class
    options[:fields] ||= clazz.new.attributes.keys.map { |k| k.to_sym }
    index = define_index index_name, options
  end
  index.register_class(clazz, options)
  return index
end
.remote? ⇒ Boolean
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/acts_as_ferret.rb', line 130

# Tells whether a remote index server is to be used.
#
# The decision is made while @@remote is nil and stored in @@remote: false
# when the FERRET_USE_LOCAL_INDEX environment variable is set or
# ActsAsFerret::Remote::Server.running is truthy, otherwise the uri from
# ActsAsFerret::Remote::Config (false if reading it raises). The outcome is
# logged when it is determined.
#
# Returns the stored value (the server uri, or false).
def self.remote?
  return @@remote unless @@remote.nil?

  @@remote =
    if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
      false
    else
      begin
        ActsAsFerret::Remote::Config.new.uri
      rescue
        false
      end
    end

  message = if @@remote
    "Will use remote index server which should be available at #{@@remote}"
  else
    "Will use local index."
  end
  logger.info message
  @@remote
end
.retrieve_records(id_arrays, find_options = {}) ⇒ Object
retrieves search result records from a data structure like this: { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
TODO: in case of STI, AR will filter out hits from other classes for us, but this will lead to fewer results being retrieved, so scoping of the ferret query to self.class is still needed. From the ferret mailing list (thanks Curtis Hatter):
> I created a method in my base STI class so I can scope my query. For scoping
> I used something like the following line:
>
> query << “ role:#‘*’ : self.class”
>
> Though you could make it more generic by simply asking
> “self.descends_from_active_record?” which is how rails decides if it should
> scope your “find” query for STI models. You can check out “base.rb” in
> activerecord to see that.
But maybe it is better to do the scoping in find_ids_with_ferret…
454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 |
# File 'lib/acts_as_ferret.rb', line 454

# Retrieves search result records.
#
# id_arrays    - Hash of model name => { id (String) => [ rank, score ] }.
# find_options - options handed to each model's find(:all, ...) call. Its
#                :conditions are resolved per model and combined with the
#                id restriction, its :include list is filtered per model,
#                and a given :order suppresses the final sort by rank.
#
# Every fetched record gets ferret_rank and ferret_score assigned from its
# entry in +id_arrays+.
#
# Returns an Array with the records of all models.
def self.retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    # logger.debug "id array from index: #{id_array.inspect}"
    model_class = model.constantize

    # merge conditions
    conditions = conditions_for_model model_class, find_options[:conditions]
    conditions = combine_conditions([ "#{model_class.table_name}.#{model_class.primary_key} in (?)",
                                      id_array.keys ],
                                    conditions)

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = nil
    if include_options = find_options[:include]
      filtered_include_options = filter_include_list_for_model(model_class, include_options)
    end

    # fetch
    tmp_result = model_class.find(:all, find_options.merge(:conditions => conditions,
                                                           :include    => filtered_include_options))

    # set scores and rank
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result += tmp_result
  end

  # order results as they were found by ferret, unless an AR :order
  # option was given
  # logger.debug "unsorted result: #{result.map{|a| "#{a.id} / #{a.title} / #{a.ferret_rank}"}.inspect}"
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  # logger.debug "sorted result: #{result.map{|a| "#{a.id} / #{a.ferret_rank}"}.inspect}"
  return result
end
.total_hits(query, models_or_index_name, options = {}) ⇒ Object
count hits for a query
279 280 281 282 |
# File 'lib/acts_as_ferret.rb', line 279

# Counts hits for a query: delegates to total_hits on the index found for
# +models_or_index_name+, forwarding +query+ and the prepared options.
#
# NOTE(review): in the listing this was restored from, every identifier
# containing "options" was stripped, leaving "= , models_or_index_name".
# The helper name add_models_to_options_if_necessary is therefore an
# assumption - confirm against lib/acts_as_ferret.rb.
def self.total_hits(query, models_or_index_name, options = {})
  options = add_models_to_options_if_necessary options, models_or_index_name
  find_index(models_or_index_name).total_hits query, options
end