Module: ActsAsFerret

Defined in:
lib/acts_as_ferret.rb,
lib/acts_as_ferret/index.rb,
lib/acts_as_ferret/railtie.rb,
lib/acts_as_ferret/version.rb,
lib/acts_as_ferret/without_ar.rb,
lib/acts_as_ferret/act_methods.rb,
lib/acts_as_ferret/blank_slate.rb,
lib/acts_as_ferret/local_index.rb,
lib/acts_as_ferret/multi_index.rb,
lib/acts_as_ferret/bulk_indexer.rb,
lib/acts_as_ferret/rdig_adapter.rb,
lib/acts_as_ferret/remote_index.rb,
lib/acts_as_ferret/class_methods.rb,
lib/acts_as_ferret/ferret_result.rb,
lib/acts_as_ferret/server/config.rb,
lib/acts_as_ferret/server/server.rb,
lib/acts_as_ferret/more_like_this.rb,
lib/acts_as_ferret/search_results.rb,
lib/acts_as_ferret/instance_methods.rb,
lib/acts_as_ferret/remote_functions.rb,
lib/acts_as_ferret/remote_multi_index.rb,
lib/acts_as_ferret/server/unix_daemon.rb,
lib/acts_as_ferret/ferret_find_methods.rb

Overview

:nodoc:

Defined Under Namespace

Modules: ActMethods, ClassMethods, FerretFindMethods, InstanceMethods, MoreLikeThis, RdigAdapter, RemoteFunctions, ResultAttributes, Server, WithoutAR

Classes: AbstractIndex, ActsAsFerretError, BlankSlate, BulkIndexer, FerretResult, IndexAlreadyDefined, IndexLogger, IndexNotDefined, LocalIndex, MultiIndex, MultiIndexBase, Railtie, RemoteIndex, RemoteMultiIndex, SearchResults

Constant Summary collapse

DEFAULT_FIELD_OPTIONS =

Default ferret configuration for index fields

{
  :store       => :no, 
  :highlight   => :yes, 
  :index       => :yes, 
  :term_vector => :with_positions_offsets,
  :boost       => 1.0
}
VERSION =
'0.5.4'
@@multi_indexes =

Global Hash containing all multi indexes created by all classes using the plugin. The key is the comma-separated concatenation of the alphabetically sorted names of the indexes the searcher searches.

Hash.new
@@ferret_indexes =

Global Hash containing the ferret indexes of all classes using the plugin. The key is the index name.

Hash.new
@@index_using_classes =

mapping from class name to index name

{}
@@logger =
Logger.new "#{Rails.root || '.'}/log/acts_as_ferret.log"
@@raise_drb_errors =
false
@@remote =
nil

Class Method Summary collapse

Class Method Details

.append_features(base) ⇒ Object



546
547
548
549
# File 'lib/acts_as_ferret.rb', line 546

# Module inclusion hook: runs the inherited append_features behaviour via
# +super+ and then extends the including +base+ with ClassMethods, so the
# methods of ClassMethods become singleton methods of +base+.
def self.append_features(base)
  super
  base.extend(ClassMethods)
end

.build_field_config(fields) ⇒ Object



519
520
521
522
523
524
525
526
527
528
529
# File 'lib/acts_as_ferret.rb', line 519

# Builds the per-field configuration hash of an index definition.
#
# fields:: +nil+/+false+ (no fields configured), an Array of field names,
#          or a Hash mapping field names to per-field option hashes.
#
# Returns a Hash mapping every given field name to the value produced by
# +field_config_for+ for that field. The Hash is empty when +fields+ is
# nil or false.
#
# Raises ArgumentError when +fields+ is neither a Hash nor an Array.
def self.build_field_config(fields)
  field_config = {}
  return field_config unless fields

  case fields
  when Array
    fields.each { |name| field_config[name] = field_config_for(name) }
  when Hash
    fields.each { |name, options| field_config[name] = field_config_for(name, options) }
  else
    # ArgumentError is used on purpose: Ruby has no built-in
    # InvalidArgumentError, so naming that constant here would surface as a
    # NameError instead of the intended, descriptive error.
    raise ArgumentError, ":fields option must be Hash or Array"
  end
  field_config
end

.change_index_dir(name, new_dir) ⇒ Object



397
398
399
# File 'lib/acts_as_ferret.rb', line 397

# Looks up the index called +name+ via get_index and delegates to that
# index's own change_index_dir with +new_dir+.
# Returns whatever the index's change_index_dir returns.
def self.change_index_dir(name, new_dir)
  get_index(name).change_index_dir new_dir
end

.close_multi_indexes ⇒ Object



575
576
577
578
579
580
581
582
583
584
585
586
# File 'lib/acts_as_ferret.rb', line 575

# Closes every cached multi index and then empties the multi index cache.
# Returns the (now empty) cache Hash.
def self.close_multi_indexes
  # Closing the combined index readers is a precaution: it seems to fix a
  # strange test failure apparently caused by a multi_index looking at an
  # old version of the content_base index.
  #
  # TODO only close those where necessary (watch inheritance, where
  # self.name is base class of a class the cache key is made from), e.g. by
  # closing only entries whose key matches /#{self.name}/.
  multi_indexes.each_value { |multi| multi.close }
  multi_indexes.clear
end

.combine_conditions(conditions, additional_conditions = []) ⇒ Object

combine our conditions with those given by user, if any



495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
# File 'lib/acts_as_ferret.rb', line 495

# Merges user supplied conditions into our own conditions array.
#
# conditions::            AR-style conditions Array whose first element is
#                         the SQL fragment String; it is modified in place.
# additional_conditions:: nil, a String fragment, or an Array consisting of
#                         a fragment followed by its bind values.
#
# Returns +conditions+ (the same object that was passed in).
def self.combine_conditions(conditions, additional_conditions = [])
  has_additional =
    if !additional_conditions
      false
    elsif additional_conditions.kind_of?(Enumerable)
      # Arrays (and, before ruby 1.9.x, Strings) are Enumerable
      additional_conditions.any?
    else
      # Since ruby 1.9.x String is no longer an Enumerable
      # http://www.ivanenviroman.com/string-is-not-an-enumerable-in-ruby-1-9/
      !additional_conditions.empty?
    end
  return conditions unless has_additional

  # work on a copy so the caller's array is left untouched
  extra = additional_conditions.is_a?(Array) ? additional_conditions.dup : [ additional_conditions ]
  logger.debug "cust_opts: #{extra.inspect}"
  # the first extra element is the SQL fragment, the rest are bind values
  conditions.first << " and " << extra.shift
  conditions.concat(extra)
  conditions
end

.conditions_for_model(model, conditions = {}) ⇒ Object

check for per-model conditions and return these if provided



431
432
433
434
435
436
437
# File 'lib/acts_as_ferret.rb', line 431

# Picks the conditions that apply to +model+.
#
# When +conditions+ is a Hash it is treated as a per-model lookup keyed by
# the underscored model name as a Symbol, and the entry for +model+ is
# returned (nil when there is none). Any other value is returned as is.
def self.conditions_for_model(model, conditions = {})
  return conditions unless conditions.is_a?(Hash)

  conditions[model.name.underscore.to_sym]
end

.create_index_instance(definition) ⇒ Object

creates a new Index instance.



389
390
391
# File 'lib/acts_as_ferret.rb', line 389

# Creates a new index instance for the given index +definition+.
# Instantiates RemoteIndex when remote? is truthy, LocalIndex otherwise,
# passing +definition+ to the constructor.
def self.create_index_instance(definition)
  (remote? ? RemoteIndex : LocalIndex).new(definition)
end

.define_index(name, options = {}) ⇒ Object

Declares an index.

Use this method to define your indexes in a global initializer (i.e. config/initializers/aaf.rb). This is especially useful if you want to have multiple classes share the same index for cross-model searching as you only need a single call to declare the index for all models.

This method is also used internally to declare an index when you use the acts_as_ferret call inside your class (which in turn can be omitted if the initializer is used). Returns the created index instance.

Options are:

models

Hash of model classes and their per-class option hashes which should use this index. Any models mentioned here will automatically use the index, there is no need to explicitly call acts_as_ferret in the model class definition.



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/acts_as_ferret.rb', line 167

# Declares the index +name+ and stores it in ferret_indexes.
#
# name::    index name; converted to a Symbol.
# options:: overrides for the index definition defaults below. The keys
#           :ferret, :fields, :additional_fields and :models are also
#           read individually further down.
#
# If an index with this name is already registered it is closed and
# removed, and the models that were registered with it are re-registered
# with the freshly created index.
#
# Returns the created index instance.
def self.define_index(name, options = {})
  name = name.to_sym
  pending_classes = nil
  if ferret_indexes.has_key?(name)
    # seems models have been already loaded. remove that index for now,
    # re-register any already loaded classes later on.
    idx = get_index(name)
    pending_classes = idx.index_definition[:registered_models]
    pending_classes_configs = idx.registered_models_config
    idx.close
    ferret_indexes.delete(name)
  end

  # defaults, overridden by whatever the caller passed in +options+
  index_definition = {
    :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
    :name => name,
    :single_index => false,
    :reindex_batch_size => 1000,
    :ferret => {},
    :ferret_fields => {},             # list of indexed fields that will be filled later
    :enabled => true,                 # used for class-wide disabling of Ferret
    :mysql_fast_batches => true,      # turn off to disable the faster, id based batching mechanism for MySQL
    :raise_drb_errors => false        # handle DRb connection errors by default
  }.update( options )

  # always start with an empty model list, even if options supplied one
  index_definition[:registered_models] = []
  
  # build ferret configuration
  index_definition[:ferret] = {
    :or_default          => false, 
    :handle_parse_errors => true,
    :default_field       => nil,              # will be set later on
    #:max_clauses => 512,
    #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
    # :wild_card_downcase => true
  }.update( options[:ferret] || {} )

  # remember the default field as configured by the user (nil if none given)
  index_definition[:user_default_field] = index_definition[:ferret][:default_field]

  # directory handling is only done when not in remote mode
  unless remote?
    ActsAsFerret::ensure_directory index_definition[:index_dir] 
    index_definition[:index_base_dir] = index_definition[:index_dir]
    index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
    logger.debug "using index in #{index_definition[:index_dir]}"
  end
  
  # these properties are somewhat vital to the plugin and shouldn't
  # be overwritten by the user:
  index_definition[:ferret].update(
    :key               => :key,
    :path              => index_definition[:index_dir],
    :auto_flush        => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
    :create_if_missing => true
  )

  # field config
  # (entries from :additional_fields override same-named :fields entries)
  index_definition[:ferret_fields] = build_field_config( options[:fields] )
  index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )

  idx = ferret_indexes[name] = create_index_instance( index_definition )

  # re-register early loaded classes
  # NOTE(review): assumes registered_models_config has an entry for every
  # registered model; a missing (nil) entry would make merge fail - confirm.
  if pending_classes
    pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) }
  end

  # models named in options[:models] get acts_as_ferret called on them;
  # classes not yet responding to acts_as_ferret get WithoutAR mixed in first
  if models = options[:models]
    models.each do |clazz, config|
      clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
      clazz.acts_as_ferret config.merge(:index => name)
    end
  end

  return idx
end

.ensure_directory(dir) ⇒ Object



531
532
533
# File 'lib/acts_as_ferret.rb', line 531

# Creates +dir+ (including missing parent directories) unless the path is
# already a directory or a symlink. Symlinks are left alone even when their
# target does not exist.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)

  FileUtils.mkdir_p(dir)
end

.ferret_indexes ⇒ Object



107
# File 'lib/acts_as_ferret.rb', line 107

# Reader for the module-wide @@ferret_indexes registry.
def self.ferret_indexes; @@ferret_indexes end

.field_infos(index_definition) ⇒ Object

builds a FieldInfos instance for creation of an index



552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
# File 'lib/acts_as_ferret.rb', line 552

# Builds the Ferret::Index::FieldInfos instance used to create an index.
#
# index_definition:: index definition Hash; only its :ferret_fields entry
#                    (field name => options Hash) is read here.
#
# Returns the populated FieldInfos object.
def self.field_infos(index_definition)
  # default attributes for fields
  infos = Ferret::Index::FieldInfos.new(:store => :no,
                                         :index => :yes,
                                         :term_vector => :no,
                                         :boost => 1.0)

  # built-in untokenized fields, name => store setting:
  #   :key        - unique key composed of classname and id
  #   :id         - primary key
  #   :class_name - class name of the record
  { :key => :no, :id => :yes, :class_name => :yes }.each do |builtin, stored|
    infos.add_field(builtin, :store => stored, :index => :untokenized)
  end

  # user defined fields
  index_definition[:ferret_fields].each do |field_name, field_options|
    # copy first, so the index definition itself stays unchanged
    ferret_options = field_options.dup
    ferret_options.delete(:via)
    # a Symbol boost is a dynamic boost and is not handed to FieldInfos
    ferret_options.delete(:boost) if ferret_options[:boost].is_a?(Symbol)
    infos.add_field(field_name, ferret_options)
  end
  infos
end

.filter_include_list_for_model(model, include_options) ⇒ Object



356
357
358
359
360
361
362
363
# File 'lib/acts_as_ferret.rb', line 356

# Reduces an AR :include list to the associations +model+ actually defines.
# Needed for multi model searches, where an association may exist on only
# some of the searched models.
#
# model::           class responding to +reflections+ (a Hash keyed by
#                   association name).
# include_options:: nil, a single association name, a Hash of nested
#                   includes, or an Array mixing names and Hashes.
#
# Returns a new Array holding the usable entries in their original order.
# Hash entries are reduced to the keys naming an existing association and
# are dropped entirely when no key is left.
def self.filter_include_list_for_model(model, include_options)
  reflections = model.reflections
  # reflection keys are Symbols or Strings depending on the ActiveRecord
  # version, so both spellings are checked
  association = lambda do |name|
    reflections.has_key?(name.to_sym) || reflections.has_key?(name.to_s)
  end

  # Kernel#Array would break a Hash up into [key, value] pairs, so a lone
  # Hash is wrapped by hand
  entries = include_options.is_a?(Hash) ? [ include_options ] : Array(include_options)

  filtered_include_options = []
  entries.each do |entry|
    if entry.is_a?(Hash)
      usable = entry.reject { |name, _nested| !association.call(name) }
      filtered_include_options << usable unless usable.empty?
    elsif association.call(entry)
      filtered_include_options << entry
    end
  end
  filtered_include_options
end

.find(query, models_or_index_name, options = {}, ar_options = {}) ⇒ Object

models_or_index_name may be an index name as declared in config/aaf.rb, a single class or an array of classes to limit search to these classes.



309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/acts_as_ferret.rb', line 309

# Runs +query+ against the index resolved from +models_or_index_name+ and
# wraps the records found in a SearchResults object.
#
# query::                passed on unchanged to the index's find_records.
# models_or_index_name:: an index name, a single Class, or an Array of
#                        classes. Only for a Class / Array are the classes
#                        forwarded as the :models option (nil otherwise).
# options::              search options. :limit, :offset, :page, :per_page
#                        and :sort are read here; the Hash is modified in
#                        place.
# ar_options::           ActiveRecord find options. :limit and :offset may
#                        be removed from / written to this Hash.
#
# Returns SearchResults.new(result, total_hits, options[:page], options[:per_page]).
def self.find(query, models_or_index_name, options = {}, ar_options = {})
  models = case models_or_index_name
  when Array
    models_or_index_name
  when Class
    [ models_or_index_name ]
  else
    nil
  end
  index = find_index(models_or_index_name)
  # "multi": the index is a MultiIndexBase or reports itself as shared
  multi = (MultiIndexBase === index or index.shared?)
  # without :per_page, fall back to AR style :limit / :offset (these are
  # removed from ar_options at this point)
  unless options[:per_page]
    options[:limit] ||= ar_options.delete :limit
    options[:offset] ||= ar_options.delete :offset
  end
  if options[:limit] || options[:per_page]
    # need pagination
    options[:page] = if options[:per_page]
      options[:page] ? options[:page].to_i : 1
    else
      nil
    end
    limit = options[:limit] || options[:per_page]
    offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
    options.delete :offset
    # start out with an unlimited search; the last branch below restores
    # the real limit/offset when the search itself can apply them
    options[:limit] = :all
    
    if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
      # do pagination as the last step after everything has been fetched
      options[:late_pagination] = { :limit => limit, :offset => offset }
    elsif ar_options[:conditions] or ar_options[:order]
      # late limiting in AR call
      unless limit == :all
        ar_options[:limit] = limit
        ar_options[:offset] = offset
      end
    else
      # no AR conditions/order: hand limit and offset to the search itself
      options[:limit] = limit
      options[:offset] = offset
    end
  end
  ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
  total_hits, result = index.find_records query, options.merge(:models => models), ar_options
  ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end

.find_ids(query, models_or_index_name, options = {}, &block) ⇒ Object

find ids of records



288
289
290
291
# File 'lib/acts_as_ferret.rb', line 288

# Finds the ids of records matching +query+.
#
# +options+ is first passed through add_models_to_options_if_necessary
# together with +models_or_index_name+; the call is then delegated to
# find_ids of the index resolved by find_index. The block, if given, is
# forwarded to that call.
# Returns whatever the index's find_ids returns.
def self.find_ids(query, models_or_index_name, options = {}, &block)
  options = add_models_to_options_if_necessary options, models_or_index_name
  find_index(models_or_index_name).find_ids query, options, &block
end

.find_index(models_or_index_name) ⇒ Object

returns an index instance suitable for searching/updating the named index. Will return a read only MultiIndex when multiple model classes are given that do not share the same physical index.



296
297
298
299
300
301
302
303
304
305
# File 'lib/acts_as_ferret.rb', line 296

# Resolves the index to use for +models_or_index_name+.
#
# A Symbol or String is taken as an index name and looked up (as a Symbol)
# with get_index. Anything else is handed to get_index_for, which resolves
# the index from one or more model classes.
def self.find_index(models_or_index_name)
  named = models_or_index_name.is_a?(Symbol) || models_or_index_name.is_a?(String)
  return get_index_for(models_or_index_name) unless named

  get_index(models_or_index_name.to_sym)
end

.find_last_index_version(basedir) ⇒ Object

find the most recent version of an index



402
403
404
405
406
407
408
409
410
411
412
413
414
415
# File 'lib/acts_as_ferret.rb', line 402

# Finds the most recent version of an index below +basedir+.
#
# A version is a sub directory that is named with digits only, optionally
# followed by "_" and more digits, and that contains a file called
# "segments".
#
# Returns the path of the newest version directory, or +basedir+ itself
# when it holds no versioned index.
def self.find_last_index_version(basedir)
  # check for versioned index
  versions = Dir.entries(basedir).select do |entry|
    dir = File.join(basedir, entry)
    # \A / \z anchor the whole name, not just one line of it
    entry =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # select latest version. The parts are compared as numbers, because a
  # plain string sort would rank "10" below "9" and "x_10" below "x_2".
  latest = versions.max_by { |version| version.split('_').map { |part| part.to_i } }
  File.join basedir, latest
end

.get_index(name) ⇒ Object

returns the index with the given name.



269
270
271
272
273
274
275
276
277
278
279
# File 'lib/acts_as_ferret.rb', line 269

# Returns the index registered in ferret_indexes under +name+.
#
# name:: index name; converted with to_sym. A value that cannot be
#        converted is treated as nil.
#
# When the name is unknown and the configuration has not been loaded yet
# (@aaf_config_loaded unset), load_config is called once and, if it
# returns a truthy value, the lookup is retried.
#
# Raises IndexNotDefined when the name is unknown and the configuration
# has already been loaded.
def self.get_index(name)
  # the rescue modifier swallows any StandardError raised by to_sym
  name = name.to_sym rescue nil
  unless ferret_indexes.has_key?(name)
    if @aaf_config_loaded
      raise IndexNotDefined.new(name.to_s)
    else
      # NOTE(review): when load_config returns a falsy value (it does so
      # after rescuing LoadError) no retry happens and the plain lookup
      # below returns nil instead of raising - confirm this is intended.
      load_config and return get_index name
    end
  end
  ferret_indexes[name]
end

.get_index_for(*classes) ⇒ Object

returns the index used by the given class.

If multiple classes are given, either the single index shared by these classes, or a multi index (to be used for search only) across the indexes of all models, is returned.

Raises:

  • (ArgumentError)


370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# File 'lib/acts_as_ferret.rb', line 370

# Returns the index used by the given class.
#
# classes:: one or more classes (nested Arrays are flattened). When the
#           first entry is not a Class, every entry is converted with
#           +constantize+.
#
# For a single class the index registered for that class or for its
# nearest registered ancestor is returned. For several classes the result
# is either the one index they all share or a multi index (for searching
# only) across their distinct indexes.
#
# Raises ArgumentError when no class is given, and IndexNotDefined when no
# index can be found for the class or any of its ancestors.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    registry = index_using_classes
    clazz = classes.first
    # walk up the inheritance chain until a registered class is found
    clazz = clazz.superclass while clazz && !registry.has_key?(clazz.name)
    # clazz is nil when neither the class nor an ancestor is registered;
    # leave index nil then, so the IndexNotDefined below is raised instead
    # of a NoMethodError on nil
    clazz && get_index(registry[clazz.name])
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  index
end

.index_using_classes ⇒ Object



111
# File 'lib/acts_as_ferret.rb', line 111

# Reader for the module-wide @@index_using_classes registry.
def self.index_using_classes; @@index_using_classes end

.init_index_basedir ⇒ Object

Make sure the default index base dir exists. By default, all indexes are created under Rails.root/index/Rails.env.



538
539
540
541
# File 'lib/acts_as_ferret.rb', line 538

# Sets @@index_dir to "<Rails.root or '.'>/index/<Rails.env>" and returns
# that path.
# NOTE(review): only the path is computed here; no directory is created
# by this method itself.
def self.init_index_basedir
  index_base = "#{Rails.root || '.'}/index"
  @@index_dir = "#{index_base}/#{Rails.env}"
end

.load_config ⇒ Object



259
260
261
262
263
264
265
266
# File 'lib/acts_as_ferret.rb', line 259

# Loads the index configuration file config/aaf.rb below Rails.root.
#
# A LoadError raised while loading is swallowed, in which case nil is
# returned; otherwise the return value is that of the logger call.
# @aaf_config_loaded is set to true in every case, including when loading
# failed.
def self.load_config
  # using require_dependency to make the reloading in dev mode work.
  require_dependency "#{Rails.root}/config/aaf.rb"
  ActsAsFerret::logger.info "loaded configuration file aaf.rb"
rescue LoadError
ensure
  @aaf_config_loaded = true
end

.multi_index(indexes) ⇒ Object

returns a MultiIndex instance operating on a MultiReader



418
419
420
421
422
423
424
425
426
427
428
# File 'lib/acts_as_ferret.rb', line 418

# Returns the multi index spanning the given indexes, creating and caching
# it on first use.
#
# indexes:: Array of index names (Symbols or Strings) or of index objects
#           responding to +index_name+. The type of the first element
#           decides how the whole Array is interpreted.
#
# The cache key is the sorted, comma-joined list of index names. New
# instances are RemoteMultiIndex when remote? is truthy, MultiIndex
# otherwise.
def self.multi_index(indexes)
  names = Symbol === indexes.first ? indexes.map(&:to_s) : indexes.dup
  if String === names.first
    # names were given: resolve them to index instances
    indexes = names.map { |index_name| get_index index_name }
  else
    # index instances were given: derive the names from them
    names = names.map { |index| index.index_name.to_s }
  end
  cache_key = names.sort.join(",")
  cache = ActsAsFerret::multi_indexes
  cache[cache_key] ||= begin
    multi_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    multi_class.new(indexes)
  end
end

.multi_indexes ⇒ Object



102
# File 'lib/acts_as_ferret.rb', line 102

# Reader for the module-wide @@multi_indexes cache.
def self.multi_indexes; @@multi_indexes end

.raise_drb_errors? ⇒ Boolean

Returns:

  • (Boolean)


129
# File 'lib/acts_as_ferret.rb', line 129

# Returns the current value of the module-wide @@raise_drb_errors flag.
def self.raise_drb_errors?; @@raise_drb_errors end

.rebuild_index(name) ⇒ Object



393
394
395
# File 'lib/acts_as_ferret.rb', line 393

# Looks up the index called +name+ via get_index and delegates to that
# index's own rebuild_index.
# Returns whatever the index's rebuild_index returns.
def self.rebuild_index(name)
  get_index(name).rebuild_index
end

.register_class_with_index(clazz, index_name, options = {}) ⇒ Object

called internally by the acts_as_ferret method

returns the index



246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/acts_as_ferret.rb', line 246

# Registers +clazz+ as a user of the index +index_name+; called internally
# by the acts_as_ferret method.
#
# clazz::      the class to register; its name is recorded in the
#              class name => index name mapping.
# index_name:: name of the index, converted to a Symbol.
# options::    passed on to define_index (only when the index has to be
#              defined here) and to the index's register_class. May get a
#              :fields entry added in place.
#
# Returns the index.
def self.register_class_with_index(clazz, index_name, options = {})
  index_key = index_name.to_sym
  @@index_using_classes[clazz.name] = index_key

  index = ferret_indexes[index_key]
  unless index
    # index definition on the fly:
    # default to all attributes of this class
    options[:fields] ||= clazz.new.attributes.keys.map(&:to_sym)
    index = define_index(index_key, options)
  end

  index.register_class(clazz, options)
  index
end

.remote? ⇒ Boolean

Returns:

  • (Boolean)


133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/acts_as_ferret.rb', line 133

# Tells whether a remote index server is to be used.
#
# The decision is made on the first call only and then kept in @@remote:
# * false when the FERRET_USE_LOCAL_INDEX environment variable is set or
#   ActsAsFerret::Server::Server.running is truthy,
# * otherwise the +uri+ of a fresh ActsAsFerret::Server::Config, or false
#   if obtaining it raises a StandardError.
#
# Returns false or the value of that +uri+ (hence not strictly a Boolean).
def self.remote?
  if @@remote.nil?
    if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Server::Server.running
      @@remote = false
    else
      # the rescue modifier turns any StandardError into "use local index"
      @@remote = ActsAsFerret::Server::Config.new.uri rescue false
    end
    if @@remote
      logger.info "Will use remote index server which should be available at #{@@remote}"
    else
      logger.info "Will use local index."
    end
  end
  @@remote
end

.retrieve_records(id_arrays, find_options = {}) ⇒ Object

Retrieves search result records from a data structure like this: { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }

TODO: in case of STI AR will filter out hits from other classes for us, but this will lead to less results retrieved –> scoping of ferret query to self.class is still needed. from the ferret ML (thanks Curtis Hatter) > I created a method in my base STI class so I can scope my query. For scoping > I used something like the following line: > > query << “ role:#‘*’ : self.class” > > Though you could make it more generic by simply asking > “self.descends_from_active_record?” which is how rails decides if it should > scope your “find” query for STI models. You can check out “base.rb” in > activerecord to see that. but maybe better do the scoping in find_ids_with_ferret…



457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# File 'lib/acts_as_ferret.rb', line 457

# Loads the records for a set of search hits and tags each record with its
# ferret rank and score.
#
# id_arrays::    Hash of model class name => Hash of id (as String) =>
#                [ rank, score ].
# find_options:: AR style options; :conditions, :include, :order, :limit
#                and :offset are used here.
#
# Returns an Array of records, sorted by ferret rank unless an :order
# option was given.
def self.retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    model_class = model.constantize

    # merge conditions
    # (our own "primary key in (ids)" restriction plus the user supplied
    # conditions that apply to this model, if any)
    conditions = conditions_for_model model_class, find_options[:conditions]
    conditions = combine_conditions([ "#{model_class.table_name}.#{model_class.primary_key} in (?)", 
                                      id_array.keys ], 
                                    conditions)

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = nil
    if include_options = find_options[:include]
      filtered_include_options = filter_include_list_for_model(model_class, include_options)
    end

    # fetch
    options = find_options.merge(:conditions => conditions, :include => filtered_include_options)
    tmp_result = model_class.where(options[:conditions]).includes(options[:include]).order(options[:order]).limit(options[:limit]).offset(options[:offset])

    # set scores and rank
    # (id_array is keyed by the record id as a String)
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result += tmp_result
  end
  
  # order results as they were found by ferret, unless an AR :order
  # option was given
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  return result
end

.total_hits(query, models_or_index_name, options = {}) ⇒ Object

count hits for a query



282
283
284
285
# File 'lib/acts_as_ferret.rb', line 282

# Counts the hits for +query+.
#
# +options+ is first passed through add_models_to_options_if_necessary
# together with +models_or_index_name+; the call is then delegated to
# total_hits of the index resolved by find_index.
# Returns whatever the index's total_hits returns.
def self.total_hits(query, models_or_index_name, options = {})
  options = add_models_to_options_if_necessary options, models_or_index_name
  find_index(models_or_index_name).total_hits query, options
end