Class: Index::Index

Inherits:

Object

Object
Index::Index

show all

Defined in: lib/ferret_extensions.rb

Instance Attribute Summary collapse

#batch_size ⇒ Object

Returns the value of attribute batch_size.
#logger ⇒ Object

Returns the value of attribute logger.

Instance Method Summary collapse

#batch_delete(docs) ⇒ Object

If docs is a Hash or an Array then a batch delete will be performed.
#bulk_index(model, ids, options = {}) ⇒ Object
#doc_number(id) ⇒ Object

Searches for the first document with arg in the id field and returns its internal document number.
#index_model(model) ⇒ Object
#index_models(models) ⇒ Object
#update_batch(document_analyzer_pairs) ⇒ Object

bulk-inserts a number of ferret documents.

Instance Attribute Details

#batch_size ⇒ `Object`

Returns the value of attribute batch_size.



36
37
38

# File 'lib/ferret_extensions.rb', line 36

# Reader for the +batch_size+ attribute: returns the current value of
# @batch_size (nil when it has not been assigned).
def batch_size; @batch_size; end

#logger ⇒ `Object`

Returns the value of attribute logger.



36
37
38

# File 'lib/ferret_extensions.rb', line 36

# Reader for the +logger+ attribute: returns the current value of @logger
# (nil when it has not been assigned).
def logger; @logger; end

Instance Method Details

#batch_delete(docs) ⇒ `Object`

If docs is a Hash or an Array then a batch delete will be performed. If docs is an Array then it will be considered an array of ids. If it is a Hash, then its keys will be used instead as the Array of document ids. If an id is an Integer then it is considered a Ferret document number and the corresponding document will be deleted. If an id is a String or a Symbol then it will be considered a term and the documents that contain that term in the :id_field will be deleted.

docs: An Array of docs to be deleted, or a Hash (in which case the keys are used)

ripped from Ferret trunk.

Raises:

(ArgumentError)

# File 'lib/ferret_extensions.rb', line 111

# Deletes a batch of documents from the index.
#
# docs:: An Array of ids, or a Hash whose keys are used as the ids.
#        Integer ids are passed to @reader.delete (document numbers);
#        String and Symbol ids are deleted through @writer as terms in
#        the @id_field (Symbols are converted with to_s).
#
# All entries are classified before anything is deleted, so an invalid
# entry aborts the call without touching the index.
#
# Returns self.
# Raises ArgumentError if docs is neither an Array nor a Hash, or if any
# entry is not an Integer, String or Symbol.
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a?(Array)

  ids = []
  terms = []
  docs.each do |doc|
    case doc
    when String  then terms << doc
    when Symbol  then terms << doc.to_s
    when Integer then ids << doc
    else
      # Report the class of the offending entry itself (the original
      # referenced an undefined +id+ here).
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end

  unless ids.empty?
    ensure_reader_open
    ids.each { |doc_num| @reader.delete(doc_num) }
  end

  unless terms.empty?
    ensure_writer_open
    terms.each { |term| @writer.delete(@id_field, term) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end

  self
end

#bulk_index(model, ids, options = {}) ⇒ `Object`

# File 'lib/ferret_extensions.rb', line 56

# Indexes the records of +model+ identified by +ids+ in batches.
#
# model::   object responding to records_for_bulk_index(ids, batch_size),
#           which yields (records, offset) pairs.
# ids::     collection of record ids; only its size and the collection
#           itself (passed through to the model) are used here.
# options:: Hash; :optimize (default true) controls whether the index is
#           optimized after the final flush. The hash is not modified.
#
# Auto flushing is switched off while the batches are written and the
# previous @auto_flush value is restored afterwards, even when indexing,
# flushing or optimizing raises.
#
# Returns the restored @auto_flush value (the result of the original
# method's final assignment, kept for backward compatibility).
def bulk_index(model, ids, options = {})
  # Read the option without mutating the caller's hash; an explicit
  # :optimize => nil/false still disables optimizing.
  optimize_afterwards = options.fetch(:optimize, true)
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if optimize_afterwards
      logger.info 'optimizing...'
      optimize
    end
  ensure
    # Never leave the index stuck with auto flush disabled.
    @auto_flush = orig_flush
  end
  orig_flush
end

#doc_number(id) ⇒ `Object`

Searches for the first document with arg in the id field and returns its internal document number. The id field is either :id or whatever you set the :id_field parameter to when you created the Index object.

# File 'lib/ferret_extensions.rb', line 141

# Looks up the internal document number of the first document whose
# @id_field contains +id+ (converted with to_s).
#
# The lookup runs inside @dir.synchronize with the reader opened first.
# Returns the document number, or nil when no document matches.
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open
    matches = @reader.term_docs_for(@id_field, id.to_s)
    return nil unless matches.next?
    return matches.doc
  end
end

#index_model(model) ⇒ `Object`

# File 'lib/ferret_extensions.rb', line 46

# Reindexes all records of a single model.
#
# A BulkIndexer is created in :reindex mode, an info line naming the model
# is logged, and every (records, offset) batch yielded by
# model.records_for_rebuild(@batch_size) is handed to the indexer.
# Returns whatever records_for_rebuild returns.
def index_model(model)
  indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                          :model => model, :index => self, :reindex => true)
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |batch, position| indexer.index_records(batch, position) }
end

#index_models(models) ⇒ `Object`

# File 'lib/ferret_extensions.rb', line 38

# Reindexes every model in +models+ and then finalizes the index.
#
# Each model is passed to #index_model in order; afterwards the index is
# flushed, optimized and closed, and ActsAsFerret.close_multi_indexes is
# invoked. Returns the result of that last call.
def index_models(models)
  models.each do |klass|
    index_model(klass)
  end
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end

#update_batch(document_analyzer_pairs) ⇒ `Object`

bulk-inserts a number of ferret documents. The argument has to be an array of two-element arrays each holding the document data and the analyzer to use for this document (which may be nil).

# File 'lib/ferret_extensions.rb', line 79

# Bulk-inserts a number of ferret documents, replacing existing ones.
#
# document_analyzer_pairs:: Array of two-element arrays [doc, analyzer];
#                           analyzer may be nil, in which case the
#                           writer's current analyzer is used.
#
# Inside @dir.synchrolock the ids taken from each document's @id_field are
# first removed via #batch_delete, then every document is added through
# @writer and the index is flushed.
# NOTE(review): #batch_delete treats Integer ids as document numbers rather
# than id terms - presumably callers store ids as Strings; confirm.
#
# A per-document analyzer is only installed for that one add_document
# call; the writer's previous analyzer is restored even if adding fails.
def update_batch(document_analyzer_pairs)
  ids = document_analyzer_pairs.map { |pair| pair.first[@id_field] }
  @dir.synchrolock do
    batch_delete(ids)
    ensure_writer_open
    document_analyzer_pairs.each do |doc, analyzer|
      if analyzer
        old_analyzer = @writer.analyzer
        begin
          @writer.analyzer = analyzer
          @writer.add_document(doc)
        ensure
          # Do not leak a document-specific analyzer to later writes.
          @writer.analyzer = old_analyzer
        end
      else
        @writer.add_document(doc)
      end
    end
    flush
  end
end

Class: Index::Index

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#batch_size ⇒ Object

#logger ⇒ Object

Instance Method Details

#batch_delete(docs) ⇒ Object

#bulk_index(model, ids, options = {}) ⇒ Object

#doc_number(id) ⇒ Object

#index_model(model) ⇒ Object

#index_models(models) ⇒ Object

#update_batch(document_analyzer_pairs) ⇒ Object