Class: Index::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/ferret_extensions.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#batch_size ⇒ Object

Returns the value of attribute batch_size.



36
37
38
# File 'lib/ferret_extensions.rb', line 36

# Reader for @batch_size, the value this class passes as :batch_size to
# ActsAsFerret::BulkIndexer and to the model's batch iterators
# (records_for_bulk_index / records_for_rebuild).
def batch_size
  @batch_size
end

#logger ⇒ Object

Returns the value of attribute logger.



36
37
38
# File 'lib/ferret_extensions.rb', line 36

# Reader for @logger, the object the indexing methods of this class send
# their info/debug messages to and hand on to ActsAsFerret::BulkIndexer.
def logger
  @logger
end

Instance Method Details

#batch_delete(docs) ⇒ Object

If docs is a Hash or an Array then a batch delete will be performed. If docs is an Array then it will be considered an array of ids. If it is a Hash, then its keys will be used instead as the Array of document ids. If an id is an Integer then it is considered a Ferret document number and the corresponding document will be deleted. If an id is a String or a Symbol then it will be considered a term, and the documents that contain that term in the :id_field will be deleted.

docs

An Array of docs to be deleted, or a Hash (in which case the keys are used).

Taken from Ferret trunk.

Raises:

  • (ArgumentError)


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/ferret_extensions.rb', line 111

# Deletes a batch of documents in one call.
#
# docs:: an Array of ids, or a Hash whose keys are used as the ids.
#        Integer ids are treated as document numbers and deleted through
#        @reader; String and Symbol ids are treated as terms and deleted
#        through @writer using @id_field.
#
# All ids are classified (and validated) before anything is deleted, so an
# unsupported id type aborts the call without touching the index.
#
# Returns self.
# Raises ArgumentError if docs is neither an Array nor a Hash, or if any id
# is not an Integer, String or Symbol.
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
  ids = []
  terms = []
  docs.each do |doc|
    case doc
    when String   then terms << doc
    when Symbol   then terms << doc.to_s
    when Integer  then ids << doc
    else
      # Name the class of the offending element itself (the block variable
      # is +doc+; there is no +id+ in scope here).
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end
  unless ids.empty?
    ensure_reader_open
    ids.each { |id| @reader.delete(id) }
  end
  unless terms.empty?
    ensure_writer_open()
    terms.each { |t| @writer.delete(@id_field, t) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end
  return self
end

#bulk_index(model, ids, options = {}) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/ferret_extensions.rb', line 56

# Indexes the records of +model+ identified by +ids+ in batches.
#
# model::   must respond to records_for_bulk_index(ids, batch_size) and
#           yield (records, offset) pairs.
# ids::     collection of ids; its size is passed to the bulk indexer as
#           :total.
# options:: :optimize (default true) - optimize the index after flushing.
#           Note that the default is merged into the passed hash in place
#           via reverse_merge!.
#
# Auto-flush is switched off for the duration of the run and the previous
# setting is restored afterwards, even if indexing, flushing or optimizing
# raises.
#
# Evaluates to the previous @auto_flush value, as it always has.
def bulk_index(model, ids, options = {})
  options.reverse_merge! :optimize => true
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if options[:optimize]
      logger.info 'optimizing...'
      optimize
    end
  ensure
    # Without this a failure mid-run would leave auto-flush disabled for
    # every later operation on this index.
    @auto_flush = orig_flush
  end
  orig_flush
end

#doc_number(id) ⇒ Object

Searches for the first document with arg in the id field and returns its internal document number. The id field is either :id or whatever you set the :id_field parameter to when you created the Index object.



141
142
143
144
145
146
147
# File 'lib/ferret_extensions.rb', line 141

# Looks up +id+ (converted with to_s) as a term in @id_field and returns
# the document number of the first match, or nil when nothing matches.
# The lookup runs inside @dir.synchronize.
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open
    matches = @reader.term_docs_for(@id_field, id.to_s)
    return nil unless matches.next?
    return matches.doc
  end
end

#index_model(model) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'lib/ferret_extensions.rb', line 46

# Reindexes one model: builds a BulkIndexer in :reindex mode and feeds it
# every (records, offset) batch the model yields from records_for_rebuild.
# Evaluates to whatever records_for_rebuild returns.
def index_model(model)
  indexer = ActsAsFerret::BulkIndexer.new(
    :batch_size => @batch_size,
    :logger     => logger,
    :model      => model,
    :index      => self,
    :reindex    => true
  )
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |batch, position| indexer.index_records(batch, position) }
end

#index_models(models) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/ferret_extensions.rb', line 38

# Reindexes every model in +models+ via index_model, then flushes,
# optimizes and closes this index, and finally asks ActsAsFerret to close
# its multi indexes.
def index_models(models)
  models.each do |model|
    index_model(model)
  end
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end

#update_batch(document_analyzer_pairs) ⇒ Object

Bulk-inserts a number of Ferret documents. The argument has to be an array of two-element arrays, each holding the document data and the analyzer to use for this document (which may be nil).



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/ferret_extensions.rb', line 79

# Replaces a batch of documents in the index.
#
# document_analyzer_pairs:: an array of [document, analyzer] pairs; the
#                           analyzer may be nil, in which case the
#                           writer's current analyzer is used.
#
# Inside @dir.synchrolock, the existing entries for every document's
# @id_field value are removed with batch_delete, each document is added
# through @writer, and the index is flushed. When a pair carries its own
# analyzer it is installed on the writer just for that document.
def update_batch(document_analyzer_pairs)
  ids = document_analyzer_pairs.collect {|da| da.first[@id_field] }
  @dir.synchrolock do
    batch_delete(ids)
    ensure_writer_open()
    document_analyzer_pairs.each do |doc, analyzer|
      if analyzer
        old_analyzer = @writer.analyzer
        begin
          @writer.analyzer = analyzer
          @writer.add_document(doc)
        ensure
          # Put the writer's own analyzer back even if add_document raised,
          # so a failed document cannot leak its analyzer into later writes.
          @writer.analyzer = old_analyzer
        end
      else
        @writer.add_document(doc)
      end
    end
    flush()
  end
end