Class: Shelver::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/shelver/indexer.rb

Constant Summary collapse

@@unique_id =

Class variables

0

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Indexer

This method performs initialization tasks



25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/shelver/indexer.rb', line 25

# Builds a new indexer.
#
# Ensures the @@index_list class variable exists (defaulting it to false),
# creates the Extractor, records the full-text flag and then calls connect.
#
# @param opts [Hash] options; only :index_full_text is read here. Full-text
#   indexing is switched on when that value equals true or the string "true";
#   any other value (including a missing key) switches it off.
def initialize(opts = {})
  @@index_list = false unless defined?(@@index_list)
  @extractor = Extractor.new

  full_text_flag = opts[:index_full_text]
  @index_full_text = (full_text_flag == true || full_text_flag == "true") ? true : false

  connect
end

Instance Attribute Details

#connectionObject

Member variables



20
21
22
# File 'lib/shelver/indexer.rb', line 20

# Reader for the @connection instance variable.
#
# @return [Object, nil] the value currently held in @connection
#   (nil when it has not been assigned)
def connection
  @connection
end

#extractorObject

Member variables



20
21
22
# File 'lib/shelver/indexer.rb', line 20

# Reader for the @extractor instance variable.
#
# @return [Object, nil] the value currently held in @extractor
#   (nil when it has not been assigned)
def extractor
  @extractor
end

#index_full_textObject

Member variables



20
21
22
# File 'lib/shelver/indexer.rb', line 20

# Reader for the @index_full_text instance variable.
#
# @return [Object, nil] the value currently held in @index_full_text
#   (nil when it has not been assigned)
def index_full_text
  @index_full_text
end

Class Method Details

.solrize(input_hash, solr_doc = Solr::Document.new) ⇒ Object

Populates a solr doc with values from a hash.

Accepts two forms of hashes:

> {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}

or

> {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }

Note that values for individual fields can be a single string or an array of strings.



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/shelver/indexer.rb', line 223

# Populates a solr doc with values from a hash.
#
# Accepts two forms of hashes:
#
#   {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
#
# or
#
#   {:facets  => {'technology'=>["t1", "t2"], 'company'=>"c1"},
#    :symbols => {'title'=>"some title"}}
#
# When the :facets key is absent the whole input hash is treated as the
# facet hash. Facet entries become "<name>_facet" fields; entries under the
# optional :symbols key become "<name>_s" fields.
#
# A value may be a single String or an Array; each Array element is
# interpolated into a string and emitted as its own field. Values of any
# other class are skipped. Subclasses of String and Array are accepted.
#
# @param input_hash [Hash] field names mapped to values (see forms above)
# @param solr_doc [#<<] the document the fields are appended to
# @return the same solr_doc that was passed in (or the newly created default)
def self.solrize(input_hash, solr_doc = Solr::Document.new)
  # Shared by the facet and symbol passes: one field per value.
  append_fields = lambda do |name, suffix, value|
    values =
      case value
      when String then [value]
      when Array then value
      else []
      end
    values.each { |v| solr_doc << Solr::Field.new(:"#{name}_#{suffix}" => "#{v}") }
  end

  facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
  facets.each_pair { |facet_name, value| append_fields.call(facet_name, 'facet', value) }

  if input_hash.has_key?(:symbols)
    input_hash[:symbols].each { |symbol_name, value| append_fields.call(symbol_name, 's', value) }
  end

  solr_doc
end

.unique_idObject



13
14
15
# File 'lib/shelver/indexer.rb', line 13

# Class-level reader for the @@unique_id class variable.
#
# @return the current value of @@unique_id
def self.unique_id
  @@unique_id
end

Instance Method Details

#deleteDocument(id) ⇒ Object

This method deletes a document from the Solr search index by id



212
213
214
# File 'lib/shelver/indexer.rb', line 212

# Deletes a document from the search index by id.
#
# Delegates directly to connection.delete.
#
# @param id the document identifier, passed through unchanged
# @return whatever connection.delete returns
def deleteDocument( id )
  connection.delete( id )
end

#extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object



87
88
89
90
# File 'lib/shelver/indexer.rb', line 87

# Fetches the named datastream of obj through Repository.get_datastream and
# passes its content, together with solr_doc, to extractor.extract_rels_ext.
#
# @param obj the object whose datastream is read
# @param ds_name the name of the datastream to fetch
# @param solr_doc the document handed to the extractor (a new
#   Solr::Document when omitted)
# @return whatever extractor.extract_rels_ext returns
def extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new)
  datastream = Repository.get_datastream(obj, ds_name)
  extractor.extract_rels_ext(datastream.content, solr_doc)
end

#extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object

This method extracts the facet categories from the given Fedora object’s external tag datastream



79
80
81
82
# File 'lib/shelver/indexer.rb', line 79

# Fetches the named datastream of obj through Repository.get_datastream and
# passes its content, together with solr_doc, to extractor.xml_to_solr.
#
# @param obj the object whose datastream is read
# @param ds_name the name of the datastream to fetch
# @param solr_doc the document handed to the extractor (a new
#   Solr::Document when omitted)
# @return whatever extractor.xml_to_solr returns
def extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new)
  datastream = Repository.get_datastream(obj, ds_name)
  extractor.xml_to_solr(datastream.content, solr_doc)
end

#generate_dates(solr_doc) ⇒ Object

This method generates the month and day facets from the date_t in solr_doc



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/shelver/indexer.rb', line 96

# Adds month_facet and day_facet fields derived from the document's date_t.
#
# If the document has no date_t, the placeholder "9999-99-99" is appended
# first so such documents are easy to find. The date_t value is then run
# through Date._parse:
# * a month between 1 and 12 becomes a zero-padded two-character string,
#   anything else (including no month at all) becomes the integer 99;
# * a day between 1 and 31 is handled the same way.
#
# No stricter validation is applied. A format check such as
#   /^(19|20)\d\d([- \/.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])/
# was left disabled because most dates seen so far do not match it.
#
# @param solr_doc the document to read date_t from and append the facets to
# @return the same solr_doc
def generate_dates(solr_doc)
  # Documents without a date get an easy-to-find placeholder value.
  solr_doc << Solr::Field.new(:date_t => "9999-99-99") if solr_doc[:date_t].nil?

  parsed = Date._parse(solr_doc[:date_t])

  month = parsed[:mon]
  month_value = (!month.nil? && 0 < month && month < 13) ? month.to_s.rjust(2, '0') : 99
  solr_doc << Solr::Field.new(:month_facet => month_value)

  day = parsed[:mday]
  day_value = (!day.nil? && 0 < day && day < 32) ? day.to_s.rjust(2, '0') : 99
  solr_doc << Solr::Field.new(:day_facet => day_value)

  solr_doc
end

#index(obj) ⇒ Object

This method adds a document to the Solr search index



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/shelver/indexer.rb', line 176

# Adds a document to the Solr search index.
#
# Builds the document with create_document and hands it to connection.add.
# This is best-effort: any StandardError raised while doing so is reported
# on stdout (via p) and swallowed. Non-StandardError exceptions such as
# Interrupt or SystemExit are deliberately NOT rescued, so the process can
# still be stopped while indexing.
#
# @param obj the object to index; must respond to pid for the error report
# @return the result of connection.add on success, or the printed error
#   message string on failure
def index(obj)
  solr_doc = create_document(obj)
  connection.add(solr_doc)
rescue StandardError => e
  p "unable to index #{obj.pid}.  Failed with #{e.inspect}"
end

#printResults(query_str) ⇒ Object

This method prints out the results of the given query string by iterating through all the hits



203
204
205
206
207
# File 'lib/shelver/indexer.rb', line 203

# Prints the results of the given query string.
#
# Passes a block to query that writes the inspect form of every hit it is
# yielded to stdout, one per line.
#
# @param query_str the query handed to query
# @return whatever query returns
def printResults(query_str)
  query(query_str) { |hit| puts hit.inspect }
end

#query(query_str) ⇒ Object

This method queries the Solr search index and returns a response



196
197
198
# File 'lib/shelver/indexer.rb', line 196

# Queries the Solr search index and returns the response.
#
# Goes through the connection accessor, the same one index and
# deleteDocument use. Any block given is forwarded to connection.query, so
# callers such as printResults that pass a per-hit block have it honored.
#
# @param query_str the query handed to connection.query
# @yield forwarded unchanged to connection.query
# @return whatever connection.query returns
def query(query_str, &block)
  connection.query(query_str, &block)
end