Class: Shelver::Indexer

Inherits:

Object

Object
Shelver::Indexer

show all

Defined in:: lib/shelver/indexer.rb

Constant Summary collapse

@@unique_id = Class variables

Instance Attribute Summary collapse

#connection ⇒ Object

Member variables.
#extractor ⇒ Object

Member variables.
#index_full_text ⇒ Object

Member variables.

Class Method Summary collapse

.solrize(input_hash, solr_doc = Solr::Document.new) ⇒ Object

Populates a solr doc with values from a hash.
.unique_id ⇒ Object

Instance Method Summary collapse

#deleteDocument(id) ⇒ Object

This method deletes a document from the Solr search index by id.
#extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object
#extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object

This method extracts the facet categories from the given Fedora object’s external tag datastream.
#generate_dates(solr_doc) ⇒ Object

This method generates the month and day facets from the date_t in solr_doc.
#index(obj) ⇒ Object

This method adds a document to the Solr search index.
#initialize(opts = {}) ⇒ Indexer constructor

This method performs initialization tasks.
#printResults(query_str) ⇒ Object

This method prints out the results of the given query string by iterating through all the hits.
#query(query_str) ⇒ Object

This method queries the Solr search index and returns a response.

Constructor Details

#initialize(opts = {}) ⇒ `Indexer`

This method performs initialization tasks

# File 'lib/shelver/indexer.rb', line 25

# Sets up a new indexer: ensures the class-level @@index_list flag exists,
# creates the Extractor, records the full-text option and then calls connect.
#
# @param opts [Hash] options; only :index_full_text is read here. Full-text
#   indexing is switched on when the value equals true or the string "true";
#   anything else (including a missing key) switches it off.
def initialize( opts={} )
  @@index_list = false unless defined?(@@index_list)
  @extractor = Extractor.new

  full_text_option = opts[:index_full_text]
  @index_full_text = [true, "true"].any? { |accepted| full_text_option == accepted }

  connect
end

Instance Attribute Details

#connection ⇒ `Object`

Member variables



20
21
22

# File 'lib/shelver/indexer.rb', line 20

# Reader for the @connection instance variable. #index and #deleteDocument
# send their add/delete calls to the object returned here.
#
# @return [Object] whatever is currently stored in @connection
def connection
  @connection
end

#extractor ⇒ `Object`

Member variables



20
21
22

# File 'lib/shelver/indexer.rb', line 20

# Reader for the @extractor instance variable, which the constructor sets to
# Extractor.new. The extract_* methods delegate to this object.
#
# @return [Object] whatever is currently stored in @extractor
def extractor
  @extractor
end

#index_full_text ⇒ `Object`

Member variables



20
21
22

# File 'lib/shelver/indexer.rb', line 20

# Reader for the @index_full_text flag. The constructor sets it to true when
# opts[:index_full_text] is true or "true", and to false otherwise.
#
# @return [Object] whatever is currently stored in @index_full_text
def index_full_text
  @index_full_text
end

Class Method Details

.solrize(input_hash, solr_doc = Solr::Document.new) ⇒ `Object`

Populates a solr doc with values from a hash.

Accepts two forms of hashes:

> {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}

> {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}}

Note that values for individual fields can be a single string or an array of strings.

# File 'lib/shelver/indexer.rb', line 223

# Populates a solr doc with values from a hash.
#
# Accepts two forms of hashes:
#   { 'technology' => ["t1", "t2"], 'company' => "c1" }
#   { :facets  => { 'technology' => ["t1", "t2"], 'company' => "c1" },
#     :symbols => { 'title' => "A title" } }
#
# Every entry of the facet hash becomes one or more "<name>_facet" fields and
# every entry under :symbols becomes one or more "<name>_s" fields. A value
# may be a single string or an array; array elements are converted to strings.
# Values of any other type are ignored (which is also why, in the flat form,
# a :symbols entry is not itself turned into a facet).
#
# Type matching uses `case value when String/Array`, so subclasses of String
# and Array are indexed too instead of being silently skipped.
#
# @param input_hash [Hash] facet values, optionally wrapped under :facets and
#   optionally accompanied by :symbols
# @param solr_doc [Solr::Document] document to append fields to
# @return [Solr::Document] the same solr_doc that was passed in (or created)
def self.solrize( input_hash, solr_doc=Solr::Document.new )
  # Shared by the facet and symbol passes; appends one field per value.
  add_fields = lambda do |name, suffix, value|
    values = case value
             when String then [value]
             when Array  then value
             else []
             end
    values.each { |v| solr_doc << Solr::Field.new( :"#{name}_#{suffix}" => "#{v}" ) }
  end

  facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
  facets.each_pair { |facet_name, value| add_fields.call( facet_name, "facet", value ) }

  if input_hash.has_key?(:symbols)
    input_hash[:symbols].each { |symbol_name, value| add_fields.call( symbol_name, "s", value ) }
  end

  solr_doc
end

.unique_id ⇒ `Object`



13
14
15

# File 'lib/shelver/indexer.rb', line 13

# Class-level reader for the @@unique_id class variable.
# NOTE(review): the assignment of @@unique_id is not visible in this view.
#
# @return [Object] whatever is currently stored in @@unique_id
def self.unique_id
  @@unique_id
end

Instance Method Details

#deleteDocument(id) ⇒ `Object`

This method deletes a document from the Solr search index by id



212
213
214

# File 'lib/shelver/indexer.rb', line 212

# Deletes a document from the Solr search index by id, by delegating to
# connection.delete.
#
# @param id [Object] identifier handed unchanged to connection.delete
# @return [Object] whatever connection.delete returns
def deleteDocument( id )
  connection.delete( id )
end

#extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new) ⇒ `Object`

# File 'lib/shelver/indexer.rb', line 87

# Fetches the named datastream of +obj+ through Repository.get_datastream and
# hands its content, together with +solr_doc+, to extractor.extract_rels_ext.
#
# @param obj [Object] object whose datastream is looked up
# @param ds_name [Object] name of the datastream to fetch
# @param solr_doc [Solr::Document] document passed on to the extractor
# @return [Object] whatever extractor.extract_rels_ext returns
def extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new)
  datastream = Repository.get_datastream(obj, ds_name)
  extractor.extract_rels_ext(datastream.content, solr_doc)
end

#extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new) ⇒ `Object`

This method extracts the facet categories from the given Fedora object’s external tag datastream

# File 'lib/shelver/indexer.rb', line 79

# Fetches the named datastream of +obj+ through Repository.get_datastream and
# hands its content, together with +solr_doc+, to extractor.xml_to_solr.
#
# @param obj [Object] object whose datastream is looked up
# @param ds_name [Object] name of the datastream to fetch
# @param solr_doc [Solr::Document] document passed on to the extractor
# @return [Object] whatever extractor.xml_to_solr returns
def extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new)
  datastream = Repository.get_datastream(obj, ds_name)
  extractor.xml_to_solr(datastream.content, solr_doc)
end

#generate_dates(solr_doc) ⇒ `Object`

This method generates the month and day facets from the date_t in solr_doc

# File 'lib/shelver/indexer.rb', line 96

# Adds month_facet and day_facet fields derived from the document's date_t.
#
# When date_t is absent, the placeholder "9999-99-99" is added first so such
# documents are easy to find. The date is read with Date._parse, which splits
# the string into parts without validating them. A month in 1..12 or a day in
# 1..31 is stored as a zero-padded two-character string; a missing or
# out-of-range part is stored as the integer 99.
#
# A stricter validity regex was sketched here in the past but left disabled
# because most of the existing dates did not pass it:
#   /^(19|20)\d\d([- \/.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])/
#
# @param solr_doc [Solr::Document] document to read date_t from and append to
# @return [Solr::Document] the same solr_doc
def generate_dates(solr_doc)
  solr_doc << Solr::Field.new( :date_t => "9999-99-99") if solr_doc[:date_t].nil?

  parts = Date._parse(solr_doc[:date_t])

  month = parts[:mon]
  month_value = (!month.nil? && 0 < month && month < 13) ? month.to_s.rjust(2, '0') : 99
  solr_doc << Solr::Field.new( :month_facet => month_value )

  day = parts[:mday]
  day_value = (!day.nil? && 0 < day && day < 32) ? day.to_s.rjust(2, '0') : 99
  solr_doc << Solr::Field.new( :day_facet => day_value )

  solr_doc
end

#index(obj) ⇒ `Object`

This method adds a document to the Solr search index

# File 'lib/shelver/indexer.rb', line 176

# Adds a document to the Solr search index.
#
# Builds the Solr document for +obj+ with create_document and sends it to
# connection.add. Indexing is best-effort: an ordinary failure is reported on
# stdout and swallowed so that a batch run can continue with the next object.
#
# Only StandardError is rescued. Rescuing Exception would also trap
# Interrupt, SystemExit and NoMemoryError, making a long indexing run
# impossible to stop with Ctrl-C and hiding fatal conditions.
#
# @param obj [Object] object to index; must respond to #pid for the error message
# @return [Object] the result of connection.add on success, or the value
#   returned by +p+ (the message string) after a rescued failure
def index( obj )
  solr_doc = create_document( obj )
  connection.add( solr_doc )
rescue StandardError => e
  p "unable to index #{obj.pid}.  Failed with #{e.inspect}"
end

#printResults(query_str) ⇒ `Object`

This method prints out the results of the given query string by iterating through all the hits

# File 'lib/shelver/indexer.rb', line 203

# Runs +query_str+ through #query and writes the inspect form of every hit
# that #query yields to stdout, one per line.
#
# @param query_str [Object] query passed unchanged to #query
# @return [Object] whatever #query returns
def printResults( query_str )
  query( query_str ) { |hit| puts hit.inspect }
end

#query(query_str) ⇒ `Object`

This method queries the Solr search index and returns a response



196
197
198

# File 'lib/shelver/indexer.rb', line 196

# Queries the Solr search index and returns the response.
#
# Uses the same +connection+ reader as #index and #deleteDocument; the
# previous body called +conn+, a name that is not defined anywhere in the
# visible class. Any block given is forwarded to connection.query so callers
# such as #printResults can iterate over hits.
# NOTE(review): block forwarding assumes the connection's query method
# accepts a block — confirm against the Solr client in use.
#
# @param query_str [Object] query passed unchanged to connection.query
# @yield [hit] forwarded to connection.query when a block is given
# @return [Object] whatever connection.query returns
def query( query_str, &block )
  connection.query( query_str, &block )
end

Class: Shelver::Indexer

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Indexer

Instance Attribute Details

#connection ⇒ Object

#extractor ⇒ Object

#index_full_text ⇒ Object

Class Method Details

.solrize(input_hash, solr_doc = Solr::Document.new) ⇒ Object

> {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}

> {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}}

.unique_id ⇒ Object

Instance Method Details

#deleteDocument(id) ⇒ Object

#extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object

#extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new) ⇒ Object

#generate_dates(solr_doc) ⇒ Object

#index(obj) ⇒ Object

#printResults(query_str) ⇒ Object

#query(query_str) ⇒ Object

#initialize(opts = {}) ⇒ `Indexer`

#connection ⇒ `Object`

#extractor ⇒ `Object`

#index_full_text ⇒ `Object`

.solrize(input_hash, solr_doc = Solr::Document.new) ⇒ `Object`

.unique_id ⇒ `Object`

#deleteDocument(id) ⇒ `Object`

#extract_rels_ext(obj, ds_name, solr_doc = Solr::Document.new) ⇒ `Object`

#extract_xml_to_solr(obj, ds_name, solr_doc = Solr::Document.new) ⇒ `Object`

#generate_dates(solr_doc) ⇒ `Object`

#index(obj) ⇒ `Object`

#printResults(query_str) ⇒ `Object`

#query(query_str) ⇒ `Object`