Class: Cul::Fedora::Solr

Inherits:
Object
  • Object
show all
Defined in:
lib/cul-fedora/solr.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Solr

Returns a new instance of Solr.



13
14
15
16
# File 'lib/cul-fedora/solr.rb', line 13

# Builds a Solr wrapper from an options hash.
#
# Both symbol and string keys are accepted for each option; the symbol key
# is consulted first.
#
# @param options [Hash] +:url+ / +"url"+ (required) and +:logger+ / +"logger"+ (optional)
# @raise [ArgumentError] when no url is supplied under either key
def initialize(options = {})
  endpoint = options[:url] || options["url"]
  raise(ArgumentError, "must provide url") unless endpoint

  @url = endpoint
  @logger = options[:logger] || options["logger"]
end

Instance Attribute Details

#url ⇒ Object (readonly)

Returns the value of attribute url.



11
12
13
# File 'lib/cul-fedora/solr.rb', line 11

# Read-only accessor for the Solr URL held in @url.
#
# @return [Object] the current value of @url
def url; @url; end

Instance Method Details

#delete_index ⇒ Object



30
31
32
33
34
# File 'lib/cul-fedora/solr.rb', line 30

# Wipes the whole Solr index: issues a delete for the match-all query
# "*:*" and then commits.
#
# @return [Object] whatever the client's +commit+ call returns
def delete_index
  logger.info("Deleting Solr index...")
  solr = rsolr
  solr.delete_by_query("*:*")
  solr.commit
end

#delete_removed(fedora_server, fedora_item_pids = nil) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/cul-fedora/solr.rb', line 36

# Deletes from Solr every document that identify_removed reports as no
# longer present, then commits once at the end.
#
# @param fedora_server [Object] passed straight through to identify_removed
# @param fedora_item_pids [Array<String>, nil] optional list of pids that are
#   known to exist; forwarded to identify_removed so it can compare against
#   this list instead of querying the Fedora server for every document
# @return [Object] whatever the client's +commit+ call returns
def delete_removed(fedora_server, fedora_item_pids = nil)
  # Forward the pid list: previously it was accepted but dropped here, so the
  # list supplied by callers never reached identify_removed.
  removed = identify_removed(fedora_server, fedora_item_pids)

  logger.info "Deleting items removed from Fedora..."
  removed.each do |id|
    logger.info "Deleting #{id}..."
    # Colons are escaped because ":" separates field and value in the query.
    rsolr.delete_by_query("id:" + id.to_s.gsub(/:/, '\\:'))
  end

  rsolr.commit
end

#identify_removed(fedora_server, fedora_item_pids = nil) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/cul-fedora/solr.rb', line 49

# Walks the Solr index in pages of 500 documents and collects the ids of
# documents that should be treated as removed.
#
# When +fedora_item_pids+ is nil, each document id is looked up on
# +fedora_server+ and is reported when the item does not exist there.
# Otherwise a document is reported when its id (as a String) is not in
# +fedora_item_pids+; the server is not consulted in that mode.
#
# @param fedora_server [Object] must respond to +item(id)+, returning an
#   object that responds to +exists?+ (only used when no pid list is given)
# @param fedora_item_pids [Array<String>, nil] optional list of known pids
# @return [Array<String>] ids of the documents identified as removed
def identify_removed(fedora_server, fedora_item_pids = nil)
  start = 0
  rows = 500
  removed = []

  # NOTE(review): the first page is requested with `select(...)` while later
  # pages use `get 'select', :params => ...`; both forms are kept as they
  # were. Confirm against the rsolr version in use that the two behave the
  # same, and that an empty :q matches all documents on this Solr setup.
  results = rsolr.select({:q => "", :fl => "id", :start => start, :rows => rows})
  logger.info "Identifying items removed from Fedora..."

  until results["response"]["docs"].empty?
    logger.info("Checking Solr index from #{start} to #{start + rows}...")

    results["response"]["docs"].each do |doc|
      # Normalise once so logging and comparison never choke on a non-String id.
      id = doc["id"].to_s

      if fedora_item_pids.nil?
        next if fedora_server.item(doc["id"]).exists?
        logger.info "Noting item removed from fedora:  #{id}..."
      else
        next if fedora_item_pids.include?(id)
        logger.info "Noting removed item #{id}..."
      end

      removed << id
    end

    start += rows
    results = rsolr.get 'select', :params => {:q => "", :fl => "id", :start => start, :rows => rows}
  end

  removed
end

#ingest(options = {}) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/cul-fedora/solr.rb', line 80

# Indexes Fedora items into Solr and commits once at the end.
#
# Recognised options (removed before the rest is handed to the indexer):
# * +:format+ (required) - selects the item method +index_for_<format>+
# * +:fedora_server+ (required) - handed to delete_removed
# * +:items+ - a single item or an Array of items
# * +:collections+ - a single collection or an Array; each collection's
#   +listMembers+ are merged into the item list
# * +:ignore+, +:skip+ - a single pid or an Array of pids that are not indexed
# * +:delete_removed+ - when exactly +true+, delete_removed is called first
#   with the pids of the items being processed
# * +:overwrite+ - when exactly +true+, items already in Solr are re-indexed
#   instead of skipped
#
# Every remaining option is passed to +index_for_<format>+ unchanged. The
# caller's hash and the caller's +:items+ array are left unmodified.
#
# @param options [Hash] see above
# @return [Hash] +:results+ (pids grouped by status, plus +:skipped+),
#   +:errors+ (pids that failed) and +:indexed_count+
# @raise [ArgumentError] when +:format+ or +:fedora_server+ is missing
def ingest(options = {})
  # Work on a copy so the deletes below do not strip the caller's hash.
  options = options.dup

  format = options.delete(:format) || raise(ArgumentError, "needs format")
  fedora_server = options.delete(:fedora_server) || raise(ArgumentError, "needs fedora server")

  items = options.delete(:items) || []
  items = [items] unless items.kind_of?(Array)
  collections = options.delete(:collections) || []
  collections = [collections] unless collections.kind_of?(Array)
  ignore = options.delete(:ignore) || []
  ignore = [ignore] unless ignore.kind_of?(Array)

  delete = options.delete(:delete_removed) || false
  overwrite = options.delete(:overwrite) || false
  skip = options.delete(:skip) || []
  # Wrap a lone pid like :ignore does; a bare String would otherwise be
  # searched by substring.
  skip = [skip] unless skip.kind_of?(Array)

  indexed_count = 0

  logger.info "Preparing the items for indexing..."
  collections.each do |collection|
    items |= collection.listMembers
  end

  # Non-destructive sort: the array may still be the one the caller passed in.
  items = items.sort

  results = Hash.new { |h, k| h[k] = [] }
  errors = []

  item_pids = items.map { |item| item.pid }
  # Strict comparison kept on purpose: only a literal true triggers deletion.
  delete_removed(fedora_server, item_pids) if delete == true

  logger.info "Preparing to index #{items.length} items..."

  items.each do |i|
    if ignore.include?(i.pid) || skip.include?(i.pid)
      logger.info "Ignoring/skipping #{i.pid}..."
      results[:skipped] << i.pid
      next
    end

    if item_exists?(i) && overwrite != true
      results[:skipped] << i.pid
      next
    end

    logger.info "Indexing #{i.pid}..."

    result_hash = i.send("index_for_#{format}", options)

    # The pid is filed under the reported status before the add is attempted,
    # so a failed add leaves it in both results[:success] and errors.
    results[result_hash[:status]] << i.pid

    case result_hash[:status]
    when :success
      begin
        rsolr.add(result_hash[:results])
        indexed_count += 1
      rescue StandardError => e
        # StandardError only: interrupts and exit requests must propagate.
        errors << i.pid
        logger.error e.message
      end
    when :error
      errors << i.pid
      logger.error result_hash[:error_message]
    end
  end

  logger.info "Committing changes to Solr..."
  rsolr.commit

  {:results => results, :errors => errors, :indexed_count => indexed_count}
end

#item_exists?(item) ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
# File 'lib/cul-fedora/solr.rb', line 22

# Reports whether Solr already holds a document for the given item, by
# filtering on the item's escaped pid and checking for any returned docs.
#
# @param item [Object] must respond to +pid_escaped+
# @return [Boolean] true when the filtered query returns at least one doc
def item_exists?(item)
  response = rsolr.find(:filters => {:id => item.pid_escaped})
  matches = response["response"]["docs"]
  !matches.empty?
end

#logger ⇒ Object



18
19
20
# File 'lib/cul-fedora/solr.rb', line 18

# Logger used by this object. Falls back to a Logger writing to STDOUT
# when @logger is unset, and remembers that fallback for later calls.
#
# @return [Object] the stored logger, or the newly created STDOUT Logger
def logger
  return @logger if @logger

  @logger = Logger.new(STDOUT)
end

#rsolr ⇒ Object



26
27
28
# File 'lib/cul-fedora/solr.rb', line 26

# Lazily creates the RSolr connection for @url and reuses it on every
# subsequent call.
#
# @return [Object] the value returned by RSolr.connect
def rsolr
  return @rsolr if @rsolr

  @rsolr = RSolr.connect(:url => @url)
end