Class: BentoSearch::PrimoEngine

Inherits:
Object
  • Object
show all
Extended by:
HTTPClientPatch::IncludeClient
Includes:
SearchEngine
Defined in:
app/search_engines/bento_search/primo_engine.rb

Overview

Some but not all hits have language_codes provided by api.

Constant Summary collapse

@@highlight_start =
'<span class="searchword">'
@@highlight_end =
'</span>'

Constants included from SearchEngine

SearchEngine::DefaultPerPage

Class Method Summary collapse

Instance Method Summary collapse

Methods included from HTTPClientPatch::IncludeClient

include_http_client

Methods included from SearchEngine

#display_configuration, #engine_id, #fill_in_search_metadata_for, #initialize, #normalized_search_arguments, #public_settable_search_args, #search

Methods included from SearchEngine::Capabilities

#max_per_page, #multi_field_search?, #search_keys, #semantic_search_keys, #semantic_search_map, #sort_keys

Class Method Details

.default_configuration ⇒ Object



295
296
297
298
299
300
301
302
303
# File 'app/search_engines/bento_search/primo_engine.rb', line 295

# Default configuration values for this engine.
#
# Returns a Hash with the keys :loc, :lang, :fixed_params and :highlighting.
def self.default_configuration
  defaults = {}
  defaults[:loc] = 'adaptor,primo_central_multiple_fe'
  # ISO-639 language code understood by Primo, e.g. "eng", "fre" or "ger".
  defaults[:lang] = "eng"
  # Extra query parameters; empty by default.
  defaults[:fixed_params] = {}
  defaults[:highlighting] = true
  defaults
end

.required_configuration ⇒ Object



291
292
293
# File 'app/search_engines/bento_search/primo_engine.rb', line 291

# Names of the configuration keys listed as required for this engine.
#
# Returns an Array of Symbols.
def self.required_configuration
  %i[host_port institution]
end

Instance Method Details

#authenticated_end_user?(args) ⇒ Boolean

From config or args, args over-ride config

Returns:

  • (Boolean)


204
205
206
207
208
209
210
211
212
213
214
# File 'app/search_engines/bento_search/primo_engine.rb', line 204

# Whether the search should be treated as coming from an authenticated
# end user.
#
# args - search arguments Hash; an :auth entry, when not nil, overrides
#        the engine configuration.
#
# Returns true or false. A non-nil args[:auth] wins (coerced to a strict
# boolean); otherwise the truthiness of configuration.auth is used, so an
# unset configuration value yields false.
def authenticated_end_user?(args)
  override = args[:auth]
  return (override ? true : false) unless override.nil?

  configuration.auth ? true : false
end

#construct_query(args) ⇒ Object



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'app/search_engines/bento_search/primo_engine.rb', line 222

# Builds the URL for a Primo brief-search request.
#
# args - search arguments Hash. Keys read here: :per_page, :start, :sort,
#        :search_field, :query (plus whatever authenticated_end_user?
#        reads).
#
# Every parameter value is CGI-escaped, including the institution and the
# page size, so unusual configuration or argument values cannot corrupt the
# query string.
#
# Returns the request URL as a String.
def construct_query(args)
  base = "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief"

  params = []
  params << "institution=#{CGI.escape configuration.institution.to_s}"
  params << "loc=#{CGI.escape configuration.loc}"
  params << "lang=#{CGI.escape configuration.lang}"

  params << "bulkSize=#{CGI.escape args[:per_page].to_s}" if args[:per_page]
  # primo indx is 1-based record index, our :start is 0-based.
  params << "indx=#{args[:start].to_i + 1}"

  # A sort key without an :implementation (e.g. "relevance") sends no
  # sortField parameter at all.
  sort_defn  = sort_definitions[args[:sort]]
  sort_field = sort_defn && sort_defn[:implementation]
  params << "sortField=#{CGI.escape sort_field}" if sort_field

  params << "onCampus=#{authenticated_end_user?(args) ? 'true' : 'false'}"

  field = args[:search_field].present? ? args[:search_field] : "any"
  query = "#{field},contains,#{prepared_query args[:query]}"

  # Primo seems to have problems with colons in query, even
  # though docs don't say it should, so they are replaced with spaces.
  params << "query=#{CGI.escape query.tr(':', ' ')}"

  params << "highlight=true" if configuration.highlighting

  # A fixed param whose value is an array is sent once per element.
  configuration.fixed_params.each_pair do |key, value|
    [value].flatten.each do |v|
      params << "#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
    end
  end

  "#{base}?#{params.join('&')}"
end

#handle_highlight_tags(str) ⇒ Object

Replaces the Primo API’s snippet highlighting tags with our own, with proper attention to html_safe. See BentoSearch::Util.handle_highlight_tags.

generally needs to be called on any values that come from ‘display’ section of API response, as they may have snippet tags.



166
167
168
169
170
171
172
173
174
175
# File 'app/search_engines/bento_search/primo_engine.rb', line 166

# Delegates to BentoSearch::Util.handle_highlight_tags, passing this
# engine's highlight start/end tags and the configured :highlighting flag.
#
# str - the value to process.
#
# Returns whatever the BentoSearch::Util helper returns.
def handle_highlight_tags(str)
  highlight_options = {
    :start_tag => @@highlight_start,
    :end_tag => @@highlight_end,
    :enabled => configuration.highlighting
  }

  BentoSearch::Util.handle_highlight_tags(str, highlight_options)
end

#handle_snippet_value(str) ⇒ Object

Adds ellipses to both ends of the snippet and fixes the HTML highlighting.



153
154
155
156
157
158
159
# File 'app/search_engines/bento_search/primo_engine.rb', line 153

# Prepares a snippet for display.
#
# Primo usually doesn't put ellipses on the ends of a snippet, which is
# confusing, so a truthy str is wrapped in U+2026 on both sides. The value
# (wrapped or not) is then run through handle_highlight_tags.
#
# Returns the result of handle_highlight_tags.
def handle_snippet_value(str)
  with_ellipses = str ? "\u2026#{str}\u2026" : str
  handle_highlight_tags(with_ellipses)
end

#map_format(str) ⇒ Object

Try to map from primocentral’s ‘rsrctype’ to our own internal taxonomy of formats

Need docs on what the complete Primo vocabulary here is, we’re just guessing from what we see.



182
183
184
185
186
187
188
189
# File 'app/search_engines/bento_search/primo_engine.rb', line 182

# Maps a Primo resource-type string to an internal format value.
#
# str - the resource type, e.g. "article" or "book".
#
# Returns "Article", "Book" or :dissertation for the recognised types, and
# nil for anything else.
def map_format(str)
  if %w[article newspaper_article review].include?(str)
    "Article"
  elsif "book" == str
    "Book"
  elsif "dissertation" == str
    :dissertation
  end
end

#prepared_query(str) ⇒ Object

Docs say we need to replace any commas with spaces



217
218
219
# File 'app/search_engines/bento_search/primo_engine.rb', line 217

# Replaces every comma in the query with a space.
#
# str - the user's query String.
#
# Returns a new String; str itself is not modified.
def prepared_query(str)
  str.tr(',', ' ')
end

#search_field_definitions ⇒ Object



264
265
266
267
268
269
270
271
272
273
274
# File 'app/search_engines/bento_search/primo_engine.rb', line 264

# Search fields this engine declares, keyed by the Primo field name (nil
# for the default field), each mapped to a Hash naming its :semantic.
#
# Others are available too; this list is not exhaustive.
def search_field_definitions
  semantic_by_field = {
    nil       => :general,
    "creator" => :author,
    "title"   => :title,
    "sub"     => :subject,
    "isbn"    => :isbn,
    "issn"    => :issn
  }

  semantic_by_field.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end

#search_implementation(args) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'app/search_engines/bento_search/primo_engine.rb', line 61

# Runs the search: fetches the URL built by construct_query, parses the XML
# response and maps each DOC element to a BentoSearch::ResultItem.
#
# args - search arguments Hash, passed to construct_query.
#
# Returns a BentoSearch::Results. On failure no items are added and
# results.error is filled in instead:
#   * HTTP status other than 200     -> :status and :body
#   * ERROR element in the response  -> :code and :message
#   * no DOCSET element in response  -> :message
def search_implementation(args)
  url = construct_query(args)

  results = BentoSearch::Results.new

  response = http_client.get(url)
  if response.status != 200
    results.error ||= {}
    results.error[:status] = response.status
    results.error[:body] = response.body
    return results
  end

  response_xml = Nokogiri::XML response.body
  # namespaces really do nobody any good
  response_xml.remove_namespaces!

  error = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/ERROR")
  if error
    results.error ||= {}
    results.error[:code]    = error["CODE"]
    results.error[:message] = error["MESSAGE"]
    return results
  end

  # Without this guard a response carrying neither ERROR nor DOCSET would
  # raise NoMethodError on nil; report it like the other failures instead.
  docset = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")
  if docset.nil?
    results.error ||= {}
    results.error[:message] = "Primo response contained no DOCSET element"
    return results
  end

  results.total_items = docset["TOTALHITS"].to_i

  response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
    item = BentoSearch::ResultItem.new
    # Data in primo response is confusing in many different places in
    # variant formats. We try to pick out the best to take things from,
    # but we're guessing, it's under-documented.

    item.title = handle_highlight_tags text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")

    # I think this is primo unique ID. Have no idea how to look things
    # up by unique id though.
    item.unique_id = text_at_xpath(doc_xml, "./PrimoNMBib/record/control/recordid")

    # First snippet only, kept under custom_data.
    item.custom_data["snippet"] = handle_snippet_value text_at_xpath(doc_xml, "./PrimoNMBib/record/display/snippet")

    # straight snippets: every snippet node, in document order
    item.snippets = doc_xml.xpath("./PrimoNMBib/record/display/snippet").map do |node|
      handle_snippet_value(node.text)
    end

    item.abstract = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")

    doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
      item.authors << BentoSearch::Author.new(:display => author_node.text)
    end

    item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
    # check btitle for book chapters, the book they are in.
    if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
      item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
    end

    item.publisher      = handle_highlight_tags text_at_xpath(doc_xml, "./PrimoNMBib/record/display/publisher")
    item.volume         = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/volume")
    item.issue          = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/issue")
    item.start_page     = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/spage")
    item.end_page       = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/epage")
    item.doi            = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/doi")
    item.issn           = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/issn")
    item.isbn           = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/isbn")

    item.language_code  = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/language")

    date = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/creationdate")
    item.year = date[0, 4] if date # first four chars

    fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype")
    if fmt_str
      # 'article', 'book_chapter'. abuse rails to turn into nice titleized english.
      item.format_str = fmt_str.titleize
      item.format     = map_format fmt_str
    end

    results << item
  end

  results
end

#sort_definitions ⇒ Object



276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'app/search_engines/bento_search/primo_engine.rb', line 276

# Sort options this engine declares, keyed by sort name. Each value is a
# Hash whose :implementation, when present, is the Primo sort field name.
def sort_definitions
  primo_sort_fields = {
    "title_asc"  => "stitle",
    "date_desc"  => "scdate",
    "author_asc" => "screator",
    # not clear if popularity is truly different than relevance or not.
    "popularity" => "popularity"
  }

  definitions = primo_sort_fields.each_with_object({}) do |(sort_key, primo_field), defs|
    defs[sort_key] = { :implementation => primo_field }
  end

  # according to EL, you get 'relevance' results by default, by passing no
  # 'sort' param. I don't think there's a value you can actually pass, so
  # this entry deliberately has no :implementation.
  definitions["relevance"] = {}
  definitions
end

#text_at_xpath(xml, xpath) ⇒ Object

Returns the text() at the xpath, if the xpath matches a node and the text is non-blank; otherwise returns nil.



193
194
195
196
197
198
199
# File 'app/search_engines/bento_search/primo_engine.rb', line 193

# Returns the text of the first node matching xpath, or nil.
#
# xml   - object responding to at_xpath (a parsed document or node).
# xpath - XPath expression String.
#
# Returns nil when nothing matches or when the matched node's text is
# blank; otherwise returns the text.
def text_at_xpath(xml, xpath)
  node = xml.at_xpath(xpath)
  return nil if node.nil?

  text = node.text
  # The blank check belongs on the extracted text, not on the node itself,
  # so that empty or whitespace-only elements yield nil as documented.
  return nil if text.blank?

  text
end