Class: BentoSearch::EdsEngine

Inherits:
Object
  • Object
show all
Extended by:
HTTPClientPatch::IncludeClient
Includes:
SearchEngine
Defined in:
app/search_engines/bento_search/eds_engine.rb

Overview

EDS docs:

Defined Under Namespace

Modules: CitationMessDecorator Classes: EdsCommException, Helper, Holding

Constant Summary collapse

HttpTimeout =

Can’t change http timeout in config, because we keep an http client at class-wide level, and config is not class-wide. We used to keep in constant, but that’s not good for custom setting, we now use class_attribute, but in a weird backwards-compat way for anyone who might be using the constant.

4
AuthHeader =
"x-authenticationToken"
SessionTokenHeader =
"x-sessionToken"
@@remembered_auth =
nil
@@remembered_auth_lock =
Mutex.new

Constants included from SearchEngine

SearchEngine::DefaultPerPage

Class Method Summary collapse

Instance Method Summary collapse

Methods included from HTTPClientPatch::IncludeClient

include_http_client

Methods included from SearchEngine

#display_configuration, #engine_id, #fill_in_search_metadata_for, #initialize, #normalized_search_arguments, #public_settable_search_args, #search

Methods included from SearchEngine::Capabilities

#max_per_page, #multi_field_search?, #search_keys, #semantic_search_keys, #semantic_search_map, #sort_keys

Class Method Details

.default_configurationObject



610
611
612
613
614
615
616
617
618
619
# File 'app/search_engines/bento_search/eds_engine.rb', line 610

# Default configuration values for this engine.
#
# Both EBSCO endpoints default to https. Requests against :base_url carry the
# authentication token (and session token) as HTTP headers, so the default
# must not send them over plain http. An application can still override
# either URL in its own configuration.
#
# @return [Hash] default settings keyed by symbol
def self.default_configuration
  {
    :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
    :base_url => "https://eds-api.ebscohost.com/edsapi/rest/",
    :highlighting => true,
    :truncate_highlighted => 280,
    :assume_first_custom_link_openurl => false,
    :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
  }
end

.http_timeoutObject



105
106
107
# File 'app/search_engines/bento_search/eds_engine.rb', line 105

# HTTP timeout setting for this engine class.
#
# @return [Object] the value of @http_timeout once it has been assigned at
#   class level (even if assigned nil), otherwise the HttpTimeout constant
def self.http_timeout
  return @http_timeout if defined?(@http_timeout)

  HttpTimeout
end

.remembered_authObject

Class variable that saves the current known-good auth token; access is guarded by a mutex to be threadsafe. Sigh.



122
123
124
125
126
# File 'app/search_engines/bento_search/eds_engine.rb', line 122

# Reads the remembered auth token while holding the class-wide lock.
#
# @return [Object, nil] the value currently stored in @@remembered_auth
def self.remembered_auth
  @@remembered_auth_lock.synchronize { @@remembered_auth }
end

.remembered_auth=(token) ⇒ Object

Set class variable with current known good auth. uses a mutex to be threadsafe.



129
130
131
132
133
# File 'app/search_engines/bento_search/eds_engine.rb', line 129

# Stores +token+ as the remembered auth token while holding the class-wide lock.
#
# @param token [Object, nil] value to store in @@remembered_auth
def self.remembered_auth=(token)
  @@remembered_auth_lock.synchronize { @@remembered_auth = token }
end

.required_configurationObject



142
143
144
# File 'app/search_engines/bento_search/eds_engine.rb', line 142

# Names of the configuration keys this engine declares as required.
#
# @return [Array<String>]
def self.required_configuration
  ["user_id", "password", "profile"]
end

Instance Method Details

#at_xpath_text(noko, xpath) ⇒ Object

if the xpath responds, return #text of it, else nil.



477
478
479
480
481
482
483
484
485
# File 'app/search_engines/bento_search/eds_engine.rb', line 477

# Text of the first node matching +xpath+, or nil when nothing matches.
#
# @param noko [#at_xpath] node or document to query
# @param xpath [String] xpath expression handed to +at_xpath+
# @return [String, nil] +#text+ of the matched node, nil if +at_xpath+ returned nil
def at_xpath_text(noko, xpath)
  match = noko.at_xpath(xpath)
  match.nil? ? nil : match.text
end

#authenticated_end_user?(args) ⇒ Boolean

From config or args, args over-ride config

Returns:

  • (Boolean)


147
148
149
150
151
152
153
154
155
156
157
# File 'app/search_engines/bento_search/eds_engine.rb', line 147

# Whether the current search is on behalf of an authenticated end user.
#
# An explicit (non-nil) args[:auth] wins, so a caller can pass false to
# override a truthy configuration. Otherwise the truthiness of
# configuration.auth decides.
#
# The previous version coerced the configured value to a boolean and then
# tested it for nil, which made its final else branch unreachable. This
# version returns the same results without the dead code.
#
# @param args [Hash] search arguments; only :auth is read
# @return [Boolean]
def authenticated_end_user?(args)
  override = args[:auth]
  return !!override unless override.nil?

  configuration.auth ? true : false
end

#construct_search_url(args) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'app/search_engines/bento_search/eds_engine.rb', line 159

# Builds the EDS search URL for the given search arguments.
#
# @param args [Hash] reads :query (required), and optionally :search_field,
#   :per_page, :page and :sort
# @return [String] configuration.base_url followed by "search?" and the
#   query string
def construct_search_url(args)
  field_prefix = args[:search_field] ? "#{args[:search_field]}:" : ""
  # Can't have any commas in query, it turns out, although
  # this is not documented.
  query = "AND,#{field_prefix}#{args[:query].gsub(",", " ")}"

  params = [
    "view=detailed",
    "query=#{CGI.escape query}",
    "searchmode=#{CGI.escape configuration.search_mode}",
    "highlight=#{configuration.highlighting ? 'y' : 'n'}"
  ]

  params << "resultsperpage=#{args[:per_page]}" if args[:per_page]
  params << "pagenumber=#{args[:page]}" if args[:page]

  if args[:sort]
    # Sort keys with no definition, or no :implementation, are silently ignored.
    sort_defn = sort_definitions[args[:sort]]
    sort_value = sort_defn && sort_defn[:implementation]
    params << "sort=#{CGI.escape sort_value}" if sort_value
  end

  source_types = configuration.only_source_types
  if source_types.present?
    # facetfilter=1,SourceType:Research Starters,SourceType:Books
    facet = "1,#{source_types.map { |type| "SourceType:#{type}" }.join(',')}"
    params << "facetfilter=#{CGI.escape facet}"
  end

  "#{configuration.base_url}search?#{params.join('&')}"
end

#element_by_group(noko, group) ⇒ Object

Difficult to get individual elements out of an EDS XML <Record> response, requires weird xpath, so we do it for you. element_by_group(nokogiri_element, “Ti”)

Returns string or nil



436
437
438
# File 'app/search_engines/bento_search/eds_engine.rb', line 436

# Text of the <Data> child of the <Item> whose <Group> text equals +group+,
# looked up relative to +noko+ (e.g. a <Record> element).
#
# @param noko [#at_xpath] element to search under
# @param group [String] group code, e.g. "Ti"
# @return [String, nil] whatever +at_xpath_text+ returns for that xpath
def element_by_group(noko, group)
  data_xpath = "./Items/Item[child::Group[text()='#{group}']]/Data"
  at_xpath_text(noko, data_xpath)
end

#get_auth_tokenObject

Has to make an HTTP request to get EBSCO’s auth token. Returns the auth token. We aren’t bothering to keep track of the expiration ourselves; we can’t necessarily trust it anyway.

Raises an EdsCommException on error.



578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
# File 'app/search_engines/bento_search/eds_engine.rb', line 578

# Requests a fresh auth token from the EBSCO auth service.
#
# POSTs the configured user id and password as a JSON body to
# configuration.auth_url and returns the "AuthToken" value from the JSON
# response.
#
# @return [String] the auth token
# @raise [EdsCommException] if the HTTP status is not successful, or the
#   response is not a JSON object containing "AuthToken"
def get_auth_token
  # Can't send params as form-encoded, actually need to send a JSON or XML
  # body, argh.
  #
  # The body is serialized rather than interpolated into a template, so that
  # quotes or backslashes in the credentials still produce valid JSON.
  body = MultiJson.dump(
    "UserId"   => configuration.user_id,
    "Password" => configuration.password
  )

  s_time = Time.now
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
  Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  response_hash = nil
  begin
    response_hash = MultiJson.load response.body
  rescue MultiJson::DecodeError
    # Deliberately ignored: response_hash stays nil and the check below
    # raises an EdsCommException carrying the raw body.
  end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  return response_hash["AuthToken"]
end

#get_with_auth(url, session_token = nil) ⇒ Object

Give it a url pointing at EDS API. Second arg must be a session_token if EDS request requires one. It will

  • Make a GET request

  • with memo-ized auth token added to headers

  • for XML, with all namespaces removed!

  • Parse the XML response body into a Nokogiri document and return that document

  • Try ONCE more to get if EBSCO says bad auth token

  • Raise an EdsCommException if it can’t auth after the second try, on any other error or unsuccessful HTTP status, or if the response can’t be parsed.



525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
# File 'app/search_engines/bento_search/eds_engine.rb', line 525

# GETs +url+ from the EDS API with the remembered auth token in the headers
# and returns the response parsed as XML with namespaces removed.
#
# If no auth token is remembered yet, one is fetched first. If the response
# reports error number "104" or description "Auth Token Invalid", a new token
# is fetched and the request is repeated exactly once.
#
# @param url [String] EDS API url to fetch
# @param session_token [String, nil] sent in the session token header when given
# @return [Nokogiri::XML::Document] parsed response, namespaces removed
# @raise [EdsCommException] on a rescued timeout/HTTP client/XML syntax
#   error, or a non-successful HTTP status; carries the HTTP body and status
#   when a response was received
def get_with_auth(url, session_token = nil)
  token = self.class.remembered_auth || (self.class.remembered_auth = get_auth_token)

  http_response = nil
  doc = nil
  failure = nil

  begin
    request_headers = {AuthHeader => token, 'Accept' => 'application/xml'}
    request_headers[SessionTokenHeader] = session_token if session_token

    started_at = Time.now
    http_response = http_client.get(url, nil, request_headers)
    Rails.logger.debug("EDS timing GET: #{Time.now - started_at}:#{url}")

    doc = Nokogiri::XML(http_response.body)
    doc.remove_namespaces!

    token_rejected =
      at_xpath_text(doc, "//ErrorNumber") == "104" ||
      at_xpath_text(doc, "//ErrorDescription") == "Auth Token Invalid"

    if token_rejected
      # bad auth, try again just ONCE
      Rails.logger.debug("EDS auth failed, getting auth again")

      request_headers[AuthHeader] = self.class.remembered_auth = get_auth_token
      http_response = http_client.get(url, nil, request_headers)
      doc = Nokogiri::XML(http_response.body)
      doc.remove_namespaces!
    end
  rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    failure = e
  end

  succeeded = http_response && doc && failure.nil? && HTTP::Status.successful?(http_response.status)
  return doc if succeeded

  # Everything else is reported as one exception type, with whatever HTTP
  # details are available attached.
  error = EdsCommException.new("Error fetching URL: #{failure.message if failure} : #{url}")
  if http_response
    error.http_body = http_response.body
    error.http_status = http_response.status
  end
  raise error
end

#helperObject

an object that includes some Rails helper modules for text handling.



137
138
139
# File 'app/search_engines/bento_search/eds_engine.rb', line 137

# Memoized Helper instance; created on first use and reused afterwards.
#
# @return [Helper]
def helper
  @helper = Helper.new unless @helper
  @helper
end

#prepare_eds_payload(str, html_safe = false) ⇒ Object

If EDS has put highlighting tags in a field, we need to HTML escape the literal values, while still using the highlighting tokens to put HTML tags around highlighted terms.

The second param says whether to assume EDS literals are already safe HTML, as they seem to be.



494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'app/search_engines/bento_search/eds_engine.rb', line 494

# Turns an EDS field value into display-ready text, converting EDS
# <highlight> markers into <b class='bento_search_highlight'> tags when
# highlighting is configured.
#
# @param str [String, nil] raw field value; returned untouched when blank
# @param html_safe [Boolean] with highlighting off, whether to mark +str+
#   as html_safe before returning it
# @return [String, nil]
#
# NOTE(review): with highlighting on, every literal segment is marked
# html_safe regardless of the +html_safe+ argument -- confirm this is intended.
def prepare_eds_payload(str, html_safe = false)
  return str if str.blank?

  unless configuration.highlighting
    return html_safe ? str.html_safe : str
  end

  replacements = {
    "<highlight>"  => "<b class='bento_search_highlight'>",
    "</highlight>" => "</b>"
  }

  # The capture group keeps the highlight markers in the split result, so
  # they can be swapped for real tags in place.
  segments = str.split(%r{(</?highlight>)}).map do |segment|
    # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
    replacements.fetch(segment, segment).html_safe
  end

  helper.safe_join(segments, '')
end

#search_field_definitionsObject



629
630
631
632
633
634
635
636
637
638
639
640
# File 'app/search_engines/bento_search/eds_engine.rb', line 629

# EDS search field codes this engine supports, each mapped to an options
# hash that may name the semantic field (:semantic) it corresponds to.
#
# @return [Hash{String => Hash}]
def search_field_definitions
  {
    "TX" => { semantic: :general },
    "AU" => { semantic: :author },
    "TI" => { semantic: :title },
    "SU" => { semantic: :subject },
    "SO" => {}, # source, journal name
    "AB" => {}, # abstract
    "IS" => { semantic: :issn },
    "IB" => { semantic: :isbn }
  }
end

#search_implementation(args) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
# File 'app/search_engines/bento_search/eds_engine.rb', line 199

# Runs a search against the EDS API and converts each <Record> in the
# response into a BentoSearch::ResultItem.
#
# The whole exchange happens inside with_session, using the url built by
# construct_search_url and fetched with get_with_auth.
#
# @param args [Hash] search arguments; passed to authenticated_end_user?
#   and construct_search_url
# @return [BentoSearch::Results] the items found; if an EdsCommException is
#   raised along the way, the Results object instead carries the exception,
#   HTTP status and HTTP body in its +error+ hash
def search_implementation(args)
  results = BentoSearch::Results.new

  end_user_auth = authenticated_end_user? args

  begin
    with_session(end_user_auth) do |session_token|

      url = construct_search_url(args)

      response = get_with_auth(url, session_token)

      # Reassigns the outer +results+ local (captured by this block) once the
      # response has been fetched successfully.
      results = BentoSearch::Results.new

      if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
        results.total_items = hits_node.to_i
      end

      response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
        item = BentoSearch::ResultItem.new

        item.title   = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )

        # To get a unique id, we need to pull out db code and accession number
        # and combine em with colon, accession number is not unique by itself.
        db           = record_xml.at_xpath("./Header/DbId").try(:text)
        accession    = record_xml.at_xpath("./Header/An").try(:text)
        if db && accession
          item.unique_id    = "#{db}:#{accession}"
        end


        # A record with no title, seen by a non-authenticated (guest) user,
        # gets a translated "record not available" placeholder title.
        if item.title.nil? && ! end_user_auth
          item.title = I18n.translate("bento_search.eds.record_not_available")
        end

        item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )

        # Much better way to get authors out of EDS response now...
        author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
        author_full_names.each do |name_full_xml|
          if name_full_xml && (text = name_full_xml.text).present?
            item.authors << BentoSearch::Author.new(:display => text)
          end
        end

        if item.authors.blank?
          # Believe it or not, the authors are encoded as an escaped
          # XML-ish payload, that we need to parse again and get the
          # actual authors out of. WTF. Thanks for handling fragments
          # nokogiri.
          author_mess = element_by_group(record_xml, "Au")
          # only SOMETIMES does it have XML tags, other times it's straight text.
          # ARGH.
          # NOTE(review): author_mess is nil when the record has no "Au" item;
          # presumably the fragment is then empty and an Author with blank
          # display text is appended below -- TODO confirm.
          author_xml = Nokogiri::XML::fragment(author_mess)
          searchLinks = author_xml.xpath(".//searchLink")
          if searchLinks.size > 0
            # NOTE(review): same xpath evaluated a second time; searchLinks
            # above already holds these nodes.
            author_xml.xpath(".//searchLink").each do |author_node|
              item.authors << BentoSearch::Author.new(:display => author_node.text)
            end
          else
            item.authors << BentoSearch::Author.new(:display => author_xml.text)
          end
        end

        # PLink is main inward facing EBSCO link, put it as
        # main link.
        if direct_link = record_xml.at_xpath("./PLink")
          item.link = direct_link.text

          # A 'pdflink' entry under FullText/Links marks the main link as fulltext.
          if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
            item.link_is_fulltext = true
          end
        end


        # Other links may be found in CustomLinks, it seems like usually
        # there will be at least one, hopefully the first one is the OpenURL?
        #byebug if configuration.id == "articles"
        # NOTE(review): `.text` is called directly on at_xpath("./Url") below,
        # so a CustomLink without a Url child would raise NoMethodError.
        record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
          # If it's in FullText section, give it a rel=alternate
          # to indicate it's fulltext
          rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil

          item.other_links << BentoSearch::Link.new(
            :url => custom_link.at_xpath("./Url").text,
            :rel => rel,
            :label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
            )
        end

        # More other links in 'URL' Item, in unpredictable format sometimes being
        # embedded XML. Really EBSCO?
        record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
          data_element = url_item.at_xpath("./Data")
          next unless data_element

          # SOMETIMES the url and label are in an embedded escaped XML element...
          if data_element.text.strip.start_with?("<link")
            # Ugh, once unescpaed it has bare '&' in URL queries sometimes, which
            # is not actually legal XML anymore, but Nokogiri::HTML parser will
            # let us get away with it, but then doesn't put the actual text
            # inside the 'link' item, but inside the <link> tag since it knows
            # an HTML link tag has no content. Really EDS.
            node = Nokogiri::HTML::fragment(data_element.text)
            next unless link = node.at_xpath("./link")
            next unless link["linkterm"].presence || link["linkTerm"].presence

            item.other_links << BentoSearch::Link.new(
              :url => link["linkterm"] || link["linkTerm"],
              :label => helper.strip_tags(data_element.text).presence || "Link"
              )
          else
            # it's just a straight URL in data element, with only label we've
            # got in <label> element.
            next unless data_element.text.strip.present?

            label_element = url_item.at_xpath("./Label")
            label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"

            item.other_links << BentoSearch::Link.new(
              :url => data_element.text,
              :label => label
            )
          end
        end


        # When configured to, treat the first CustomLink's Url as an OpenURL
        # and keep everything from its '?' onward as the KEV context object.
        # NOTE(review): `first` is the result of `xpath`, presumably a node set
        # that is truthy even when empty, so the effective guard is the
        # at_xpath("./Url") lookup -- confirm.
        if (configuration.assume_first_custom_link_openurl &&
          (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
          (node = first.at_xpath "./Url" )
        )

          openurl = node.text

          index = openurl.index('?')
          item.openurl_kev_co = openurl.slice index..(openurl.length) if index
        end

        # Format.
        item.format_str = at_xpath_text record_xml, "./Header/PubType"
        # Can't find a list of possible PubTypes to see what's there to try
        # and map to our internal controlled vocab. oh wells.

        item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"

        item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
        total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
        # End page is derived (start + count - 1), and only when both values
        # convert to non-zero integers.
        if total_pages.to_i != 0 && item.start_page.to_i != 0
          item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
        end


        # location/call number, probably only for catalog results. We only see one
        # in actual data, but XML structure allows multiple, so we'll store it as multiple.
        copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
        if copy_informations.present?
          item.custom_data[:holdings] =
            copy_informations.collect do |copy_information|
              Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
                          :call_number => at_xpath_text(copy_information, "ShelfLocator"))
            end
        end



        # For some EDS results, we have actual citation information,
        # for some we don't.
        container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
        if container_xml
          item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
          item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
          item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")

          item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")

          if date_xml = container_xml.at_xpath("./Dates/Date")
            item.year = at_xpath_text(date_xml, "./Y")

            date = at_xpath_text(date_xml, "./D").to_i
            month = at_xpath_text(date_xml, "./M").to_i
            # NOTE(review): Date.new raises on an impossible day/month
            # combination, and that error is not rescued in this method.
            if item.year.to_i != 0 && date != 0 && month != 0
              item.publication_date = Date.new(item.year.to_i, month, date)
            end
          end
        end

        # EDS annoyingly repeats a monographic title in the same place
        # we look for source/container title, take it away.
        if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
          item.source_title = nil
        end

        # Legacy EDS citation extracting. We don't really need this any more
        # because EDS api has improved, but leave it in in case anyone using
        # older versions needed it.

        # We have a single blob of human-readable citation, that's also
        # littered with XML-ish tags we need to deal with. We'll save
        # it in a custom location, and use a custom Decorator to display
        # it. Sorry it's way too hard for us to preserve <highlight>
        # tags in this mess, they will be lost. Probably don't
        # need highlighting in source anyhow.
        citation_mess = element_by_group(record_xml, "Src")
        # Argh, but sometimes it's in SrcInfo _without_ tags instead
        if citation_mess
          citation_txt = Nokogiri::XML::fragment(citation_mess).text
          # But strip off some "count of references" often on the end
          # which are confusing and useless.
          item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
        else
          # try another location
          item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
        end

        item.extend CitationMessDecorator

        results << item
      end
    end

    return results
  rescue EdsCommException => e
    # Communication failures are reported on the Results object rather than
    # raised to the caller.
    results.error ||= {}
    results.error[:exception] = e
    results.error[:http_status] = e.http_status
    results.error[:http_body] = e.http_body
    return results
  end

end

#sort_definitionsObject



621
622
623
624
625
626
627
# File 'app/search_engines/bento_search/eds_engine.rb', line 621

# Sort keys this engine supports, each mapped to the value sent to EDS as
# the sort parameter (under :implementation).
#
# @return [Hash{String => Hash}]
def sort_definitions
  {
    "date_desc" => { implementation: "date" },
    "relevance" => { implementation: "relevance" }
    # not enabled:  "date_asc" => { implementation: "date2" }
  }
end

#with_session(auth = false, &block) ⇒ Object

Wraps calls to the EDS API with CreateSession and EndSession requests to EDS. Yields the session token to the block.

The optional argument is whether this is an authenticated user; otherwise guest access will be used.

with_session(true) do |session_token|
  # can make more requests using session_token,
  # EndSession will be called for you at end of block.
end


450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# File 'app/search_engines/bento_search/eds_engine.rb', line 450

# Wraps calls to the EDS API in a session: issues a createsession request,
# yields the session token to the block, and issues an endsession request
# when the block finishes (including when it raises).
#
#     with_session(true) do |session_token|
#       # can make more requests using session_token,
#       # EndSession will be called for you at end of block.
#     end
#
# @param auth [Boolean] true for an authenticated end user; otherwise the
#   session is created with guest=y
# @yieldparam session_token [String] token from the createsession response
# @return [Object] whatever the block returns
# @raise [EdsCommException] if no SessionToken can be found in the
#   createsession response (the block is not called in that case), or if
#   one of the underlying requests raises it
def with_session(auth = false, &block)
  auth_token = self.class.remembered_auth
  if auth_token.nil?
    auth_token = self.class.remembered_auth = get_auth_token
  end

  create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
  response_xml = get_with_auth(create_url)

  session_token = response_xml && at_xpath_text(response_xml, "//SessionToken")
  # This exception used to be constructed but never raised, which let the
  # block run with a nil session token.
  raise EdsCommException.new("Could not get SessionToken") unless session_token

  begin
    block.yield(session_token)
  ensure
    if auth_token
      end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
      get_with_auth(end_url)
    end
  end
end