Class: BentoSearch::GoogleBooksEngine

Inherits:
Object
  • Object
show all
Extended by:
HTTPClientPatch::IncludeClient
Includes:
ActionView::Helpers::SanitizeHelper, SearchEngine
Defined in:
app/search_engines/bento_search/google_books_engine.rb

Overview

See developers.google.com/books/docs/v1/using and developers.google.com/books/docs/v1/reference/volumes#resource

Configuration: :api_key is STRONGLY recommended; without it Google will severely rate-limit you.

Custom Data

The GBS API’s “viewability” value is stored at item.custom_data[:viewability]; it is one of PARTIAL, ALL_PAGES, NO_PAGES or UNKNOWN. See developers.google.com/books/docs/v1/reference/volumes#resource

#link_is_fulltext? is also set appropriately.

You may want to use a custom decorator to display the viewability status somehow (in display_format? In an other_link?). See wiki for info on decorators.

Constant Summary

Constants included from SearchEngine

SearchEngine::DefaultPerPage

Instance Method Summary collapse

Methods included from HTTPClientPatch::IncludeClient

include_http_client

Methods included from SearchEngine

#display_configuration, #engine_id, #fill_in_search_metadata_for, #initialize, #normalized_search_arguments, #public_settable_search_args, #search

Methods included from SearchEngine::Capabilities

#search_keys, #semantic_search_keys, #semantic_search_map, #sort_keys

Instance Method Details

#get(id) ⇒ Object

Look up a single item by #unique_id. Returns a single item, or raises BentoSearch::NotFound, BentoSearch::TooManyFound, or other.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'app/search_engines/bento_search/google_books_engine.rb', line 88

# Fetches one volume by its unique id and converts it to a result item
# via #hash_to_item.
#
# id - the volume id; it is CGI-escaped before being placed in the URL.
#
# Raises BentoSearch::NotFound when Google answers with HTTP 404 or 503,
# and a plain Exception when the parsed JSON body carries an "error" key.
def get(id)
  # Single-item lookups go through the volumes/<id> endpoint; a fielded
  # search cannot be used for this.
  url = base_url + "volumes/#{CGI.escape(id)}"

  api_key = configuration.api_key
  url += "?key=#{api_key}" if api_key

  response = http_client.get(url)

  case response.status
  when 404
    raise BentoSearch::NotFound, "ID: #{id}"
  when 503
    # Google has been observed answering 503 for unknown ids; most likely
    # a bug on their side, but it is treated as "not found" here.
    raise BentoSearch::NotFound, "ID: #{id} (503 error from Google, tests show indicates not found ID however)"
  end

  json = MultiJson.load(response.body)

  raise Exception, "Error in get(#{id}): #{json['error'].inspect}" if json["error"]

  hash_to_item(json)
end

#hash_to_item(item_response) ⇒ Object

Takes a hash from the Google JSON response representing a single item, and returns a ResultItem object. Used internally.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'app/search_engines/bento_search/google_books_engine.rb', line 119

# Builds a ResultItem from one volume hash of a parsed Google Books JSON
# response. Used internally by #search_implementation and #get.
#
# item_response - string-keyed Hash for a single volume; the keys read are
#                 "id", "volumeInfo" and "accessInfo".
#
# Returns the populated ResultItem.
def hash_to_item(item_response)
  v_info      = item_response["volumeInfo"] || {}
  identifiers = v_info["industryIdentifiers"] || []

  # "identifier" value of the first industryIdentifiers entry with the
  # given "type", or nil when no such entry exists.
  identifier_of = lambda do |type|
    node = identifiers.find { |candidate| candidate["type"] == type }
    node && node["identifier"]
  end

  item = ResultItem.new

  item.unique_id      = item_response["id"]

  item.title          = format_title(v_info)
  item.publisher      = v_info["publisher"]
  # previewLink gives you your search results highlighted, preferable
  # if it exists.
  item.link           = v_info["previewLink"] || v_info["canonicalVolumeLink"]
  item.abstract       = sanitize v_info["description"]
  item.year           = get_year v_info["publishedDate"]

  # publishedDate may be yyyy, yyyy-mm or yyyy-mm-dd. The full date has to
  # be tested first, because the yyyy-mm pattern also matches it. For
  # yyyy-mm we need a day to make a ruby Date, so we just say the 1st.
  # A bare year, a missing value, or an impossible date (e.g. month 13)
  # leaves publication_date nil instead of raising.
  item.publication_date = begin
    case v_info["publishedDate"]
    when /(\d\d\d\d)-(\d\d)-(\d\d)/ then Date.new($1.to_i, $2.to_i, $3.to_i)
    when /(\d\d\d\d)-(\d\d)/        then Date.new($1.to_i, $2.to_i, 1)
    end
  rescue ArgumentError
    nil
  end

  item.format         = if v_info["printType"] == "MAGAZINE"
                          :serial
                        else
                          "Book"
                        end

  item.language_code  = v_info["language"]

  (v_info["authors"] || []).each do |author_name|
    item.authors << Author.new(:display => author_name)
  end

  # Find ISBN's, prefer ISBN-13, but an ISBN-10 is okay too.
  item.isbn           = identifier_of.call("ISBN_13") || identifier_of.call("ISBN_10")

  # only VERY occasionally does a GBS hit have an OCLC number, but let's look
  # just in case. It shows up as an "OTHER" identifier prefixed "OCLC:";
  # the number is taken from the identifier string itself.
  oclc_node = identifiers.find do |node|
    node["type"] == "OTHER" && node["identifier"].to_s.start_with?("OCLC:")
  end
  item.oclcnum        = oclc_node && oclc_node["identifier"].to_s[/OCLC:(.*)/, 1]

  # save viewability status in custom_data. PARTIAL, ALL_PAGES, NO_PAGES or UNKNOWN.
  # https://developers.google.com/books/docs/v1/reference/volumes#resource
  access_info = item_response["accessInfo"]
  viewability = access_info && access_info["viewability"]
  item.custom_data[:viewability] = viewability
  # link_is_fulltext is only set when Google reported a viewability at all.
  item.link_is_fulltext = (viewability == "ALL_PAGES") if viewability

  return item
end

#max_per_page ⇒ Object

BentoSearch::SearchEngine API



187
188
189
# File 'app/search_engines/bento_search/google_books_engine.rb', line 187

# Largest page size this engine reports supporting.
# NOTE(review): Google's Books API documentation lists a lower maxResults
# limit (40) -- confirm whether 100 is still accepted.
def max_per_page
  100
end

#multi_field_search? ⇒ Boolean

Returns:

  • (Boolean)


208
209
210
# File 'app/search_engines/bento_search/google_books_engine.rb', line 208

# This engine always reports multi-field search as supported.
def multi_field_search?
  true
end

#search_field_definitions ⇒ Object



191
192
193
194
195
196
197
198
199
# File 'app/search_engines/bento_search/google_books_engine.rb', line 191

# Maps each search field key (nil is the default, un-fielded search) to a
# hash naming the semantic field it corresponds to.
def search_field_definitions
  field_semantics = [
    [nil,           :general],
    ["intitle",     :title],
    ["inauthor",    :author],
    ["inpublisher", :publisher],
    ["subject",     :subject],
    ["isbn",        :isbn]
  ]

  field_semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end

#search_implementation(arguments) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'app/search_engines/bento_search/google_books_engine.rb', line 41

# Runs a search against Google Books and returns a Results object.
#
# arguments - search arguments, turned into the request URL by
#             args_to_search_url.
#
# On success the Results holds one item per entry of the response's
# "items" array and total_items is taken from "totalItems". On failure
# (no response, no parsed JSON, unsuccessful HTTP status, or an "error"
# key in the JSON) the Results is returned with its error hash filled in.
def search_implementation(arguments)
  query_url = args_to_search_url(arguments)
  results   = Results.new

  response = nil
  json     = nil
  begin
    response = http_client.get(query_url)
    json     = MultiJson.load(response.body)
    # Only a fixed list of errors is rescued: rescuing everything would
    # also swallow VCR errors, which makes things confusing.
  rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
    results.error ||= {}
    results.error[:exception] = e
  end

  # A missing response, missing JSON, bad http status, or an error
  # reported inside the json all count as failure.
  failed = response.nil? || json.nil? ||
           !HTTP::Status.successful?(response.status) ||
           (json && json["error"])

  if failed
    results.error ||= {}
    results.error[:status] = response.status if response

    error_list = json && json["error"] && json["error"]["errors"]
    if error_list.kind_of?(Array)
      results.error[:message] = error_list.first.values.join(", ")
    end

    results.error[:error_info] = json["error"] if json && json.respond_to?("[]")

    # escape early with the error-carrying results
    return results
  end

  results.total_items = json["totalItems"]

  (json["items"] || []).each { |hit| results << hash_to_item(hit) }

  results
end

#sort_definitions ⇒ Object



201
202
203
204
205
206
# File 'app/search_engines/bento_search/google_books_engine.rb', line 201

# Maps each supported sort key to a hash whose :implementation value is
# nil for "relevance" (the default) and "newest" for "date_desc".
def sort_definitions
  definitions = {}
  definitions["relevance"] = { :implementation => nil } # default
  definitions["date_desc"] = { :implementation => "newest" }
  definitions
end