Class: Rospatent::Client

Inherits:
Object
  • Object
show all
Includes:
InputValidator
Defined in:
lib/rospatent/client.rb

Overview

Main client for interacting with the Rospatent API

Instance Method Summary collapse

Methods included from InputValidator

#validate_array, #validate_date, #validate_enum, #validate_hash, #validate_params, #validate_patent_id, #validate_positive_integer, #validate_required_date, #validate_required_string, #validate_string, #validate_string_enum, #validate_string_or_array, #validate_text_with_word_count

Constructor Details

#initialize(token: nil, logger: nil, cache: nil) ⇒ Client

Create a new client instance

Parameters:

  • token (String) (defaults to: nil)

    JWT token for authentication (optional if set in configuration)

  • logger (Rospatent::Logger) (defaults to: nil)

    Custom logger instance (optional)

  • cache (Rospatent::Cache) (defaults to: nil)

    Custom cache instance (optional)

Raises:

  • (Errors::MissingTokenError)

    If no token is passed and none is set in the configuration



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rospatent/client.rb', line 19

# Build a client and resolve the token, logger and cache it will use.
#
# @param token [String, nil] JWT token; falls back to Rospatent.configuration.token
# @param logger [Object, nil] logger to use; create_logger supplies one when omitted
# @param cache [Object, nil] cache to use; create_cache supplies one when omitted
# @raise [Errors::MissingTokenError] when neither the argument nor the configuration yields a token
def initialize(token: nil, logger: nil, cache: nil)
  # An explicitly passed token wins over the configured one
  @token = token
  @token ||= Rospatent.configuration.token
  unless @token
    raise Errors::MissingTokenError, "API token is required"
  end

  # Injected collaborators are used as-is; defaults are built only when missing
  @logger = logger
  @logger ||= create_logger
  @cache = cache
  @cache ||= create_cache

  # Request metrics start from zero for every client
  @request_count, @total_duration = 0, 0.0
end

Instance Method Details

#batch_patents(document_ids, batch_size: 10) ⇒ Enumerator

Batch process multiple patents

Parameters:

  • document_ids (Array<String>)

    Array of document IDs

  • batch_size (Integer) (defaults to: 10)

    Number of patents to process concurrently

Returns:

  • (Enumerator)

    Enumerator that yields patent documents



482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# File 'lib/rospatent/client.rb', line 482

# Fetch many patents, running the lookups of each slice in parallel threads.
#
# Without a block an Enumerator over the same results is returned. A lookup
# that raises StandardError does not abort the batch: the error is logged and
# an error Hash is yielded in place of the document.
#
# @param document_ids [Array<String>] document IDs to fetch
# @param batch_size [Integer] number of IDs fetched concurrently per slice
# @yield [Hash] the patent document, or { error:, document_id: } on failure
# @return [Enumerator] when no block is given
def batch_patents(document_ids, batch_size: 10)
  unless block_given?
    return enum_for(:batch_patents, document_ids, batch_size: batch_size)
  end

  validate_array(
    document_ids, "document_ids",
    max_size: Rospatent.configuration.validation_limits[:batch_ids_max_size]
  )
  slice_size = validate_positive_integer(
    batch_size, "batch_size",
    max_value: Rospatent.configuration.validation_limits[:batch_size_max_value]
  )

  document_ids.each_slice(slice_size) do |slice|
    workers = slice.map do |id|
      Thread.new do
        begin
          patent(id)
        rescue StandardError => error
          @logger.log_error(error, { document_id: id, operation: "batch_patents" })
          { error: error.message, document_id: id }
        end
      end
    end

    # Thread#value joins each worker, so results are yielded in input order
    workers.each do |worker|
      yield worker.value
    end
  end
end

#classification_code(classifier_id, code:, lang: "ru") ⇒ Hash

Get detailed information about a specific classification code

Examples:

Get information about IPC code

info = client.classification_code("ipc", code: "F02K9/00", lang: "ru")

Parameters:

  • classifier_id (String)

    Classification system identifier (“ipc” or “cpc”)

  • code (String)

    Classification code to look up

  • lang (String) (defaults to: "ru")

    Language for the description (“ru” or “en”)

Returns:

  • (Hash)

    Detailed information about the classification code

Raises:



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# File 'lib/rospatent/client.rb', line 378

# Look up one classification code, serving repeated lookups from the cache.
#
# @param classifier_id [String] classification system, "ipc" or "cpc"
# @param code [String] classification code to look up
# @param lang [String] description language, "ru" or "en"
# @return [Hash] response of the classification code endpoint (or its cached copy)
def classification_code(classifier_id, code:, lang: "ru")
  classifier = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  max_code_length = Rospatent.configuration.validation_limits[:classification_code_max_length]
  code_value = validate_string(code, "code", max_length: max_code_length)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:code:#{classifier}:#{code_value}:#{language}"

  # Serve from cache when possible
  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  # The endpoint takes the code and language as a POST body
  response = post(
    "/patsearch/v0.2/classification/#{classifier}/code/",
    { code: code_value, lang: language }
  )

  # Keep the answer for one hour
  one_hour = 3600
  @cache.set(key, response, ttl: one_hour)
  @logger.log_cache("set", key, ttl: one_hour)

  response
end

#classification_search(classifier_id, query:, lang: "ru") ⇒ Hash

Search within a classification system (IPC or CPC) using natural language

Examples:

Search for rocket-related IPC codes

results = client.classification_search("ipc", query: "ракета", lang: "ru")

Parameters:

  • classifier_id (String)

    Classification system identifier (“ipc” or “cpc”)

  • query (String)

    Search query in natural language

  • lang (String) (defaults to: "ru")

    Language for the search (“ru” or “en”)

Returns:

  • (Hash)

    Search results containing classification codes and descriptions

Raises:



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/rospatent/client.rb', line 337

# Search a classification system with a free-text query, caching the answer.
#
# @param classifier_id [String] classification system, "ipc" or "cpc"
# @param query [String] natural-language search query
# @param lang [String] search language, "ru" or "en"
# @return [Hash] response of the classification search endpoint (or its cached copy)
def classification_search(classifier_id, query:, lang: "ru")
  classifier = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  max_query_length = Rospatent.configuration.validation_limits[:classification_query_max_length]
  query_text = validate_string(query, "query", max_length: max_query_length)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:search:#{classifier}:#{query_text}:#{language}"
  cached = @cache.get(key)

  unless cached
    @logger.log_cache("miss", key)

    # The endpoint takes the query and language as a POST body
    body = { query: query_text, lang: language }
    found = post("/patsearch/v0.2/classification/#{classifier}/search/", body)

    # Keep the answer for 30 minutes
    @cache.set(key, found, ttl: 1800)
    @logger.log_cache("set", key, ttl: 1800)
    return found
  end

  @logger.log_cache("hit", key)
  cached
end

#datasets_tree ⇒ Array<Hash>

Get the list of available search datasets (collections)

Returns:

  • (Array<Hash>)

    List of available datasets organized in a tree structure



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/rospatent/client.rb', line 190

# Fetch the tree of available search datasets, caching it for one hour.
#
# @return [Array<Hash>] response of the datasets tree endpoint (or its cached copy)
def datasets_tree
  key = "datasets:tree"
  cached = @cache.get(key)

  unless cached
    @logger.log_cache("miss", key)

    tree = get("/patsearch/v0.2/datasets/tree", {})

    # Keep the tree for one hour
    @cache.set(key, tree, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
    return tree
  end

  @logger.log_cache("hit", key)
  cached
end

#get(endpoint, params = {}, binary: false) ⇒ Hash, String

Execute a GET request to the API

Parameters:

  • endpoint (String)

    API endpoint

  • params (Hash) (defaults to: {})

    Query parameters (optional)

  • binary (Boolean) (defaults to: false)

    Whether to expect binary response (default: false)

Returns:

  • (Hash, String)

    Response data (Hash for JSON, String for binary)



415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
# File 'lib/rospatent/client.rb', line 415

# Execute a GET request against the API and record request metrics.
#
# The request is logged, counted in @request_count, and its elapsed time is
# added to @total_duration. Faraday errors are logged and handed to
# handle_error.
#
# @param endpoint [String] API endpoint
# @param params [Hash] query parameters
# @param binary [Boolean] when true, "Accept: */*" is sent and the response
#   goes through handle_binary_response instead of handle_response
# @return [Hash, String] whatever the selected response handler returns
def get(endpoint, params = {}, binary: false)
  # Use the monotonic clock: wall-clock time can be stepped (NTP, manual
  # changes) mid-request, which would yield negative or inflated durations.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("GET", endpoint, params, connection.headers)
  @request_count += 1

  response = connection.get(endpoint, params) do |req|
    if binary
      req.headers["Accept"] = "*/*"
    else
      req.headers["Accept"] = "application/json"
      req.headers["Content-Type"] = "application/json"
    end
    req.headers["X-Request-ID"] = request_id
  end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("GET", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  if binary
    handle_binary_response(response, request_id)
  else
    handle_response(response, request_id)
  end
rescue Faraday::Error => e
  @logger.log_error(e, { endpoint: endpoint, params: params, request_id: request_id })
  handle_error(e)
end

#parse_abstract(patent_data, format: :text, language: "ru") ⇒ String?

Extract and parse the abstract content from a patent document. Delegates to PatentParser.parse_abstract.

Examples:

Get plain text abstract

abstract = client.parse_abstract(patent_doc)

Get HTML abstract in English

abstract_html = client.parse_abstract(patent_doc, format: :html, language: "en")

Parameters:

  • patent_data (Hash)

    The patent document data returned by #patent method

  • format (Symbol) (defaults to: :text)

    The desired output format (:text or :html)

  • language (String) (defaults to: "ru")

    The language code (e.g., “ru”, “en”)

Returns:

  • (String, nil)

    The parsed abstract content in the requested format or nil if not found



300
301
302
303
304
305
306
# File 'lib/rospatent/client.rb', line 300

# Validate the options, then let PatentParser extract the abstract.
#
# @param patent_data [Hash] patent document data as returned by #patent
# @param format [Symbol] output format, :text or :html
# @param language [String, nil] language code; the length check is skipped when falsy
# @return [String, nil] whatever PatentParser.parse_abstract returns
def parse_abstract(patent_data, format: :text, language: "ru")
  allowed_formats = %i[text html]
  validate_enum(format, allowed_formats, "format")

  if language
    validate_string(language, "language", max_length: 5)
  end

  parser_options = { format: format, language: language }
  PatentParser.parse_abstract(patent_data, **parser_options)
end

#parse_description(patent_data, format: :text, language: "ru") ⇒ String, ...

Extract and parse the description content from a patent document. Delegates to PatentParser.parse_description.

Examples:

Get plain text description

description = client.parse_description(patent_doc)

Get HTML description

description_html = client.parse_description(patent_doc, format: :html)

Get description split into sections

sections = client.parse_description(patent_doc, format: :sections)

Parameters:

  • patent_data (Hash)

    The patent document data returned by #patent method

  • format (Symbol) (defaults to: :text)

    The desired output format (:text, :html, or :sections)

  • language (String) (defaults to: "ru")

    The language code (e.g., “ru”, “en”)

Returns:

  • (String, Array, nil)

    The parsed description content in the requested format or nil if not found



321
322
323
324
325
326
327
# File 'lib/rospatent/client.rb', line 321

# Validate the options, then let PatentParser extract the description.
#
# @param patent_data [Hash] patent document data as returned by #patent
# @param format [Symbol] output format, :text, :html or :sections
# @param language [String, nil] language code; the length check is skipped when falsy
# @return [String, Array, nil] whatever PatentParser.parse_description returns
def parse_description(patent_data, format: :text, language: "ru")
  allowed_formats = %i[text html sections]
  validate_enum(format, allowed_formats, "format")

  if language
    validate_string(language, "language", max_length: 5)
  end

  parser_options = { format: format, language: language }
  PatentParser.parse_description(patent_data, **parser_options)
end

#patent(document_id) ⇒ Hash

Fetch a specific patent by its document ID using the dedicated endpoint. The document_id must follow one of these formats:

  • Published documents: {country code}{publication number}{document type code}_{publication date YYYYMMDD}. Example: RU134694U1_20131120

  • Unpublished applications: {country code}{application number}{document type code}_{application date YYYYMMDD}

Parameters:

  • document_id (String)

    The document ID to retrieve

Returns:

  • (Hash)

    The patent document data

Raises:



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rospatent/client.rb', line 54

# Fetch one patent document by ID, caching it for one hour.
#
# @param document_id [String] document ID; checked with validate_patent_id
# @return [Hash] response of the docs endpoint (or its cached copy)
def patent(document_id)
  patent_id = validate_patent_id(document_id)
  key = "patent:#{patent_id}"

  cached = @cache.get(key)
  unless cached
    @logger.log_cache("miss", key)

    # The docs endpoint addresses a document directly by its ID
    fetched = get("/patsearch/v0.2/docs/#{patent_id}")

    # Keep the document for one hour
    @cache.set(key, fetched, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
    return fetched
  end

  @logger.log_cache("hit", key)
  cached
end

#patent_by_components(country_code, number, doc_type, date) ⇒ Hash

Retrieve document by document components

Parameters:

  • country_code (String)

    Country code (e.g., “RU”)

  • number (String)

    Patent number

  • doc_type (String)

    Document type (e.g., “A1”)

  • date (String, Date)

    Publication date

Returns:

  • (Hash)

    Document data

Raises:



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/rospatent/client.rb', line 85

# Build a document ID from its components and fetch that patent.
#
# All four components are mandatory, so they are checked with the
# validate_required_* validators, the same ones patent_media applies to the
# equivalent arguments.
# NOTE(review): this assumes the non-required validate_string / validate_date
# let nil through (which would surface as a NoMethodError on strftime or a
# malformed ID) — confirm against InputValidator.
#
# @param country_code [String] country code (e.g. "RU"), at most 2 characters
# @param number [String] patent number
# @param doc_type [String] document type (e.g. "A1"), at most 3 characters
# @param date [String, Date] publication date
# @return [Hash] document data as returned by #patent
def patent_by_components(country_code, number, doc_type, date)
  # Validate and normalize inputs
  validated_country = validate_required_string(country_code, "country_code", max_length: 2)
  validated_number = validate_required_string(number, "number")
  validated_doc_type = validate_required_string(doc_type, "doc_type", max_length: 3)
  validated_date = validate_required_date(date, "date")

  # Document IDs end in the date as YYYYMMDD
  formatted_date = validated_date.strftime("%Y%m%d")
  document_id = "#{validated_country}#{validated_number}#{validated_doc_type}_#{formatted_date}"

  patent(document_id)
end

#patent_media(collection_id, country_code, doc_type, pub_date, pub_number, filename = nil) ⇒ String

Retrieve media data (PDF, images, 3D objects) for a patent document

Examples:

Retrieve and save a PDF with auto-generated filename

pdf_data = client.patent_media("National", "RU", "U1", "2013/11/20", "134694")
client.save_binary_file(pdf_data, "patent.pdf")

Retrieve and save a specific file

pdf_data = client.patent_media("National", "RU", "U1", "2013/11/20", "134694", "document.pdf")
client.save_binary_file(pdf_data, "patent.pdf")

Parameters:

  • collection_id (String)

    Dataset/collection identifier (e.g., “National”)

  • country_code (String)

    Country code of publication (e.g., “RU”)

  • doc_type (String)

    Document type code (e.g., “U1”)

  • pub_date (String, Date)

    Publication date in format YYYY/MM/DD

  • pub_number (String)

    Publication number

  • filename (String, nil) (defaults to: nil)

    Media file name (optional, defaults to “<formatted_number>.pdf”)

Returns:

  • (String)

    Binary content with ASCII-8BIT encoding

Raises:



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/rospatent/client.rb', line 226

# Download a media file (PDF, image, ...) attached to a patent document.
#
# @param collection_id [String] dataset/collection identifier (e.g. "National")
# @param country_code [String] country code of publication, at most 2 characters
# @param doc_type [String] document type code, at most 3 characters
# @param pub_date [String, Date] publication date
# @param pub_number [String] publication number
# @param filename [String, nil] media file name; "<formatted number>.pdf" when nil
# @return [String] result of the binary GET request
def patent_media(collection_id, country_code, doc_type, pub_date, pub_number,
                 filename = nil)
  collection = validate_required_string(collection_id, "collection_id")
  country = validate_required_string(country_code, "country_code", max_length: 2)
  kind = validate_required_string(doc_type, "doc_type", max_length: 3)
  published_on = validate_required_date(pub_date, "pub_date")
  raw_number = validate_required_string(pub_number, "pub_number")

  # Path segments: date as YYYY/MM/DD, number as produced by format_publication_number
  date_segment = published_on.strftime("%Y/%m/%d")
  number_segment = format_publication_number(raw_number, country)

  # Without an explicit filename, fall back to the formatted number as a PDF
  file_segment =
    if filename.nil?
      "#{number_segment}.pdf"
    else
      validate_required_string(filename, "filename")
    end

  directory = "/media/#{collection}/#{country}/#{kind}/#{date_segment}/#{number_segment}"
  get("#{directory}/#{file_segment}", {}, binary: true)
end

#patent_media_by_id(document_id, collection_id, filename = nil) ⇒ String

Retrieve media using simplified patent ID format

Examples:

Retrieve and save a PDF with auto-generated filename

pdf_data = client.patent_media_by_id("RU134694U1_20131120", "National")
client.save_binary_file(pdf_data, "patent.pdf")

Retrieve and save a specific file

pdf_data = client.patent_media_by_id("RU134694U1_20131120", "National", "document.pdf")
client.save_binary_file(pdf_data, "patent.pdf")

Parameters:

  • document_id (String)

    Patent document ID (e.g., “RU134694U1_20131120”)

  • collection_id (String)

    Collection identifier (e.g., “National”)

  • filename (String, nil) (defaults to: nil)

    Filename to retrieve (optional, defaults to “<formatted_number>.pdf”)

Returns:

  • (String)

    Binary content with ASCII-8BIT encoding

Raises:



270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/rospatent/client.rb', line 270

# Download a media file for a patent addressed by its document ID.
#
# The ID is split with parse_patent_id and the parts are forwarded to
# #patent_media, which also picks the default filename when none is given.
#
# @param document_id [String] patent document ID (e.g. "RU134694U1_20131120")
# @param collection_id [String] collection identifier (e.g. "National")
# @param filename [String, nil] file to retrieve; validated only when given
# @return [String] result of #patent_media
def patent_media_by_id(document_id, collection_id, filename = nil)
  patent_id = validate_patent_id(document_id)
  collection = validate_required_string(collection_id, "collection_id")

  # A missing filename stays nil so patent_media can derive the default
  media_name = nil
  media_name = validate_required_string(filename, "filename") if filename

  components = parse_patent_id(patent_id)

  # YYYYMMDD -> YYYY/MM/DD
  slashed_date = components[:date].gsub(/^(\d{4})(\d{2})(\d{2})$/, '\1/\2/\3')

  patent_media(
    collection,
    components[:country_code],
    components[:doc_type],
    slashed_date,
    components[:number],
    media_name
  )
end

#post(endpoint, payload) ⇒ Hash

Execute a POST request to the API

Parameters:

  • endpoint (String)

    API endpoint

  • payload (Hash)

    Request payload

Returns:

  • (Hash)

    Response data



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
# File 'lib/rospatent/client.rb', line 452

# Execute a POST request with a JSON body and record request metrics.
#
# The request is logged, counted in @request_count, and its elapsed time is
# added to @total_duration. Faraday errors are logged and handed to
# handle_error.
#
# @param endpoint [String] API endpoint
# @param payload [Hash] request payload, serialized with to_json
# @return [Hash] whatever handle_response returns for the response
def post(endpoint, payload)
  # Use the monotonic clock: wall-clock time can be stepped (NTP, manual
  # changes) mid-request, which would yield negative or inflated durations.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("POST", endpoint, payload, connection.headers)
  @request_count += 1

  response = connection.post(endpoint) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
    req.body = payload.to_json
  end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("POST", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  handle_response(response, request_id)
rescue Faraday::Error => e
  @logger.log_error(e, { endpoint: endpoint, payload: payload, request_id: request_id })
  handle_error(e)
end

#save_binary_file(binary_data, file_path) ⇒ Integer

Save binary data to a file with proper encoding handling. This method ensures that binary data (PDFs, images, etc.) is written correctly.

Examples:

Save a PDF file with auto-generated filename

pdf_data = client.patent_media_by_id("RU134694U1_20131120", "National")
client.save_binary_file(pdf_data, "patent.pdf")

Save a specific file

pdf_data = client.patent_media_by_id("RU134694U1_20131120", "National", "document.pdf")
client.save_binary_file(pdf_data, "patent.pdf")

Parameters:

  • binary_data (String)

    Binary data returned from patent_media methods

  • file_path (String)

    Path where to save the file

Returns:

  • (Integer)

    Number of bytes written

Raises:

  • (SystemCallError)

    If file cannot be written



529
530
531
532
533
534
535
536
537
538
# File 'lib/rospatent/client.rb', line 529

# Write binary data (PDF, image, ...) to disk without any encoding conversion.
#
# @param binary_data [String] data returned by the patent_media methods
# @param file_path [String] destination path
# @return [Integer] number of bytes written
# @raise [SystemCallError] if the file cannot be written
def save_binary_file(binary_data, file_path)
  validate_required_string(binary_data, "binary_data")
  validate_required_string(file_path, "file_path")

  # Work on an ASCII-8BIT copy so the caller's string keeps its encoding
  raw_bytes = binary_data.b

  # "wb" opens in binary mode, so the bytes reach the file untouched
  File.open(file_path, "wb") { |io| io.write(raw_bytes) }
end

#search(**params) ⇒ Rospatent::SearchResult

Execute a search against the Rospatent API

Parameters:

  • params (Hash)

    Search parameters

Returns:

  • (Rospatent::SearchResult)



37
38
39
40
# File 'lib/rospatent/client.rb', line 37

# Run a search through a Search object bound to this client.
#
# Parameter validation is left to Search so it is not duplicated here.
#
# @param params [Hash] search parameters, forwarded unchanged
# @return [Rospatent::SearchResult] result of Search#execute
def search(**params)
  runner = Search.new(self)
  runner.execute(**params)
end

#similar_patents_by_id(document_id, count: 100) ⇒ Hash

Find patents similar to a given document ID. This method uses the Rospatent API’s similar search endpoint to find patents similar to the given document ID. The document ID should be in the format ‘XX12345Y1_YYYYMMDD’, where ‘XX’ is the country code, ‘12345’ is the publication number, ‘Y1’ is the document type, and ‘YYYYMMDD’ is the publication date.

The method returns a hash containing the similar search results, which includes the patent IDs, titles, and other relevant information.

If the document ID is not provided, the method raises an InvalidRequestError. If the API request fails, the method raises an ApiError.

Parameters:

  • document_id (String)

    The document ID to find similar patents to

  • count (Integer) (defaults to: 100)

    Maximum number of results to return (default: 100)

Returns:

  • (Hash)

    The similar search results

Raises:



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/rospatent/client.rb', line 116

# Find patents similar to the document with the given ID, caching the answer.
#
# @param document_id [String] document ID; checked with validate_patent_id
# @param count [Integer] maximum number of results to request
# @return [Hash] response of the similar search endpoint (or its cached copy)
def similar_patents_by_id(document_id, count: 100)
  patent_id = validate_patent_id(document_id)
  max_count = Rospatent.configuration.validation_limits[:similar_count_max_value]
  limit = validate_positive_integer(count, "count", max_value: max_count)

  key = "similar:id:#{patent_id}:#{limit}"

  # Serve from cache when possible
  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  # "id_search" selects lookup by document ID
  request_body = { type_search: "id_search", pat_id: patent_id, count: limit }
  found = post_with_redirects("/patsearch/v0.2/similar_search", request_body)

  # Keep the answer for 30 minutes
  half_hour = 1800
  @cache.set(key, found, ttl: half_hour)
  @logger.log_cache("set", key, ttl: half_hour)

  found
end

#similar_patents_by_text(text, count: 100) ⇒ Hash

Find patents similar to a given text

Parameters:

  • text (String)

    The text to find similar patents to (minimum 50 words required)

  • count (Integer) (defaults to: 100)

    Maximum number of results to return (default: 100)

Returns:

  • (Hash)

    The similar search results

Raises:



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/rospatent/client.rb', line 153

# Find patents similar to a piece of text, caching the answer.
#
# The cache key is derived from a SHA-256 digest of the validated text.
# String#hash is unsuitable for this: it is seeded per Ruby process, so keys
# would not match in a cache shared between processes, and it is a short
# non-cryptographic hash, so two different texts could share a key.
#
# @param text [String] text to find similar patents to; word count and length
#   limits come from Rospatent.configuration.validation_limits
# @param count [Integer] maximum number of results to request
# @return [Hash] response of the similar search endpoint (or its cached copy)
def similar_patents_by_text(text, count: 100)
  # Stdlib; required here because the file's require block is not in view
  require "digest"

  # Validate inputs
  limits = Rospatent.configuration.validation_limits
  validated_text = validate_text_with_word_count(text, "search_text",
                                                 min_words: limits[:similar_text_min_words],
                                                 max_length: limits[:similar_text_max_length])
  validated_count = validate_positive_integer(count, "count", max_value: limits[:similar_count_max_value])

  # Check cache first (using a stable digest of the text for the key)
  text_digest = Digest::SHA256.hexdigest(validated_text)
  cache_key = "similar:text:#{text_digest}:#{validated_count}"
  cached_result = @cache.get(cache_key)
  if cached_result
    @logger.log_cache("hit", cache_key)
    return cached_result
  end

  @logger.log_cache("miss", cache_key)

  # "text_search" selects lookup by free text
  payload = {
    type_search: "text_search",
    pat_text: validated_text,
    count: validated_count
  }

  # Make the API request with redirect handling
  result = post_with_redirects("/patsearch/v0.2/similar_search", payload)

  # Cache the result for 30 minutes
  @cache.set(cache_key, result, ttl: 1800)
  @logger.log_cache("set", cache_key, ttl: 1800)

  result
end

#statistics ⇒ Hash

Get client statistics

Returns:

  • (Hash)

    Client usage statistics



504
505
506
507
508
509
510
511
512
513
514
515
# File 'lib/rospatent/client.rb', line 504

# Summarize this client's request metrics together with the cache statistics.
#
# @return [Hash] :requests_made, :total_duration_seconds (rounded to 3 places),
#   :average_request_time (0 until a request has been made) and :cache_stats
def statistics
  # Avoid dividing by zero before the first request
  average = 0
  average = (@total_duration / @request_count).round(3) if @request_count.positive?

  {
    requests_made: @request_count,
    total_duration_seconds: @total_duration.round(3),
    average_request_time: average,
    cache_stats: @cache.statistics
  }
end