Class: AthenaUDF::BaseUDF
- Inherits:
- Object
- Inheritance hierarchy: Object → AthenaUDF::BaseUDF
- Includes:
- Utils
- Defined in:
- lib/athena-udf/base_udf.rb
Instance Attribute Summary collapse
-
#logger ⇒ Object
readonly
Returns the value of attribute logger.
Class Method Summary collapse
- .capabilities ⇒ Object
- .lambda_handler(event:, context:) ⇒ Object
Instance Method Summary collapse
- #handle_athena_record(input_schema, output_schema, records) ⇒ Object
- #handle_ping(event) ⇒ Object
- #handle_udf_request(event) ⇒ Object
-
#initialize(event:, context:) ⇒ BaseUDF
constructor
Creates a handler and configures its logger from the LOG_LEVEL environment variable (default WARN).
Methods included from Utils
#get_record_batch_bytes, #get_record_batch_index, #get_schema_bytes, #read_record_batches, #read_schema
Constructor Details
#initialize(event:, context:) ⇒ BaseUDF
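Creates a handler whose logger writes to standard output; the log level is taken from the LOG_LEVEL environment variable and defaults to WARN. The event and context arguments are accepted but not used by the constructor.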
33 34 35 36 |
# File 'lib/athena-udf/base_udf.rb', line 33
#
# Builds a handler instance and sets up its logger on standard output.
#
# The log threshold is read from the LOG_LEVEL environment variable
# (default 'WARN'). The name is handed straight to Logger#level=, which
# matches severity names case-insensitively and raises ArgumentError for
# anything else. Resolving the text with const_get would instead look it up
# as an arbitrary constant and fail with NameError or an unrelated value.
#
# @param event [Object] the invocation event; not used by the constructor
# @param context [Object] the invocation context; not used by the constructor
# @raise [ArgumentError] if LOG_LEVEL is not a recognised severity name
def initialize(event:, context:) # rubocop:disable Lint/UnusedMethodArgument
  @logger = Logger.new($stdout)
  @logger.level = ENV.fetch('LOG_LEVEL', 'WARN')
end
Instance Attribute Details
#logger ⇒ Object (readonly)
Returns the value of attribute logger.
14 15 16 |
# File 'lib/athena-udf/base_udf.rb', line 14
#
# Read-only accessor for the logger stored in @logger.
#
# @return [Object] the current value of @logger
def logger = @logger
Class Method Details
.capabilities ⇒ Object
29 30 31 |
# File 'lib/athena-udf/base_udf.rb', line 29
#
# Capabilities value that #handle_ping places in the ping response.
#
# @return [Integer] always 1
def self.capabilities = 1
.lambda_handler(event:, context:) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/athena-udf/base_udf.rb', line 16
#
# Entry point: builds a handler instance, then routes the event according to
# its '@type' entry.
#
# @param event [Hash] request whose '@type' value selects the handler
# @param context [Object] invocation context, forwarded to the constructor
# @return [Object] whatever #handle_ping or #handle_udf_request returns
# @raise [RuntimeError] when '@type' is neither 'PingRequest' nor
#   'UserDefinedFunctionRequest'
def self.lambda_handler(event:, context:)
  # The instance is created before the type is inspected, so constructor
  # errors surface even for unknown event types.
  handler = new(event:, context:)
  request_type = event['@type']

  case request_type
  when 'PingRequest' then handler.handle_ping(event)
  when 'UserDefinedFunctionRequest' then handler.handle_udf_request(event)
  else raise "Unknown event type #{request_type} from Athena"
  end
end
Instance Method Details
#handle_athena_record(input_schema, output_schema, records) ⇒ Object
79 80 81 |
# File 'lib/athena-udf/base_udf.rb', line 79
#
# Per-record hook invoked by #handle_udf_request. This base version has no
# implementation and always raises; a concrete UDF class is expected to
# override it.
#
# The error message names the receiving class and the missing method so a
# forgotten override is easy to trace. Note that NotImplementedError descends
# from ScriptError, so a bare `rescue` will not catch it.
#
# @param input_schema [Object] schema yielded alongside the input batch
# @param output_schema [Object] schema read from the request's output schema
# @param records [Object] the value passed per call by #handle_udf_request
#   (a single record from the batch)
# @raise [NotImplementedError] always
def handle_athena_record(input_schema, output_schema, records)
  raise NotImplementedError, "#{self.class} must implement #handle_athena_record"
end
#handle_ping(event) ⇒ Object
38 39 40 41 42 43 44 45 46 |
# File 'lib/athena-udf/base_udf.rb', line 38
#
# Builds the reply to a ping request.
#
# @param event [Hash] ping request; only its 'queryId' entry is read
# @return [Hash] response with '@type' 'PingResponse', the request's
#   'queryId', and the class-level capabilities value
def handle_ping(event)
  query_id = event['queryId']
  advertised = self.class.capabilities

  {
    '@type' => 'PingResponse',
    # NOTE(review): this is the literal string 'event', not a value taken
    # from the request - confirm that is intended.
    'catalogName' => 'event',
    'queryId' => query_id,
    'sourceType' => 'athena_udf',
    'capabilities' => advertised,
  }
end
#handle_udf_request(event) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/athena-udf/base_udf.rb', line 48
#
# Processes a UDF request: decodes the Base64 schema and record payloads,
# passes every input record through #handle_athena_record, and returns the
# collected results as a Base64-encoded response.
#
# @param event [Hash] request; reads 'outputSchema' => 'schema',
#   'inputRecords' => 'schema' / 'records', and 'methodName'
# @return [Hash] response with '@type' 'UserDefinedFunctionResponse', the
#   request's 'methodName', and a 'records' hash holding a fresh UUID, the
#   unchanged output schema string, and the encoded result bytes
def handle_udf_request(event)
  # Cannot find a way to write Arrow::RecordBatch to a buffer directly in Ruby.
  output_schema = read_schema(Base64.decode64(event['outputSchema']['schema']))
  builder = Arrow::RecordBatchBuilder.new(output_schema)

  schema_bytes = Base64.decode64(event['inputRecords']['schema'])
  records_bytes = Base64.decode64(event['inputRecords']['records'])

  read_record_batches(schema_bytes, records_bytes) do |input_schema, batch|
    logger.info("Processing #{batch.size} records")
    # One call to the per-record hook for each record in the batch.
    results = batch.each_record.map { |record| handle_athena_record(input_schema, output_schema, record) }
    builder.append_records(results)
  end

  result_bytes = get_record_batch_bytes(output_schema, builder.flush)

  {
    '@type' => 'UserDefinedFunctionResponse',
    'methodName' => event['methodName'],
    'records' => {
      'aId' => SecureRandom.uuid,
      # The output schema is echoed back exactly as it arrived (still Base64).
      'schema' => event['outputSchema']['schema'],
      'records' => Base64.strict_encode64(result_bytes),
    },
  }
end