Class: Webhookdb::Replicator::AwsPricingV1::ServiceBackfiller::UrlStreamer

Inherits:
Object
  • Object
show all
Defined in:
lib/webhookdb/replicator/aws_pricing_v1.rb

Instance Method Summary collapse

Constructor Details

#initialize(replicator, arns) ⇒ UrlStreamer

Returns a new instance of UrlStreamer.



239
240
241
242
# File 'lib/webhookdb/replicator/aws_pricing_v1.rb', line 239

# Build a streamer over a set of price lists.
#
# @param replicator [Object] collaborator that supplies +with_pricing_client+ and +logger+ to #each.
# @param arns [Enumerable] price list ARNs; #each iterates them in order.
def initialize(replicator, arns)
  @replicator = replicator
  @arns = arns
end

Instance Method Details

#each ⇒ Object



244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/webhookdb/replicator/aws_pricing_v1.rb', line 244

# Download each price list named in +@arns+ and yield one row per rate.
#
# A price list file can be enormous (200+ MB of JSON), so it is never parsed
# as a whole. It is streamed into a Tempfile and then read line by line,
# relying on the indentation of the pretty-printed document:
# - every line before the one starting with <tt>  "products"</tt> is metadata;
# - the lines up to the next <tt>  }</tt> are the products map, parsed in one go;
# - in the terms section, a line matching /^    "/ names a term type,
#   like 'OnDemand' or 'Reserved';
# - a line matching /^      "/ names a product SKU; the lines up to the next
#   /^      }/ hold all of that product's terms and are parsed together.
#
# The Tempfile is closed and deleted once a price list is finished, including
# when parsing fails or the caller's block raises or breaks out early.
#
# @yieldparam row [Hash] one rate (after +stringify_keys+) combining the
#   metadata, product, term and rate fields.
# @raise [StopIteration] if the file ends before the expected sections are complete.
# @raise [KeyError] if a required key (such as "offerCode" or "pricePerUnit") is missing.
def each
  @arns.each do |price_list_arn|
    url_req = @replicator.with_pricing_client do |client|
      client.build_request(
        :get_price_list_file_url,
        {file_format: "json", price_list_arn:},
      )
    end
    url_resp = url_req.send_request({})
    tmp = Tempfile.new("awspricing", binmode: true)
    begin
      # Even loading the body into memory is too much, so write it straight to disk.
      Webhookdb::Http.get(url_resp.url, logger: @replicator.logger, stream_body: true,
                                        timeout: Webhookdb::AWS.http_timeout,) do |fragment|
        tmp.write(fragment)
      end
      # Flush so the reader below, which opens the file by path, sees every byte.
      tmp.flush
      flines = File.foreach(tmp.path)

      # Grab the first section of the file up to 'products', this is the metadata.
      meta_str = +""
      until (mline = flines.next).match?(/^ {2}"products"/)
        meta_str << mline
      end
      # Close the object with a dummy empty pair so the fragment parses as JSON
      # (the last collected line is expected to end with a comma).
      meta_str << '"":""}'
      meta = Oj.load(meta_str)
      meta_str.clear
      publication_date = Time.parse(meta.fetch("publicationDate"))
      service_code = meta.fetch("offerCode")
      version = meta.fetch("version")

      # Grab all the products. They are a small share of the file's lines,
      # so holding them in memory is acceptable.
      products_str = +"{"
      until (pline = flines.next).match?(/^ {2}}/)
        products_str << pline
      end
      products_str << "}"
      products = Oj.load(products_str)
      products_str.clear

      # Skip the line that opens the 'terms' section, then read the rest of the file.
      flines.next
      term_type = nil
      # Stop at the document's closing brace. Lines from File.foreach keep their
      # terminator, so compare the chomped line: otherwise a file ending in "}\n"
      # would never match and the next read would raise StopIteration.
      until (line = flines.next).chomp == "}"
        # Look for 'OnDemand', 'Reserved', etc.
        if line.match?(/^ {4}"/)
          term_type = line[/^ {4}"([A-Za-z0-9]+)"/, 1]
          next
        end
        next unless line.match?(/^ {6}"/)
        # Look for the product SKU, which then lists all the terms (and rates).
        # 'ABC': {'ABC.DEF': {}, 'ABC.XYZ': {}}
        # Parse this entire set of product terms at once- it may be a few thousand lines,
        # which is probably faster anyway than trying to split it up.
        product_sku = line[/^ {6}"([A-Za-z0-9]+)"/, 1]
        term_map_str = +"{"
        until (tline = flines.next).match?(/^ {6}}/)
          term_map_str << tline
        end
        term_map_str << "}"
        term_map = Oj.load(term_map_str)
        term_map_str.clear
        product = products.fetch(product_sku)
        product_family = product.fetch("productFamily", nil)
        product_attributes = product.fetch("attributes", {})
        product_group = product_attributes.fetch("group", nil)
        product_location = product_attributes.fetch("location", nil)
        # Fall back to the raw location when there is no known region for it.
        product_region = Webhookdb::AWS::LOCATIONS_TO_REGIONS.fetch(product_location, product_location)
        product_operation = product_attributes.fetch("operation", nil)
        product_usagetype = product_attributes.fetch("usagetype", nil)
        term_map.each do |term_code, term|
          term["priceDimensions"].each do |rate_code, rate|
            # Only the first currency/amount pair is split out; the full map is kept raw.
            price_per_unit_currency, price_per_unit_amount = rate.fetch("pricePerUnit").first
            row = {
              product_sku:,
              product_family:,
              product_attributes:,
              product_group:,
              product_location:,
              product_region:,
              product_operation:,
              product_usagetype:,
              publication_date:,
              service_code:,
              version:,
              term_type:,
              term_code:,
              offer_term_code: term.fetch("offerTermCode"),
              effective_date: Time.parse(term.fetch("effectiveDate")),
              term_attributes: term.fetch("termAttributes", {}),
              rate_code:,
              applies_to: (applies_to = rate.fetch("appliesTo")) ? Sequel.pg_array(applies_to) : nil,
              begin_range: self.parse_range(rate.fetch("beginRange", nil)),
              description: rate.fetch("description"),
              end_range: self.parse_range(rate.fetch("endRange", nil)),
              unit: rate.fetch("unit", nil),
              price_per_unit_raw: rate.fetch("pricePerUnit"),
              price_per_unit_amount:,
              price_per_unit_currency:,
            }.stringify_keys
            yield row
          end
        end
      end
    ensure
      # Always release the (potentially huge) temp file, even on error or early exit.
      tmp.close!
    end
  end
end

#parse_range(value) ⇒ Object



363
364
365
366
367
# File 'lib/webhookdb/replicator/aws_pricing_v1.rb', line 363

# Convert a range bound from a price list into a number.
#
# @param value [String, nil] the raw bound; +nil+ or the literal "Inf" means unbounded.
# @return [BigDecimal, nil] the bound as a decimal, or +nil+ when unbounded.
def parse_range(value)
  unbounded = value.nil? || value == "Inf"
  unbounded ? nil : BigDecimal(value)
end