Class: Gitlab::TopologyServiceClient::Metrics

Inherits:
Object
  • Object
show all
Extended by:
Utils::StrongMemoize
Defined in:
lib/gitlab/topology_service_client/metrics.rb

Overview

Instrumentation for topology service gRPC metrics. Implements standard OpenTelemetry gRPC metrics as Prometheus equivalents.

Constant Summary collapse

DURATION_BUCKETS =

Histogram buckets for duration measurements (seconds). Focuses on latency SLOs.

[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5].freeze
SIZE_BUCKETS =

Histogram buckets for size measurements (bytes). Uses exponential buckets for variable sizes.

[100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(cell_id:, topology_service_address:) ⇒ Metrics

Returns a new instance of Metrics.



81
82
83
84
# File 'lib/gitlab/topology_service_client/metrics.rb', line 81

# Keeps the cell id and the topology service address on the instance.
#
# @param cell_id [Object] stored in @cell_id
# @param topology_service_address [Object] stored in @topology_service_address
def initialize(cell_id:, topology_service_address:)
  @topology_service_address = topology_service_address
  @cell_id = cell_id
end

Class Method Details

.failed_calls_total_counter ⇒ Object



73
74
75
76
77
78
79
# File 'lib/gitlab/topology_service_client/metrics.rb', line 73

# Counter for failed RPC calls, built through .metric and wrapped in
# strong_memoize under the :failed_calls_total_counter key.
#
# The +error_type+ label is declared up front because
# #increment_failed_calls_total adds that key only when an error type is
# supplied; without the declaration the counter would be incremented with two
# different sets of label keys.
# NOTE(review): assumes the Prometheus client rejects a label set whose keys
# differ from the ones seen earlier for the same metric — confirm against the
# client's label set validation.
#
# @return [Object] whatever .metric returns for a :counter type
def self.failed_calls_total_counter
  strong_memoize(:failed_calls_total_counter) do
    metric(:topology_service_rpc_failed_calls_total,
      'Total number of failed RPC calls',
      type: :counter,
      extra_labels: { error_type: nil })
  end
end

.metric(name, desc, type:, buckets: nil, extra_labels: {}) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/gitlab/topology_service_client/metrics.rb', line 18

# Creates a Prometheus metric that carries the shared gRPC label set.
#
# @param name [Symbol] metric name
# @param desc [String] metric description
# @param type [Symbol] :histogram or :counter
# @param buckets [Array<Numeric>, nil] histogram buckets; ignored for counters
# @param extra_labels [Hash] additional labels merged over the shared ones
# @return [Object] the value returned by ::Gitlab::Metrics.histogram or
#   ::Gitlab::Metrics.counter
# @raise [ArgumentError] when +type+ is neither :histogram nor :counter
def self.metric(name, desc, type:, buckets: nil, extra_labels: {})
  labels = {
    rpc_service: nil,
    rpc_method: nil,
    rpc_status: nil,
    rpc_system: 'grpc',
    cell_id: nil,
    topology_service_address: nil
  }.merge(extra_labels)

  case type
  when :histogram
    # Forward buckets only when the caller supplied them: an explicit nil
    # positional argument would replace any default the histogram factory
    # declares for that parameter.
    # NOTE(review): assumes ::Gitlab::Metrics.histogram has a default for its
    # buckets argument — confirm.
    if buckets
      ::Gitlab::Metrics.histogram(name, desc, labels, buckets)
    else
      ::Gitlab::Metrics.histogram(name, desc, labels)
    end
  when :counter
    ::Gitlab::Metrics.counter(name, desc, labels)
  else
    raise ArgumentError, "Unsupported metric type: #{type}"
  end
end

.request_size_histogram ⇒ Object



47
48
49
50
51
52
53
54
# File 'lib/gitlab/topology_service_client/metrics.rb', line 47

# Histogram for RPC request sizes in bytes, built through .metric with
# SIZE_BUCKETS and wrapped in strong_memoize under :request_size_histogram.
#
# @return [Object] whatever .metric returns for a :histogram type
def self.request_size_histogram
  strong_memoize(:request_size_histogram) do
    metric(
      :topology_service_rpc_request_size_bytes,
      'RPC request size in bytes',
      buckets: SIZE_BUCKETS,
      type: :histogram
    )
  end
end

.response_size_histogram ⇒ Object



56
57
58
59
60
61
62
63
# File 'lib/gitlab/topology_service_client/metrics.rb', line 56

# Histogram for RPC response sizes in bytes, built through .metric with
# SIZE_BUCKETS and wrapped in strong_memoize under :response_size_histogram.
#
# @return [Object] whatever .metric returns for a :histogram type
def self.response_size_histogram
  strong_memoize(:response_size_histogram) do
    metric_name = :topology_service_rpc_response_size_bytes
    description = 'RPC response size in bytes'

    metric(metric_name, description, type: :histogram, buckets: SIZE_BUCKETS)
  end
end

.rpc_calls_total_counter ⇒ Object



65
66
67
68
69
70
71
# File 'lib/gitlab/topology_service_client/metrics.rb', line 65

# Counter for all RPC calls, built through .metric and wrapped in
# strong_memoize under :rpc_calls_total_counter.
#
# @return [Object] whatever .metric returns for a :counter type
def self.rpc_calls_total_counter
  strong_memoize(:rpc_calls_total_counter) do
    metric_name = :topology_service_rpc_calls_total
    description = 'Total number of RPC calls'

    metric(metric_name, description, type: :counter)
  end
end

.rpc_duration_histogram ⇒ Object



38
39
40
41
42
43
44
45
# File 'lib/gitlab/topology_service_client/metrics.rb', line 38

# Histogram for RPC call durations in seconds, built through .metric with
# DURATION_BUCKETS and wrapped in strong_memoize under :rpc_duration_histogram.
#
# @return [Object] whatever .metric returns for a :histogram type
def self.rpc_duration_histogram
  strong_memoize(:rpc_duration_histogram) do
    metric(
      :topology_service_rpc_duration_seconds,
      'RPC call duration in seconds',
      buckets: DURATION_BUCKETS,
      type: :histogram
    )
  end
end

Instance Method Details

#build_labels(service:, method:, status_code:) ⇒ Object

Build the labels hash for metrics. This method is public so that the interceptor can build the labels once and reuse them.



126
127
128
129
130
131
132
133
134
135
# File 'lib/gitlab/topology_service_client/metrics.rb', line 126

# Assembles the label hash passed to the metric recording methods.
#
# @param service [Object] value for the :rpc_service label
# @param method [Object] value for the :rpc_method label
# @param status_code [Object] converted with status_code_to_label for :rpc_status
# @return [Hash] labels keyed by :rpc_service, :rpc_method, :rpc_status,
#   :rpc_system, :cell_id and :topology_service_address
def build_labels(service:, method:, status_code:)
  status_label = status_code_to_label(status_code)

  {
    rpc_service: service,
    rpc_method: method,
    rpc_status: status_label,
    rpc_system: 'grpc',
    cell_id: cell_id,
    topology_service_address: topology_service_address
  }
end

#increment_failed_calls_total(labels:, error_type: nil) ⇒ Object

Increment failed RPC calls counter



116
117
118
119
120
121
122
# File 'lib/gitlab/topology_service_client/metrics.rb', line 116

# Increments the failed-calls counter using a copy of +labels+, so the
# caller's hash is left untouched. The :error_type key is added to the copy
# only when +error_type+ is truthy.
#
# Any StandardError raised along the way is handed to log_metric_error
# instead of propagating.
#
# @param labels [Hash] label set for the counter
# @param error_type [Object, nil] optional value for the :error_type label
def increment_failed_calls_total(labels:, error_type: nil)
  labels_with_error = labels.dup.tap do |copy|
    copy[:error_type] = error_type if error_type
  end

  counter = self.class.failed_calls_total_counter
  counter.increment(labels_with_error)
rescue StandardError => error
  log_metric_error('Failed to increment failed calls total', error)
end

#increment_rpc_calls_total(labels:) ⇒ Object

Increment total RPC calls counter



109
110
111
112
113
# File 'lib/gitlab/topology_service_client/metrics.rb', line 109

# Increments the total RPC calls counter for the given label set.
#
# Any StandardError raised along the way is handed to log_metric_error
# instead of propagating.
#
# @param labels [Hash] label set for the counter
def increment_rpc_calls_total(labels:)
  counter = self.class.rpc_calls_total_counter
  counter.increment(labels)
rescue StandardError => error
  log_metric_error('Failed to increment RPC calls total', error)
end

#observe_request_size(labels:, size_bytes:) ⇒ Object

Record RPC request size



95
96
97
98
99
# File 'lib/gitlab/topology_service_client/metrics.rb', line 95

# Records an RPC request size on the request size histogram.
#
# Any StandardError raised along the way is handed to log_metric_error
# instead of propagating.
#
# @param labels [Hash] label set for the observation
# @param size_bytes [Numeric] observed request size
def observe_request_size(labels:, size_bytes:)
  histogram = self.class.request_size_histogram
  histogram.observe(labels, size_bytes)
rescue StandardError => error
  log_metric_error('Failed to observe request size', error)
end

#observe_response_size(labels:, size_bytes:) ⇒ Object

Record RPC response size



102
103
104
105
106
# File 'lib/gitlab/topology_service_client/metrics.rb', line 102

# Records an RPC response size on the response size histogram.
#
# Any StandardError raised along the way is handed to log_metric_error
# instead of propagating.
#
# @param labels [Hash] label set for the observation
# @param size_bytes [Numeric] observed response size
def observe_response_size(labels:, size_bytes:)
  histogram = self.class.response_size_histogram
  histogram.observe(labels, size_bytes)
rescue StandardError => error
  log_metric_error('Failed to observe response size', error)
end

#observe_rpc_duration(labels:, duration_seconds:) ⇒ Object

Record RPC call duration



87
88
89
90
91
92
# File 'lib/gitlab/topology_service_client/metrics.rb', line 87

# Records an RPC call duration on the duration histogram.
#
# @param labels [Hash] label set for the observation
# @param duration_seconds [Numeric] observed call duration
def observe_rpc_duration(labels:, duration_seconds:)
  histogram = self.class.rpc_duration_histogram
  histogram.observe(labels, duration_seconds)
rescue StandardError => error
  # A failure to record a metric is logged rather than raised, so it does not
  # get in the way of the gRPC call being measured.
  log_metric_error('Failed to observe RPC duration', error)
end