Class: Labkit::Logging::Sanitizer

Inherits:
Object
  • Object
show all
Defined in:
lib/labkit/logging/sanitizer.rb

Overview

Sanitizer provides log message sanitization, removing confidential information from log messages

Constant Summary collapse

SCP_URL_REGEXP =
%r{
(?:((?:[\-_.!~*()a-zA-Z\d;&=+$,]|%[a-fA-F\d]{2})+)(:(?:(?:[\-_.!~*()a-zA-Z\d;:&=+$,]|%[a-fA-F\d]{2})*))?@)        (?# 1: username, 2: password)
(?:((?:(?:[a-zA-Z0-9\-._])+|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[(?:(?:[a-fA-F\d]{1,4}:)*(?:[a-fA-F\d]{1,4}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|(?:(?:[a-fA-F\d]{1,4}:)*[a-fA-F\d]{1,4})?::(?:(?:[a-fA-F\d]{1,4}:)*(?:[a-fA-F\d]{1,4}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))?)\]))) (?# 3: host)
:
((?:[\-_.!~*'()a-zA-Z\d:@&=+$,]|%[a-fA-F\d]{2})*(?:;(?:[\-_.!~*'()a-zA-Z\d:@&=+$,]|%[a-fA-F\d]{2})*)*(?:\/(?:[\-_.!~*'()a-zA-Z\d:@&=+$,]|%[a-fA-F\d]{2})*(?:;(?:[\-_.!~*'()a-zA-Z\d:@&=+$,]|%[a-fA-F\d]{2})*)*)*)? (?# 4: path)
}x.freeze
SCP_ANCHORED_URL_REGEXP =
/^#{SCP_URL_REGEXP}$/x.freeze
ALLOWED_SCHEMES =
%w[http https ssh git].freeze
URL_REGEXP =
URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES).freeze

Class Method Summary collapse

Class Method Details

.mask_scp_url(scp_url) ⇒ Object

Ensures that URLs of the form user:password@hostname:project.git are sanitized to hide credentials



59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/labkit/logging/sanitizer.rb', line 59

def self.mask_scp_url(scp_url)
  scp_url = scp_url.to_s.strip
  m = SCP_ANCHORED_URL_REGEXP.match(scp_url)
  return "" unless m

  password = m[2]
  host = m[3]
  path = m[4]

  return "*****@#{host}:#{path}" unless password.present?

  "*****:*****@#{host}:#{path}"
end

.mask_url(url) ⇒ Object

Ensures that URLS are sanitized to hide credentials



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/labkit/logging/sanitizer.rb', line 38

def self.mask_url(url)
  url = url.to_s.strip
  p = URI::DEFAULT_PARSER.parse(url)

  # After upgrading uri to 1.0.0, the DEFAULT_PARSER has been changed from rfc2396 to rfc3986,
  # and there are the following differences in parsing between rfc2396 and rfc3986.
  # e.g:
  #
  #     URI::RFC3986_PARSER.parse('ssh://') => #<URI::Generic ssh://>
  #     URI::RFC2396_PARSER.parse('ssh://') => raise URI::InvalidURIError
  return "" if p.host.empty?

  p.password = "*****" if p.password.present?
  p.user = "*****" if p.user.present?
  p.to_s
rescue URI::InvalidURIError
  ""
end

.sanitize_field(content) ⇒ Object



20
21
22
23
# File 'lib/labkit/logging/sanitizer.rb', line 20

def self.sanitize_field(content)
  content = content.gsub(URL_REGEXP) { |url| mask_url(url) }
  content.gsub(SCP_URL_REGEXP) { |scp_url| mask_scp_url(scp_url) }
end

.sanitize_sql(sql) ⇒ Object



25
26
27
28
29
# File 'lib/labkit/logging/sanitizer.rb', line 25

def self.sanitize_sql(sql)
  PgQuery.normalize(sql)
rescue PgQuery::ParseError
  ""
end

.sql_fingerprint(normalized_sql) ⇒ Object



31
32
33
34
35
# File 'lib/labkit/logging/sanitizer.rb', line 31

def self.sql_fingerprint(normalized_sql)
  PgQuery.parse(normalized_sql)&.fingerprint
rescue PgQuery::ParseError
  ""
end