Class: Avro::Schema

Inherits:
Object
  • Object
show all
Defined in:
lib/avro/schema.rb

Defined Under Namespace

Classes: ArraySchema, BytesSchema, EnumSchema, Field, FixedSchema, MapSchema, NamedSchema, PrimitiveSchema, RecordSchema, UnionSchema

Constant Summary collapse

PRIMITIVE_TYPES =

Sets of strings, for backwards compatibility. See below for sets of symbols, for better performance.

Set.new(%w[null boolean string bytes int long float double])
NAMED_TYPES =
Set.new(%w[fixed enum record error])
VALID_TYPES =
PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
PRIMITIVE_TYPES_SYM =
Set.new(PRIMITIVE_TYPES.map(&:to_sym))
NAMED_TYPES_SYM =
Set.new(NAMED_TYPES.map(&:to_sym))
VALID_TYPES_SYM =
Set.new(VALID_TYPES.map(&:to_sym))
NAME_REGEX =
/^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/.freeze
INT_MIN_VALUE =
-(1 << 31)
INT_MAX_VALUE =
(1 << 31) - 1
LONG_MIN_VALUE =
-(1 << 63)
LONG_MAX_VALUE =
(1 << 63) - 1
DEFAULT_VALIDATE_OPTIONS =
{ recursive: true, encoded: false }.freeze
DECIMAL_LOGICAL_TYPE =
'decimal'
CRC_EMPTY =
0xc15d213aa4d7a795
SINGLE_OBJECT_MAGIC_NUMBER =
[0xC3, 0x01].freeze
@@fp_table =

The Java library caches this value after it is initialized, so this pattern mimics that.

nil

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(type, logical_type = nil) ⇒ Schema

Returns a new instance of Schema.



126
127
128
129
130
# File 'lib/avro/schema.rb', line 126

# Sets up the instance state shared by every schema.
#
# @param type [Symbol, #to_sym] the type name; a Symbol is stored as-is, anything
#   else is converted with #to_sym
# @param logical_type [Object, nil] stored unchanged in @logical_type
def initialize(type, logical_type=nil)
  @type_sym =
    if type.is_a?(Symbol)
      type
    else
      type.to_sym
    end
  @logical_type = logical_type
  # Starts out unset.
  @type_adapter = nil
end

Instance Attribute Details

#logical_type ⇒ Object (readonly)

Returns the value of attribute logical_type.



133
134
135
# File 'lib/avro/schema.rb', line 133

# Reader for the @logical_type instance variable.
#
# @return [Object, nil] the current value of @logical_type
def logical_type
  @logical_type
end

#type_sym ⇒ Object (readonly)

Returns the value of attribute type_sym.



132
133
134
# File 'lib/avro/schema.rb', line 132

# Reader for the @type_sym instance variable.
#
# @return [Object, nil] the current value of @type_sym
def type_sym
  @type_sym
end

Class Method Details

.parse(json_string) ⇒ Object



44
45
46
# File 'lib/avro/schema.rb', line 44

# Parses an Avro schema from its JSON text.
#
# @param json_string [String] JSON text handed to MultiJson.load
# @return [Object] whatever real_parse returns for the decoded JSON; each call
#   passes a fresh, empty Hash as the names argument
def self.parse(json_string)
  decoded = MultiJson.load(json_string)
  real_parse(decoded, {})
end

.real_parse(json_obj, names = nil, default_namespace = nil) ⇒ Object

Build an Avro Schema from data parsed out of a JSON string.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/avro/schema.rb', line 49

# Build Avro Schema from data parsed out of JSON string.
#
# Dispatches on the shape of +json_obj+:
# * Hash  - must have a "type" entry that is a member of VALID_TYPES; the
#   matching schema class is instantiated from the hash's other entries.
# * Array - treated as a union and handed to UnionSchema.new.
# * other - must be a member of PRIMITIVE_TYPES and is handed to PrimitiveSchema.new.
#
# @param json_obj [Hash, Array, String] decoded JSON schema fragment
# @param names [Hash, nil] passed through unchanged to the schema constructors
# @param default_namespace [String, nil] used for named types whose hash has
#   no "namespace" key, and passed through to array/map/union constructors
# @return [Object] the schema instance built by the selected constructor
# @raise [SchemaParseError] if a Hash has no "type", an unknown type, an
#   invalid name (unless Avro.disable_schema_name_validation is set), or a
#   type that is in VALID_TYPES but not handled here
# @raise [UnknownSchemaError] if +json_obj+ is none of the accepted shapes
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  if json_obj.is_a? Hash
    type = json_obj['type']
    logical_type = json_obj['logicalType']
    raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

    # Check that the type is valid before calling #to_sym, since symbols are never garbage
    # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
    unless VALID_TYPES.include?(type)
      raise SchemaParseError, "Unknown type: #{type}"
    end

    type_sym = type.to_sym
    if PRIMITIVE_TYPES_SYM.include?(type_sym)
      case type_sym
      when :bytes
        precision = json_obj['precision']
        scale = json_obj['scale']
        return BytesSchema.new(type_sym, logical_type, precision, scale)
      else
        return PrimitiveSchema.new(type_sym, logical_type)
      end
    elsif NAMED_TYPES_SYM.include? type_sym
      name = json_obj['name']
      unless Avro.disable_schema_name_validation
        # `name !~ NAME_REGEX` calls name.=~, which plain objects (e.g. an Integer
        # name) no longer have since Ruby 3.2 and would surface as NoMethodError.
        # Anything that cannot be matched is simply an invalid name.
        name_matches = name.respond_to?(:=~) && name =~ NAME_REGEX
        raise SchemaParseError, "Name #{name} is invalid for type #{type}!" unless name_matches
      end
      # An explicit "namespace" key wins even when its value is nil.
      namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
      aliases = json_obj['aliases']
      case type_sym
      when :fixed
        size = json_obj['size']
        precision = json_obj['precision']
        scale = json_obj['scale']
        return FixedSchema.new(name, namespace, size, names, logical_type, aliases, precision, scale)
      when :enum
        symbols = json_obj['symbols']
        doc     = json_obj['doc']
        default = json_obj['default']
        return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
      when :record, :error
        fields = json_obj['fields']
        doc    = json_obj['doc']
        return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
      else
        raise SchemaParseError, "Unknown named type: #{type}"
      end

    else
      case type_sym
      when :array
        return ArraySchema.new(json_obj['items'], names, default_namespace)
      when :map
        return MapSchema.new(json_obj['values'], names, default_namespace)
      else
        # Reached for members of VALID_TYPES that have no hash form handled here.
        raise SchemaParseError, "Unknown Valid Type: #{type}"
      end
    end

  elsif json_obj.is_a? Array
    # JSON array (union)
    return UnionSchema.new(json_obj, names, default_namespace)
  elsif PRIMITIVE_TYPES.include? json_obj
    return PrimitiveSchema.new(json_obj)
  else
    raise UnknownSchemaError.new(json_obj, default_namespace)
  end
end

.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS) ⇒ Object

Determine whether a Ruby datum is an instance of a schema.



119
120
121
122
123
124
# File 'lib/avro/schema.rb', line 119

# Determine if a ruby datum is an instance of a schema.
#
# @param expected_schema [Object] forwarded to SchemaValidator.validate!
# @param logical_datum [Object] forwarded to SchemaValidator.validate!
# @param options [Hash] forwarded to SchemaValidator.validate!
# @return [Boolean] true when validate! completes, false when it raises
#   SchemaValidator::ValidationError; any other exception propagates
def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS)
  begin
    SchemaValidator.validate!(expected_schema, logical_datum, options)
  rescue SchemaValidator::ValidationError
    return false
  end

  true
end

Instance Method Details

#==(other, _seen = nil) ⇒ Object



210
211
212
# File 'lib/avro/schema.rb', line 210

# Equality on the type symbol alone.
#
# @param other [Object] object to compare against
# @param _seen [Object, nil] not used by this implementation
# @return [Boolean] false unless +other+ is a Schema; otherwise the result of
#   comparing the two type_sym values
def ==(other, _seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end

#be_read?(other_schema) ⇒ Boolean

Returns:

  • (Boolean)


202
203
204
# File 'lib/avro/schema.rb', line 202

# Asks +other_schema+ whether it can read this schema.
#
# @param other_schema [#read?] receiver of the read? call
# @return [Object] whatever other_schema.read?(self) returns
def be_read?(other_schema)
  other_schema.read?(self)
end

#crc_64_avro_fingerprint ⇒ Object



171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/avro/schema.rb', line 171

# Computes a table-driven fingerprint over the bytes of this schema's parsing
# form (as produced by Avro::SchemaNormalization.to_parsing_form).
#
# The accumulator starts at CRC_EMPTY; each byte selects an entry of
# @@fp_table that is XORed into the accumulator shifted right by 8 bits.
# The table is built on first use via initFPTable.
#
# @return [Integer] the fingerprint; CRC_EMPTY when the parsing form is empty
def crc_64_avro_fingerprint
  canonical_form = Avro::SchemaNormalization.to_parsing_form(self)
  octets = canonical_form.unpack("C*")

  initFPTable unless @@fp_table

  octets.inject(CRC_EMPTY) do |acc, octet|
    (acc >> 8) ^ @@fp_table[(acc ^ octet) & 0xff]
  end
end

#hash(_seen = nil) ⇒ Object



214
215
216
# File 'lib/avro/schema.rb', line 214

# Hash code derived solely from #type_sym.
#
# @param _seen [Object, nil] not used by this implementation
# @return [Integer] type_sym.hash
def hash(_seen=nil)
  type_sym.hash
end

#initFPTable ⇒ Object



160
161
162
163
164
165
166
167
168
169
# File 'lib/avro/schema.rb', line 160

# Builds the 256-entry lookup table used by crc_64_avro_fingerprint and
# stores it in @@fp_table.
#
# Entry +i+ is +i+ run through eight rounds of "shift right by one, then XOR
# with CRC_EMPTY if the bit shifted out was set".
#
# The table is assembled in a local and assigned to @@fp_table only once it
# is complete, so code that checks `@@fp_table` for truthiness never sees a
# partially filled table (the previous version published an array of nils
# first and filled it in afterwards).
#
# @return [Array<Integer>] the newly built table
def initFPTable
  table = Array.new(256) do |i|
    fp = i
    8.times do
      # -(fp & 1) is 0 or -1 (all bits set), so the AND either drops or keeps CRC_EMPTY.
      fp = (fp >> 1) ^ (CRC_EMPTY & -(fp & 1))
    end
    fp
  end
  @@fp_table = table
end

#md5_fingerprint ⇒ Object

Returns the MD5 fingerprint of the schema as an Integer.



144
145
146
147
# File 'lib/avro/schema.rb', line 144

# Returns the MD5 fingerprint of the schema as an Integer.
#
# The digest is taken over SchemaNormalization.to_parsing_form(self) and its
# hexadecimal text is converted to an Integer.
#
# @return [Integer]
def md5_fingerprint
  hex_digest = Digest::MD5.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.to_i(16)
end

#mutual_read?(other_schema) ⇒ Boolean

Returns:

  • (Boolean)


206
207
208
# File 'lib/avro/schema.rb', line 206

# Delegates to SchemaCompatibility.mutual_read?, passing +other_schema+ first
# and this schema second.
#
# @param other_schema [Object] the schema to compare with
# @return [Object] whatever SchemaCompatibility.mutual_read? returns
def mutual_read?(other_schema)
  SchemaCompatibility.mutual_read?(other_schema, self)
end

#read?(writers_schema) ⇒ Boolean

Returns:

  • (Boolean)


198
199
200
# File 'lib/avro/schema.rb', line 198

# Delegates to SchemaCompatibility.can_read?, passing +writers_schema+ first
# and this schema second.
#
# @param writers_schema [Object] the schema the data was written with
# @return [Object] whatever SchemaCompatibility.can_read? returns
def read?(writers_schema)
  SchemaCompatibility.can_read?(writers_schema, self)
end

#sha256_fingerprint ⇒ Object

Returns the SHA-256 fingerprint of the schema as an Integer.



150
151
152
153
# File 'lib/avro/schema.rb', line 150

# Returns the SHA-256 fingerprint of the schema as an Integer.
#
# The digest is taken over SchemaNormalization.to_parsing_form(self) and its
# hexadecimal text is converted to an Integer.
#
# @return [Integer]
def sha256_fingerprint
  hex_digest = Digest::SHA256.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.to_i(16)
end

#single_object_encoding_header ⇒ Object



185
186
187
# File 'lib/avro/schema.rb', line 185

# Builds the header byte list: the entries of SINGLE_OBJECT_MAGIC_NUMBER
# followed by the bytes from #single_object_schema_fingerprint.
#
# @return [Array<Integer>] a new array; the constant itself is not modified
def single_object_encoding_header
  SINGLE_OBJECT_MAGIC_NUMBER + single_object_schema_fingerprint
end

#single_object_schema_fingerprint ⇒ Object



188
189
190
191
192
193
194
195
196
# File 'lib/avro/schema.rb', line 188

# Splits the value of #crc_64_avro_fingerprint into its eight low-order
# bytes, least significant byte first.
#
# @return [Array<Integer>] eight integers, each in 0..255
def single_object_schema_fingerprint
  fingerprint = crc_64_avro_fingerprint
  Array.new(8) { |position| (fingerprint >> (8 * position)) & 0xff }
end

#subparse(json_obj, names = nil, namespace = nil) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/avro/schema.rb', line 218

# Resolves a nested schema definition.
#
# When +json_obj+ is a String and +names+ is given, the full name built by
# Name.make_fullname is looked up in +names+ first and the stored value is
# returned on a hit. Otherwise the object is parsed with Schema.real_parse.
#
# @param json_obj [Object] nested schema definition or type name
# @param names [#include?, #[], nil] previously defined schemas keyed by full name
# @param namespace [String, nil] namespace used to build the full name and
#   forwarded to Schema.real_parse
# @return [Object] the stored or freshly parsed schema
# @raise [SchemaParseError] re-raised unchanged when parsing raises one;
#   raised in place of any other StandardError raised while parsing
def subparse(json_obj, names=nil, namespace=nil)
  if json_obj.is_a?(String) && names
    qualified = Name.make_fullname(json_obj, namespace)
    return names[qualified] if names.include?(qualified)
  end

  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    # Already the right error type: let it through untouched.
    raise
  rescue StandardError
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end

#to_avro(_names = nil) ⇒ Object



232
233
234
235
236
# File 'lib/avro/schema.rb', line 232

# Builds the Hash form of this schema.
#
# @param _names [Object, nil] not used by this implementation
# @return [Hash] always has a 'type' entry (from #type); has a 'logicalType'
#   entry only when #logical_type is truthy
def to_avro(_names=nil)
  { 'type' => type }.tap do |avro|
    avro['logicalType'] = logical_type if logical_type
  end
end

#to_s ⇒ Object



238
239
240
# File 'lib/avro/schema.rb', line 238

# Serializes the result of #to_avro with MultiJson.dump.
#
# @return [Object] whatever MultiJson.dump returns for the #to_avro value
def to_s
  avro_form = to_avro
  MultiJson.dump(avro_form)
end

#type ⇒ Object

Returns the type as a string (rather than a symbol), for backwards compatibility. Deprecated in favor of #type_sym.



137
# File 'lib/avro/schema.rb', line 137

# Returns the type as a string (rather than a symbol), for backwards compatibility.
#
# @deprecated Use {#type_sym} instead.
# @return [String] @type_sym converted with #to_s
def type
  @type_sym.to_s
end

#type_adapter ⇒ Object



139
140
141
# File 'lib/avro/schema.rb', line 139

# Returns the adapter for this schema's type/logical type pair, memoized in
# @type_adapter.
#
# On first use the adapter is requested from LogicalTypes.type_adapter; when
# that returns nil or false, LogicalTypes::Identity is used instead.
#
# @return [Object] the memoized adapter
def type_adapter
  return @type_adapter if @type_adapter

  resolved = LogicalTypes.type_adapter(type, logical_type, self)
  @type_adapter = resolved || LogicalTypes::Identity
end