Class: Avro::Schema

Inherits:
Object
  • Object
show all
Defined in:
lib/avro/schema.rb

Defined Under Namespace

Classes: ArraySchema, EnumSchema, Field, FixedSchema, MapSchema, NamedSchema, PrimitiveSchema, RecordSchema, UnionSchema

Constant Summary collapse

PRIMITIVE_TYPES =

Sets of strings, for backwards compatibility. See below for sets of symbols, for better performance.

Set.new(%w[null boolean string bytes int long float double])
NAMED_TYPES =
Set.new(%w[fixed enum record error])
VALID_TYPES =
PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
PRIMITIVE_TYPES_SYM =
Set.new(PRIMITIVE_TYPES.map(&:to_sym))
NAMED_TYPES_SYM =
Set.new(NAMED_TYPES.map(&:to_sym))
VALID_TYPES_SYM =
Set.new(VALID_TYPES.map(&:to_sym))
INT_MIN_VALUE =
-(1 << 31)
INT_MAX_VALUE =
(1 << 31) - 1
LONG_MIN_VALUE =
-(1 << 63)
LONG_MAX_VALUE =
(1 << 63) - 1

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(type, logical_type = nil) ⇒ Schema

Returns a new instance of Schema.



139
140
141
142
# File 'lib/avro/schema.rb', line 139

# Builds a schema of the given type.
#
# type         - the schema type, either a Symbol or any object responding to #to_sym
#                (for example a String); it is stored as a Symbol.
# logical_type - optional logical type annotation, stored as given (defaults to nil).
def initialize(type, logical_type=nil)
  @type_sym = if type.is_a?(Symbol)
                type
              else
                type.to_sym
              end
  @logical_type = logical_type
end

Instance Attribute Details

#logical_type ⇒ Object (readonly)

Returns the value of attribute logical_type.



145
146
147
# File 'lib/avro/schema.rb', line 145

# Reader for the logical type stored by the constructor (nil when none was supplied).
def logical_type
  @logical_type
end

#type_sym ⇒ Object (readonly)

Returns the value of attribute type_sym.



144
145
146
# File 'lib/avro/schema.rb', line 144

# Reader for the schema type as a Symbol, as stored by the constructor.
def type_sym
  @type_sym
end

Class Method Details

.parse(json_string) ⇒ Object



37
38
39
# File 'lib/avro/schema.rb', line 37

# Parses a JSON-encoded schema definition.
#
# json_string - the JSON text; it is decoded with MultiJson.load.
#
# Returns whatever real_parse builds from the decoded data, starting from an
# empty table of known names.
def self.parse(json_string)
  decoded = MultiJson.load(json_string)
  known_names = {}
  real_parse(decoded, known_names)
end

.real_parse(json_obj, names = nil, default_namespace = nil) ⇒ Object

Build Avro Schema from data parsed out of JSON string.



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/avro/schema.rb', line 42

# Build an Avro schema object from data already parsed out of a JSON string.
#
# json_obj          - a Hash (full schema definition), an Array (union) or a
#                     primitive type name contained in PRIMITIVE_TYPES.
# names             - table of known named schemas, handed on to the schema constructors.
# default_namespace - namespace used when a named schema does not carry its own
#                     'namespace' key; also handed on to array/map/union schemas.
#
# Raises SchemaParseError for a Hash without a "type" or with an unsupported type,
# and UnknownSchemaError for any other unrecognised input.
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  unless json_obj.is_a?(Hash)
    # A JSON array denotes a union.
    return UnionSchema.new(json_obj, names, default_namespace) if json_obj.is_a?(Array)
    return PrimitiveSchema.new(json_obj) if PRIMITIVE_TYPES.include?(json_obj)

    raise UnknownSchemaError.new(json_obj)
  end

  type = json_obj['type']
  logical_type = json_obj['logicalType']
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

  # Validate against the string set before calling #to_sym: symbols are never garbage
  # collected, so interning arbitrary input would allow a DoS when schemas come from
  # untrusted clients.
  raise SchemaParseError, "Unknown type: #{type}" unless VALID_TYPES.include?(type)

  kind = type.to_sym
  return PrimitiveSchema.new(kind, logical_type) if PRIMITIVE_TYPES_SYM.include?(kind)

  if NAMED_TYPES_SYM.include?(kind)
    name = json_obj['name']
    # An explicit 'namespace' key wins even when its value is nil.
    namespace = json_obj.key?('namespace') ? json_obj['namespace'] : default_namespace

    case kind
    when :fixed
      FixedSchema.new(name, namespace, json_obj['size'], names, logical_type)
    when :enum
      EnumSchema.new(name, namespace, json_obj['symbols'], names)
    when :record, :error
      RecordSchema.new(name, namespace, json_obj['fields'], names, kind)
    else
      raise SchemaParseError.new("Unknown named type: #{type}")
    end
  else
    case kind
    when :array
      ArraySchema.new(json_obj['items'], names, default_namespace)
    when :map
      MapSchema.new(json_obj['values'], names, default_namespace)
    else
      raise SchemaParseError.new("Unknown Valid Type: #{type}")
    end
  end
end

.validate(expected_schema, logical_datum, encoded = false) ⇒ Object

Determine if a ruby datum is an instance of a schema



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/avro/schema.rb', line 97

# Determines whether a Ruby datum is a valid instance of a schema.
#
# expected_schema - the schema to check against; must respond to #type_sym and
#                   #type_adapter, plus the accessor its type needs (#size, #symbols,
#                   #items, #values, #schemas or #fields).
# logical_datum   - the value to check.
# encoded         - when false (the default) the datum is first passed through
#                   expected_schema.type_adapter.encode; when true it is used as given.
#                   Nested validations always use the default.
#
# Returns true when the datum conforms to the schema, false otherwise.
# Raises RuntimeError when the schema's type symbol is not handled here.
def self.validate(expected_schema, logical_datum, encoded = false)
  datum = if encoded
            logical_datum
          else
            expected_schema.type_adapter.encode(logical_datum)
          end

  case expected_schema.type_sym
  when :null
    datum.nil?
  when :boolean
    datum == true || datum == false
  when :string, :bytes
    datum.is_a? String
  when :int
    # Integer covers what Fixnum and Bignum used to; those constants were deprecated
    # in Ruby 2.4 and removed in Ruby 3.2, where referencing them raises NameError.
    datum.is_a?(Integer) &&
        (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
  when :long
    datum.is_a?(Integer) &&
        (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
  when :float, :double
    # Integers are accepted for floating-point schemas as well.
    datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    datum.is_a?(String) && datum.bytesize == expected_schema.size
  when :enum
    expected_schema.symbols.include? datum
  when :array
    datum.is_a?(Array) &&
      datum.all?{|d| validate(expected_schema.items, d) }
  when :map
    # Guard on Hash first so that a non-map datum is reported as invalid instead of
    # raising NoMethodError on #keys (this matters when a union tries a map branch).
    datum.is_a?(Hash) &&
      datum.keys.all?{|k| k.is_a? String } &&
      datum.values.all?{|v| validate(expected_schema.values, v) }
  when :union
    expected_schema.schemas.any?{|s| validate(s, datum) }
  when :record, :error, :request
    datum.is_a?(Hash) &&
      expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
  else
    # NOTE(review): message kept byte-identical for compatibility; consider rewording.
    raise "you suck #{expected_schema.inspect} is not allowed."
  end
end

Instance Method Details

#==(other, seen = nil) ⇒ Object



167
168
169
# File 'lib/avro/schema.rb', line 167

# Two schemas compare equal here when the other object is a Schema with the same
# type symbol. The +seen+ argument is accepted but not used by this implementation.
def ==(other, seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end

#hash(seen = nil) ⇒ Object



171
172
173
# File 'lib/avro/schema.rb', line 171

# Hash code derived solely from the type symbol, the same attribute #== compares.
# The +seen+ argument is accepted but not used by this implementation.
def hash(seen=nil)
  type_sym.hash
end

#md5_fingerprint ⇒ Object

Returns the MD5 fingerprint of the schema as an Integer.



156
157
158
159
# File 'lib/avro/schema.rb', line 156

# MD5 fingerprint of the schema as an Integer: the hexadecimal MD5 digest of the
# form produced by SchemaNormalization.to_parsing_form, read as a base-16 number.
def md5_fingerprint
  hex_digest = Digest::MD5.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.to_i(16)
end

#sha256_fingerprint ⇒ Object

Returns the SHA-256 fingerprint of the schema as an Integer.



162
163
164
165
# File 'lib/avro/schema.rb', line 162

# SHA-256 fingerprint of the schema as an Integer: the hexadecimal SHA-256 digest of
# the form produced by SchemaNormalization.to_parsing_form, read as a base-16 number.
def sha256_fingerprint
  hex_digest = Digest::SHA256.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.to_i(16)
end

#subparse(json_obj, names = nil, namespace = nil) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/avro/schema.rb', line 175

# Resolves a nested schema definition.
#
# json_obj  - the nested definition; when it is a String and +names+ is given, it is
#             first looked up as a full name (built with Name.make_fullname) in +names+.
# names     - table of already known named schemas (optional).
# namespace - namespace used to qualify the name and handed on to Schema.real_parse.
#
# Returns the known schema on a name hit, otherwise the result of Schema.real_parse.
# Raises SchemaParseError; any other StandardError from parsing is converted into one.
def subparse(json_obj, names=nil, namespace=nil)
  if names && json_obj.is_a?(String)
    qualified = Name.make_fullname(json_obj, namespace)
    return names[qualified] if names.include?(qualified)
  end

  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    # Parse errors already carry a precise message; pass them through untouched.
    raise
  rescue
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end

#to_avro(names = nil) ⇒ Object



189
190
191
192
193
# File 'lib/avro/schema.rb', line 189

# Builds the Hash representation of this schema: always a 'type' entry, followed by
# a 'logicalType' entry when a logical type is set. +names+ is not used here.
def to_avro(names=nil)
  base = {'type' => type}
  logical_type ? base.merge('logicalType' => logical_type) : base
end

#to_s ⇒ Object



195
196
197
# File 'lib/avro/schema.rb', line 195

# Serialises the schema by dumping the structure returned by #to_avro with MultiJson.
def to_s
  avro = to_avro
  MultiJson.dump(avro)
end

#type ⇒ Object

Returns the type as a string (rather than a symbol), for backwards compatibility. Deprecated in favor of #type_sym.



149
# File 'lib/avro/schema.rb', line 149

# String form of the type symbol, kept for backwards compatibility; prefer #type_sym.
def type
  @type_sym.to_s
end

#type_adapter ⇒ Object



151
152
153
# File 'lib/avro/schema.rb', line 151

# Adapter for this schema's type and logical type, memoised in @type_adapter after the
# first lookup. Falls back to LogicalTypes::Identity when LogicalTypes.type_adapter
# returns nothing.
def type_adapter
  return @type_adapter if @type_adapter

  @type_adapter = LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
end