@@ -67,6 +67,24 @@ def _schema_loads(schema_str):
67
67
return Schema (schema_str , schema_type = 'AVRO' )
68
68
69
69
70
+ def _resolve_named_schema (schema , schema_registry_client , named_schemas = None ):
71
+ """
72
+ Resolves named schemas referenced by the provided schema recursively.
73
+ :param schema: Schema to resolve named schemas for.
74
+ :param schema_registry_client: SchemaRegistryClient to use for retrieval.
75
+ :param named_schemas: Dict of named schemas resolved recursively.
76
+ :return: named_schemas dict.
77
+ """
78
+ if named_schemas is None :
79
+ named_schemas = {}
80
+ if schema .references is not None :
81
+ for ref in schema .references :
82
+ referenced_schema = schema_registry_client .get_version (ref .subject , ref .version )
83
+ _resolve_named_schema (referenced_schema .schema , schema_registry_client , named_schemas )
84
+ parse_schema (loads (referenced_schema .schema .schema_str ), named_schemas = named_schemas )
85
+ return named_schemas
86
+
87
+
70
88
class AvroSerializer (Serializer ):
71
89
"""
72
90
Serializer that outputs Avro binary encoded data with Confluent Schema Registry framing.
@@ -146,7 +164,7 @@ class AvroSerializer(Serializer):
146
164
Args:
147
165
schema_registry_client (SchemaRegistryClient): Schema Registry client instance.
148
166
149
- schema_str (str): Avro `Schema Declaration. <https://avro.apache.org/docs/current/spec.html#schemas>`_
167
+ schema_str (str or Schema ): Avro `Schema Declaration. <https://avro.apache.org/docs/current/spec.html#schemas>`_ Accepts either a string or a `Schema`(Schema) instance. Note that string definitions cannot reference other schemas. For referencing other schemas, use a Schema instance.
150
168
151
169
to_dict (callable, optional): Callable(object, SerializationContext) -> dict. Converts object to a dict.
152
170
@@ -155,15 +173,21 @@ class AvroSerializer(Serializer):
155
173
__slots__ = ['_hash' , '_auto_register' , '_normalize_schemas' , '_use_latest_version' ,
156
174
'_known_subjects' , '_parsed_schema' ,
157
175
'_registry' , '_schema' , '_schema_id' , '_schema_name' ,
158
- '_subject_name_func' , '_to_dict' ]
176
+ '_subject_name_func' , '_to_dict' , '_named_schemas' ]
159
177
160
178
_default_conf = {'auto.register.schemas' : True ,
161
179
'normalize.schemas' : False ,
162
180
'use.latest.version' : False ,
163
181
'subject.name.strategy' : topic_subject_name_strategy }
164
182
165
- def __init__ (self , schema_registry_client , schema_str ,
166
- to_dict = None , conf = None ):
183
+ def __init__ (self , schema_registry_client , schema_str , to_dict = None , conf = None ):
184
+ if isinstance (schema_str , str ):
185
+ schema = _schema_loads (schema_str )
186
+ elif isinstance (schema_str , Schema ):
187
+ schema = schema_str
188
+ else :
189
+ raise TypeError ('You must pass either schema string or schema object' )
190
+
167
191
self ._registry = schema_registry_client
168
192
self ._schema_id = None
169
193
self ._known_subjects = set ()
@@ -200,9 +224,9 @@ def __init__(self, schema_registry_client, schema_str,
200
224
raise ValueError ("Unrecognized properties: {}"
201
225
.format (", " .join (conf_copy .keys ())))
202
226
203
- schema = _schema_loads (schema_str )
204
227
schema_dict = loads (schema .schema_str )
205
- parsed_schema = parse_schema (schema_dict )
228
+ self ._named_schemas = _resolve_named_schema (schema , schema_registry_client )
229
+ parsed_schema = parse_schema (schema_dict , named_schemas = self ._named_schemas )
206
230
207
231
if isinstance (parsed_schema , list ):
208
232
# if parsed_schema is a list, we have an Avro union and there
@@ -299,8 +323,9 @@ class AvroDeserializer(Deserializer):
299
323
schema_registry_client (SchemaRegistryClient): Confluent Schema Registry
300
324
client instance.
301
325
302
- schema_str (str, optional): The reader schema.
303
- If not provided, the writer schema will be used as the reader schema.
326
+ schema_str (str, Schema, optional): Avro reader schema declaration. Accepts either a string or a `Schema`(
327
+ Schema) instance. If not provided, the writer schema will be used as the reader schema. Note that string
328
+ definitions cannot reference other schemas. For referencing other schemas, use a Schema instance.
304
329
305
330
from_dict (callable, optional): Callable(dict, SerializationContext) -> object.
306
331
Converts a dict to an instance of some object.
@@ -315,13 +340,31 @@ class AvroDeserializer(Deserializer):
315
340
`Apache Avro Schema Resolution <https://avro.apache.org/docs/1.8.2/spec.html#Schema+Resolution>`_
316
341
"""
317
342
318
- __slots__ = ['_reader_schema' , '_registry' , '_from_dict' , '_writer_schemas' , '_return_record_name' ]
343
+ __slots__ = ['_reader_schema' , '_registry' , '_from_dict' , '_writer_schemas' , '_return_record_name' , '_schema' ,
344
+ '_named_schemas' ]
319
345
320
346
def __init__ (self , schema_registry_client , schema_str = None , from_dict = None , return_record_name = False ):
347
+ schema = None
348
+ if schema_str is not None :
349
+ if isinstance (schema_str , str ):
350
+ schema = _schema_loads (schema_str )
351
+ elif isinstance (schema_str , Schema ):
352
+ schema = schema_str
353
+ else :
354
+ raise TypeError ('You must pass either schema string or schema object' )
355
+
356
+ self ._schema = schema
321
357
self ._registry = schema_registry_client
322
358
self ._writer_schemas = {}
323
359
324
- self ._reader_schema = parse_schema (loads (schema_str )) if schema_str else None
360
+ if schema :
361
+ schema_dict = loads (self ._schema .schema_str )
362
+ self ._named_schemas = _resolve_named_schema (self ._schema , schema_registry_client )
363
+ self ._reader_schema = parse_schema (schema_dict ,
364
+ named_schemas = self ._named_schemas )
365
+ else :
366
+ self ._named_schemas = None
367
+ self ._reader_schema = None
325
368
326
369
if from_dict is not None and not callable (from_dict ):
327
370
raise ValueError ("from_dict must be callable with the signature "
@@ -370,10 +413,11 @@ def __call__(self, data, ctx):
370
413
writer_schema = self ._writer_schemas .get (schema_id , None )
371
414
372
415
if writer_schema is None :
373
- schema = self ._registry .get_schema (schema_id )
374
- prepared_schema = _schema_loads (schema .schema_str )
416
+ registered_schema = self ._registry .get_schema (schema_id )
417
+ self ._named_schemas = _resolve_named_schema (registered_schema , self ._registry )
418
+ prepared_schema = _schema_loads (registered_schema .schema_str )
375
419
writer_schema = parse_schema (loads (
376
- prepared_schema .schema_str ))
420
+ prepared_schema .schema_str ), named_schemas = self . _named_schemas )
377
421
self ._writer_schemas [schema_id ] = writer_schema
378
422
379
423
obj_dict = schemaless_reader (payload ,
0 commit comments