Skip to content

Commit c771e17

Browse files
authored
JSON referenced schema support (confluentinc#1514)
* JSON referenced schema support * Changes * PR Feedback * PR Feedback * PR Feedback * PR Feedback * Fix wrong documentation * PR Feedback * Update unit tests * PR Feedback * Use ref.name as the id * Remove _id_of function
1 parent 6621187 commit c771e17

File tree

7 files changed

+441
-12
lines changed

7 files changed

+441
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ v2.1.0 is a feature release with the following features, fixes and enhancements:
1111
- Added support for password protected private key in CachedSchemaRegistryClient.
1212
- Add reference support in Schema Registry client. (@RickTalken, #1304)
1313
- Migrated travis jobs to Semaphore CI (#1503)
14-
14+
- Add support for passing schema references in JSONSerializer and JSONDeserializer. (#1514)
1515

1616
confluent-kafka-python is based on librdkafka v2.1.0, see the
1717
[librdkafka release notes](https://github.com/edenhill/librdkafka/releases/tag/v2.1.0)

src/confluent_kafka/schema_registry/json_schema.py

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import json
2121
import struct
2222

23-
from jsonschema import validate, ValidationError
23+
from jsonschema import validate, ValidationError, RefResolver
2424

2525
from confluent_kafka.schema_registry import (_MAGIC_BYTE,
2626
Schema,
@@ -43,6 +43,25 @@ def __exit__(self, *args):
4343
return False
4444

4545

46+
def _resolve_named_schema(schema, schema_registry_client, named_schemas=None):
    """
    Resolves named schemas referenced by the provided schema recursively.

    Each reference name is fetched from the registry and parsed at most once
    per resolution: a name already present in ``named_schemas`` is skipped.
    This avoids redundant registry round-trips when several schemas share a
    dependency, and guarantees termination if references form a cycle.
    If two references use the same name, the first one encountered wins.

    :param schema: Schema to resolve named schemas for.
    :param schema_registry_client: SchemaRegistryClient to use for retrieval.
    :param named_schemas: Dict of named schemas resolved recursively.
    :return: named_schemas dict.
    """
    if named_schemas is None:
        named_schemas = {}
    # ``references`` may be None (or empty) for schemas without dependencies.
    for ref in schema.references or ():
        if ref.name in named_schemas:
            # Already resolved (shared dependency) or currently being
            # resolved further up the call stack (cyclic reference).
            continue
        referenced_schema = schema_registry_client.get_version(ref.subject, ref.version)
        # Record the schema *before* descending so that a cycle back to this
        # reference is detected by the membership check above.
        named_schemas[ref.name] = json.loads(referenced_schema.schema.schema_str)
        _resolve_named_schema(referenced_schema.schema, schema_registry_client, named_schemas)
    return named_schemas
63+
64+
4665
class JSONSerializer(Serializer):
4766
"""
4867
Serializer that outputs JSON encoded data with Confluent Schema Registry framing.
@@ -122,7 +141,7 @@ class JSONSerializer(Serializer):
122141
callable with JSONSerializer.
123142
124143
Args:
125-
schema_str (str): `JSON Schema definition. <https://json-schema.org/understanding-json-schema/reference/generic.html>`_
144+
schema_str (str, Schema): `JSON Schema definition. <https://json-schema.org/understanding-json-schema/reference/generic.html>`_ Accepts the schema as either a string or a ``Schema`` instance. String definitions cannot reference other schemas; to reference other schemas, pass a ``Schema`` instance.
126145
127146
schema_registry_client (SchemaRegistryClient): Schema Registry
128147
client instance.
@@ -134,14 +153,23 @@ class JSONSerializer(Serializer):
134153
""" # noqa: E501
135154
__slots__ = ['_hash', '_auto_register', '_normalize_schemas', '_use_latest_version',
136155
'_known_subjects', '_parsed_schema', '_registry', '_schema', '_schema_id',
137-
'_schema_name', '_subject_name_func', '_to_dict']
156+
'_schema_name', '_subject_name_func', '_to_dict', '_are_references_provided']
138157

139158
_default_conf = {'auto.register.schemas': True,
140159
'normalize.schemas': False,
141160
'use.latest.version': False,
142161
'subject.name.strategy': topic_subject_name_strategy}
143162

144163
def __init__(self, schema_str, schema_registry_client, to_dict=None, conf=None):
164+
self._are_references_provided = False
165+
if isinstance(schema_str, str):
166+
self._schema = Schema(schema_str, schema_type="JSON")
167+
elif isinstance(schema_str, Schema):
168+
self._schema = schema_str
169+
self._are_references_provided = bool(schema_str.references)
170+
else:
171+
raise TypeError('You must pass either str or Schema')
172+
145173
self._registry = schema_registry_client
146174
self._schema_id = None
147175
self._known_subjects = set()
@@ -178,14 +206,13 @@ def __init__(self, schema_str, schema_registry_client, to_dict=None, conf=None):
178206
raise ValueError("Unrecognized properties: {}"
179207
.format(", ".join(conf_copy.keys())))
180208

181-
schema_dict = json.loads(schema_str)
209+
schema_dict = json.loads(self._schema.schema_str)
182210
schema_name = schema_dict.get('title', None)
183211
if schema_name is None:
184212
raise ValueError("Missing required JSON schema annotation title")
185213

186214
self._schema_name = schema_name
187215
self._parsed_schema = schema_dict
188-
self._schema = Schema(schema_str, schema_type="JSON")
189216

190217
def __call__(self, obj, ctx):
191218
"""
@@ -238,7 +265,14 @@ def __call__(self, obj, ctx):
238265
value = obj
239266

240267
try:
241-
validate(instance=value, schema=self._parsed_schema)
268+
if self._are_references_provided:
269+
named_schemas = _resolve_named_schema(self._schema, self._registry)
270+
validate(instance=value, schema=self._parsed_schema,
271+
resolver=RefResolver(self._parsed_schema.get('$id'),
272+
self._parsed_schema,
273+
store=named_schemas))
274+
else:
275+
validate(instance=value, schema=self._parsed_schema)
242276
except ValidationError as ve:
243277
raise SerializationError(ve.message)
244278

@@ -258,16 +292,32 @@ class JSONDeserializer(Deserializer):
258292
framing.
259293
260294
Args:
261-
schema_str (str): `JSON schema definition <https://json-schema.org/understanding-json-schema/reference/generic.html>`_ use for validating records.
295+
schema_str (str, Schema): `JSON schema definition <https://json-schema.org/understanding-json-schema/reference/generic.html>`_ Accepts the schema as either a string or a ``Schema`` instance. String definitions cannot reference other schemas; to reference other schemas, pass a ``Schema`` instance.
262296
263297
from_dict (callable, optional): Callable(dict, SerializationContext) -> object.
264298
Converts a dict to a Python object instance.
299+
300+
schema_registry_client (SchemaRegistryClient, optional): Schema Registry client instance. Needed if ``schema_str`` is a schema referencing other schemas.
265301
""" # noqa: E501
266302

267-
__slots__ = ['_parsed_schema', '_from_dict']
303+
__slots__ = ['_parsed_schema', '_from_dict', '_registry', '_are_references_provided', '_schema']
304+
305+
def __init__(self, schema_str, from_dict=None, schema_registry_client=None):
306+
self._are_references_provided = False
307+
if isinstance(schema_str, str):
308+
schema = Schema(schema_str, schema_type="JSON")
309+
elif isinstance(schema_str, Schema):
310+
schema = schema_str
311+
self._are_references_provided = bool(schema_str.references)
312+
if self._are_references_provided and schema_registry_client is None:
313+
raise ValueError(
314+
"""schema_registry_client must be provided if "schema_str" is a Schema instance with references""")
315+
else:
316+
raise TypeError('You must pass either str or Schema')
268317

269-
def __init__(self, schema_str, from_dict=None):
270-
self._parsed_schema = json.loads(schema_str)
318+
self._parsed_schema = json.loads(schema.schema_str)
319+
self._schema = schema
320+
self._registry = schema_registry_client
271321

272322
if from_dict is not None and not callable(from_dict):
273323
raise ValueError("from_dict must be callable with the signature"
@@ -313,7 +363,14 @@ def __call__(self, data, ctx):
313363
obj_dict = json.loads(payload.read())
314364

315365
try:
316-
validate(instance=obj_dict, schema=self._parsed_schema)
366+
if self._are_references_provided:
367+
named_schemas = _resolve_named_schema(self._schema, self._registry)
368+
validate(instance=obj_dict,
369+
schema=self._parsed_schema, resolver=RefResolver(self._parsed_schema.get('$id'),
370+
self._parsed_schema,
371+
store=named_schemas))
372+
else:
373+
validate(instance=obj_dict, schema=self._parsed_schema)
317374
except ValidationError as ve:
318375
raise SerializationError(ve.message)
319376

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "http://example.com/customer.schema.json",
4+
"title": "Customer",
5+
"description": "Customer data",
6+
"type": "object",
7+
"properties": {
8+
"name": {
9+
"description": "Customer name",
10+
"type": "string"
11+
},
12+
"id": {
13+
"description": "Customer id",
14+
"type": "integer"
15+
},
16+
"email": {
17+
"description": "Customer email",
18+
"type": "string"
19+
}
20+
},
21+
"required": [ "name", "id"]
22+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "http://example.com/referencedproduct.schema.json",
4+
"title": "Order",
5+
"description": "Order",
6+
"type": "object",
7+
"properties": {
8+
"order_details": {
9+
"description": "Order Details",
10+
"$ref": "http://example.com/order_details.schema.json"
11+
},
12+
"order_date": {
13+
"description": "Order Date",
14+
"type": "string",
15+
"format": "date-time"
16+
},
17+
"product": {
18+
"description": "Product",
19+
"$ref": "http://example.com/product.schema.json"
20+
}
21+
},
22+
"required": [
23+
"order_details", "product"]
24+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "http://example.com/order_details.schema.json",
4+
"title": "Order Details",
5+
"description": "Order Details",
6+
"type": "object",
7+
"properties": {
8+
"id": {
9+
"description": "Order Id",
10+
"type": "integer"
11+
},
12+
"customer": {
13+
"description": "Customer",
14+
"$ref": "http://example.com/customer.schema.json"
15+
},
16+
"payment_id": {
17+
"description": "Payment Id",
18+
"type": "string"
19+
}
20+
},
21+
"required": [ "id", "customer"]
22+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy