Skip to content

Commit 9ac92d7

Browse files
authored
JsonSerializer/JsonDeserializer for GenericSerdeApi (confluentinc#811)
1 parent 87de9fa commit 9ac92d7

File tree

14 files changed

+1019
-61
lines changed

14 files changed

+1019
-61
lines changed

confluent_kafka/avro/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
fastavro
1+
fastavro>=0.23.0
22
requests
33
avro==1.9.2;python_version<='3.0'
44
avro-python3==1.9.2.1;python_version>='3.0'

confluent_kafka/error.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@ class ConsumeError(Exception):
2828
may be retrieved from the ``message`` attribute.
2929
3030
Args:
31-
error (KafkaError): The error that occurred
31+
error (KafkaError): The error that occurred.
32+
3233
message (Message, optional): The message returned from the broker.
34+
3335
reason (str): String description of the error.
3436
3537
"""
38+
3639
def __init__(self, error, reason=None, message=None):
3740
self.error = error
3841
if reason is None:
@@ -45,5 +48,5 @@ def __repr__(self):
4548
return str(self)
4649

4750
def __str__(self):
48-
return "{} (Error code {})".format(self.reason,
49-
self.error_code)
51+
return "{} (KafkaError code {})".format(self.reason,
52+
self.error)
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright 2020 Confluent Inc.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
19+
from io import BytesIO
20+
21+
import json
22+
import struct
23+
24+
from jsonschema import validate, ValidationError
25+
26+
from confluent_kafka.schema_registry import (_MAGIC_BYTE,
27+
Schema,
28+
topic_subject_name_strategy)
29+
from confluent_kafka.serialization import (SerializationError,
30+
Deserializer,
31+
Serializer)
32+
33+
34+
class _ContextStringIO(BytesIO):
    """
    BytesIO wrapper usable in 'with' constructs.

    Despite the historical name this is an in-memory *bytes* buffer (it
    subclasses ``io.BytesIO``), which is what the binary wire format
    written and read by the serializers in this module requires.

    The buffer is closed when the ``with`` block exits; any value needed
    from it (e.g. ``getvalue()``) must be read before leaving the block.

    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
        # Returning False lets an exception raised inside the 'with' body
        # propagate to the caller instead of being suppressed.
        return False
46+
47+
48+
class JSONSerializer(Serializer):
    """
    JSONSerializer serializes objects in the Confluent Schema Registry binary
    format for JSON.

    JSONSerializer configuration properties:

    +-----------------------+----------+--------------------------------------------------+
    | Property Name         | Type     | Description                                      |
    +=======================+==========+==================================================+
    |                       |          | Registers schemas automatically if not           |
    | auto.register.schemas | bool     | previously associated with a particular subject. |
    |                       |          | Defaults to True.                                |
    +-----------------------+----------+--------------------------------------------------+
    |                       |          | Callable(SerializationContext, str) -> str       |
    |                       |          |                                                  |
    | subject.name.strategy | callable | Instructs the JSONSerializer on how to construct |
    |                       |          | Schema Registry subject names.                   |
    |                       |          | Defaults to topic_subject_name_strategy.         |
    +-----------------------+----------+--------------------------------------------------+

    Schemas are registered to namespaces known as Subjects which define how a
    schema may evolve over time. By default the subject name is formed by
    concatenating the topic name with the message field separated by a hyphen.

    i.e. {topic name}-{message field}

    Alternative naming strategies may be configured with the property
    `subject.name.strategy`.

    Supported subject name strategies:

    +--------------------------------------+------------------------------+
    | Subject Name Strategy                | Output Format                |
    +======================================+==============================+
    | topic_subject_name_strategy(default) | {topic name}-{message field} |
    +--------------------------------------+------------------------------+
    | topic_record_subject_name_strategy   | {topic name}-{record name}   |
    +--------------------------------------+------------------------------+
    | record_subject_name_strategy         | {record name}                |
    +--------------------------------------+------------------------------+

    See `Subject name strategy`_ for additional details.

    Note:
        The ``title`` annotation, referred to as a record name
        elsewhere in this document, is not strictly required by the JSON Schema
        specification. It is however required by this Serializer. This
        annotation(record name) is used to register the Schema with the Schema
        Registry. See documentation above for additional details on Subjects
        and schema registration.

    Args:
        schema_registry_client (SchemaRegistryClient): Schema Registry
            client instance.

        schema_str (str): JSON `Schema definition`_.

        to_dict (callable, optional): Callable(object, SerializationContext) -> dict.
            Converts object to a dict.

        conf (dict): JSONSerializer configuration.

    Raises:
        ValueError: If ``to_dict`` is not callable, a configuration property
            is invalid or unrecognized, or the schema has no ``title``.

    .. _Subject name strategy:
        https://docs.confluent.io/current/schema-registry/serializer-formatter.html#subject-name-strategy

    .. _Schema definition:
        https://json-schema.org/understanding-json-schema/reference/generic.html

    """  # noqa: E501
    __slots__ = ['_hash', '_auto_register', '_known_subjects', '_parsed_schema',
                 '_registry', '_schema', '_schema_id', '_schema_name',
                 '_subject_name_func', '_to_dict']

    # default configuration
    _default_conf = {'auto.register.schemas': True,
                     'subject.name.strategy': topic_subject_name_strategy}

    def __init__(self, schema_registry_client, schema_str, to_dict=None,
                 conf=None):
        self._registry = schema_registry_client
        self._schema_id = None
        # Subjects already resolved against the registry; __call__ skips the
        # registry call for any subject found here.
        self._known_subjects = set()

        if to_dict is not None and not callable(to_dict):
            raise ValueError("to_dict must be callable with the signature"
                             " to_dict(object, SerializationContext)->dict")

        self._to_dict = to_dict

        # Overlay caller-supplied properties on a copy of the defaults, then
        # pop each recognized key so that whatever remains is unrecognized.
        conf_copy = self._default_conf.copy()
        if conf is not None:
            conf_copy.update(conf)

        self._auto_register = conf_copy.pop('auto.register.schemas')
        if not isinstance(self._auto_register, bool):
            raise ValueError("auto.register.schemas must be a boolean value")

        self._subject_name_func = conf_copy.pop('subject.name.strategy')
        if not callable(self._subject_name_func):
            raise ValueError("subject.name.strategy must be callable")

        if len(conf_copy) > 0:
            raise ValueError("Unrecognized properties: {}"
                             .format(", ".join(conf_copy.keys())))

        schema_dict = json.loads(schema_str)
        # The schema's ``title`` is what gets passed to the subject name
        # strategy as the record name, so it is mandatory here.
        schema_name = schema_dict.get('title', None)
        if schema_name is None:
            raise ValueError("Missing required JSON schema annotation title")

        self._schema_name = schema_name
        self._parsed_schema = schema_dict
        self._schema = Schema(schema_str, schema_type="JSON")

    def __call__(self, obj, ctx):
        """
        Serializes an object to the Confluent Schema Registry's JSON binary
        format.

        Args:
            obj (object): object instance to serialize.

            ctx (SerializationContext): Metadata pertaining to the serialization
                operation.

        Note:
            None objects are represented as Kafka Null.

        Raises:
            SerializationError: If ``obj`` (after ``to_dict`` conversion, when
                configured) fails validation against the schema.

        Returns:
            bytes: Confluent Schema Registry formatted JSON bytes

        """
        if obj is None:
            return None

        subject = self._subject_name_func(ctx, self._schema_name)

        # Resolve the schema ID the first time a subject is seen: register
        # the schema when auto-registration is on, otherwise look up the
        # existing registration. Known subjects skip the registry entirely.
        if subject not in self._known_subjects:
            if self._auto_register:
                self._schema_id = self._registry.register_schema(subject,
                                                                 self._schema)
            else:
                registered_schema = self._registry.lookup_schema(subject,
                                                                 self._schema)
                self._schema_id = registered_schema.schema_id
            self._known_subjects.add(subject)

        if self._to_dict is not None:
            value = self._to_dict(obj, ctx)
        else:
            value = obj

        try:
            validate(instance=value, schema=self._parsed_schema)
        except ValidationError as ve:
            raise SerializationError(ve.message)

        with _ContextStringIO() as fo:
            # Write the magic byte and schema ID in network byte order (big endian)
            fo.write(struct.pack('>bI', _MAGIC_BYTE, self._schema_id))
            # JSON dump always writes a str never bytes
            # https://docs.python.org/3/library/json.html
            fo.write(json.dumps(value).encode('utf8'))

            # Read the buffer before the 'with' block closes it.
            return fo.getvalue()
221+
222+
223+
class JSONDeserializer(Deserializer):
    """
    JSONDeserializer decodes bytes written in the Schema Registry
    JSON format to an object.

    Args:
        schema_str (str): JSON `Schema definition`_ used for validating
            records.

        from_dict (callable, optional): Callable(dict, SerializationContext) -> object.
            Converts dict to an instance of some object.

    Raises:
        ValueError: If ``from_dict`` is provided but is not callable.

    .. _Schema definition:
        https://json-schema.org/understanding-json-schema/reference/generic.html

    """  # noqa: E501
    __slots__ = ['_parsed_schema', '_from_dict']

    def __init__(self, schema_str, from_dict=None):
        self._parsed_schema = json.loads(schema_str)

        if from_dict is not None and not callable(from_dict):
            raise ValueError("from_dict must be callable with the signature"
                             " from_dict(dict, SerializationContext) -> object")

        self._from_dict = from_dict

    def __call__(self, value, ctx):
        """
        Deserializes Schema Registry formatted JSON to JSON object literal(dict).

        Args:
            value (bytes): Confluent Schema Registry formatted JSON bytes

            ctx (SerializationContext): Metadata pertaining to the serialization
                operation.

        Returns:
            The deserialized JSON document, or the result of
            ``from_dict(document, ctx)`` when ``from_dict`` was configured.
            None if ``value`` is None.

        Raises:
            SerializationError: If ``value`` is too short to carry the 5-byte
                header plus a payload, has an unexpected magic byte, or
                cannot be validated by the schema configured with this
                JSONDeserializer instance.

        """
        if value is None:
            return None

        # The header alone is 5 bytes (1 magic byte + 4-byte schema ID), so
        # anything this short cannot also contain a JSON payload.
        if len(value) <= 5:
            raise SerializationError("Message too small. This message was not"
                                     " produced with a Confluent"
                                     " Schema Registry serializer")

        with _ContextStringIO(value) as payload:
            # The schema ID from the header is not needed; see below.
            magic, _ = struct.unpack('>bI', payload.read(5))
            if magic != _MAGIC_BYTE:
                raise SerializationError("Unknown magic byte. This message was"
                                         " not produced with a Confluent"
                                         " Schema Registry serializer")

            # JSON documents are self-describing; no need to query schema.
            # Decode explicitly rather than passing ``encoding=`` to
            # json.loads: that keyword was removed in Python 3.9 and raises
            # TypeError there.
            obj_dict = json.loads(payload.read().decode("utf8"))

            try:
                validate(instance=obj_dict, schema=self._parsed_schema)
            except ValidationError as ve:
                raise SerializationError(ve.message)

            if self._from_dict is not None:
                return self._from_dict(obj_dict, ctx)

            return obj_dict

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy