
Commit f731b24

Add Avro serializer tests
1 parent 829277d commit f731b24

6 files changed: +226 -41 lines changed

confluent_kafka/avro/schema.py

Lines changed: 31 additions & 3 deletions
@@ -1,3 +1,20 @@
+#!/usr/bin/env python
+#
+# Copyright 2018 Confluent Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 from avro.schema import PrimitiveSchema

 # Python 2 considers int an instance of str
@@ -8,12 +25,23 @@


 class GenericAvroRecord(dict):
+    """
+    Pairs an AvroRecord with its schema.
+
+    :param schema schema: A parsed Avro schema.
+    :param dict record: Existing dict to wrap in a GenericAvroRecord.
+    :raises ValueError: If schema is None.
+    :returns: Avro record with its schema.
+    :rtype: GenericAvroRecord
+    """
     __slots__ = ['schema']

-    def __init__(self, schema, datum=None):
+    def __init__(self, schema, record=None):
+        if schema is None:
+            raise ValueError("schema must not be None")
         self.schema = schema
-        if datum is not None:
-            self.update(datum)
+        if record is not None:
+            self.update(record)

     def put(self, key, value):
         self[key] = value
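
For context, the reworked constructor can be exercised as below. This is a minimal sketch, not code from this commit; it assumes the avro package's schema parser (avro.schema.Parse on Python 3, avro.schema.parse on Python 2), and the example.User schema is hypothetical.

import avro.schema

from confluent_kafka.avro.schema import GenericAvroRecord

# Hypothetical schema, for illustration only.
user_schema = avro.schema.Parse("""
{"namespace": "example", "type": "record", "name": "User",
 "fields": [{"name": "name", "type": "string"}]}
""")

# Wrap an existing dict and pair it with its schema.
record = GenericAvroRecord(user_schema, {"name": "jane"})
record.put("name", "joe")            # dict-style assignment helper
print(record.schema.fullname)        # -> example.User

# schema=None now fails fast instead of producing a schema-less record:
# GenericAvroRecord(None)  raises ValueError("schema must not be None")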

confluent_kafka/avro/serializer/__init__.py

Lines changed: 99 additions & 25 deletions
@@ -24,7 +24,7 @@
 import traceback

 from confluent_kafka.avro.schema import GenericAvroRecord, get_schema
-from confluent_kafka.avro import ClientError
+from confluent_kafka.avro.error import ClientError

 log = logging.getLogger(__name__)

@@ -47,7 +47,7 @@ def __new__(cls, message, is_key=False):
             return super(SerializerError, cls).__new__(KeySerializerError, message)
         return super(SerializerError, cls).__new__(ValueSerializerError, message)

-    def __init__(self, message):
+    def __init__(self, message, is_key=False):
         self.message = message

     def __repr__(self):
@@ -81,31 +81,101 @@ def __exit__(self, *args):
         return False


+def TopicNameStrategy(topic=None, is_key=False, schema=None):
+    """
+    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.
+
+    TopicNameStrategy returns the schema's subject in the form of <topic>-key or <topic>-value.
+
+    :param str topic: Topic name.
+    :param is_key: True if the subject is being registered for a message key.
+    :param schema schema: Parsed Avro schema. *Note* Not used by TopicNameStrategy.
+    :raises ValueError: If topic is unset.
+    :returns: The subject name with which to register the schema.
+    :rtype: str
+    """
+    if topic is None:
+        raise ValueError("Topic must be set when using TopicNameStrategy")
+
+    return "".join([topic, '-key' if is_key else '-value'])
+
+
+def RecordNameStrategy(topic=None, is_key=False, schema=None):
+    """
+    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.
+
+    RecordNameStrategy returns the fully-qualified record name regardless of the topic.
+
+    Compatibility checks are performed against all records of the same name, across all topics.
+    This strategy allows a topic to contain a mixture of different record types.
+
+    :param str topic: Topic name. *Note* Not used by RecordNameStrategy.
+    :param is_key: True if the subject is being registered for a message key. *Note* Not used by RecordNameStrategy.
+    :param schema schema: Parsed Avro schema.
+    :raises ValueError: If schema is not set.
+    :returns: The subject name with which to register the schema.
+    :rtype: str
+    """
+    if schema is None:
+        raise ValueError("Schema must be set when using RecordNameStrategy")

+    return schema.fullname
+
+
+def TopicRecordNameStrategy(topic=None, is_key=False, schema=None):
+    """
+    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.
+
+    TopicRecordNameStrategy returns the topic name followed by the fully-qualified record name.
+
+    Compatibility checks are performed against all records of the same name within the same topic.
+    Like RecordNameStrategy, mixed record types are allowed within a topic.
+    This strategy is more flexible in that records needn't be compliant across the cluster.
+
+    :param str topic: Topic name.
+    :param schema schema: Parsed Avro schema.
+    :param is_key: True if used by a key_serializer.
+    :raises ValueError: If topic or schema is not set.
+    :returns: The subject name with which to register the schema.
+    :rtype: str
+    """
+    if not all([topic, schema]):
+        raise ValueError("Both Topic and Schema must be set when using TopicRecordNameStrategy")
+    return "-".join([topic, schema.fullname])
+
+
 class AvroSerializer(object):
+    """
+    Encodes Kafka messages as Avro, registering the schema with the Confluent Schema Registry.
+
+    :param registry_client CachedSchemaRegistryClient: Instance of CachedSchemaRegistryClient.
+    :param bool is_key: True if configured as a key_serializer.
+    :param func subject_strategy(str, bool, schema): Returns the subject name used when registering schemas.
+    """

-    __slots__ = ["registry_client", "codec_cache", "is_key"]
+    __slots__ = ["registry_client", "codec_cache", "is_key", "subject_strategy"]

-    def __init__(self, registry_client, is_key=False):
+    def __init__(self, registry_client, is_key=False, subject_strategy=TopicNameStrategy):
         self.registry_client = registry_client
         self.codec_cache = {}
         self.is_key = is_key
+        self.subject_strategy = subject_strategy

     def __call__(self, topic, record):
         """
         Given a parsed avro schema, encode a record for the given topic.

-        The schema is registered with the subject of 'topic-value'
-        :param str topic: Topic name
-        :param GenericAvroRecord record: An object to serialize
-        :returns: Encoded record with schema ID as bytes
+        The schema is registered with the subject of 'topic-value'.
+        :param str topic: Topic name.
+        :param GenericAvroRecord record: An object to serialize.
+        :returns: Encoded record with schema ID as bytes.
         :rtype: bytes
         """

         if record is None:
             return None

-        subject_suffix = '-key' if self.is_key else '-value'
-        subject = topic + subject_suffix
+        subject = self.subject_strategy(topic, self.is_key, get_schema(record))

         schema_id = self.registry_client.register(subject, get_schema(record))
         if not schema_id:
@@ -115,20 +185,19 @@ def __call__(self, topic, record):
         if schema_id not in self.codec_cache:
             self.codec_cache[schema_id] = self._get_encoder_func(get_schema(record))

-        return self._encode_record_with_schema_id(schema_id, record)
+        return self._encode(schema_id, record)

     def _get_encoder_func(self, writer_schema):
         if HAS_FAST:
             return lambda record, fp: schemaless_writer(fp, writer_schema.to_json(), record)
         writer = avro.io.DatumWriter(writer_schema)
         return lambda record, fp: writer.write(record, avro.io.BinaryEncoder(fp))

-    def _encode_record_with_schema_id(self, schema_id, record):
+    def _encode(self, schema_id, datum):
         """
-        Encode a record with a given schema id. The record must
-        be a python dictionary.
+        Encode a datum with a given schema id.
         :param int schema_id: integer ID
-        :param dict record: An object to serialize
+        :param object datum: An object to serialize
         :param bool is_key: If the record is a key
         :param SerializerErr err_type: Error type to raise on serialization exception
         :returns: decoder function
@@ -154,12 +223,18 @@ def _encode_record_with_schema_id(self, schema_id, record):
             outf.write(struct.pack('>bI', MAGIC_BYTE, schema_id))

             # write the record to the rest of the buffer
-            writer(record, outf)
+            writer(datum, outf)
             return outf.getvalue()


 class AvroDeserializer(object):
+    """
+    Decodes Kafka messages encoded by Confluent Schema Registry compliant Avro serializers.

+    :param registry_client CachedSchemaRegistryClient: Instance of CachedSchemaRegistryClient.
+    :param bool is_key: True if configured as a key deserializer.
+    :param schema reader_schema: Optional reader schema to be used during deserialization.
+    """
     __slots__ = ["registry_client", "codec_cache", "is_key", "reader_schema"]

     def __init__(self, registry_client, is_key=False, reader_schema=None):
@@ -168,22 +243,21 @@ def __init__(self, registry_client, is_key=False, reader_schema=None):
         self.is_key = is_key
         self.reader_schema = reader_schema

-    def __call__(self, topic, message):
+    def __call__(self, topic, datum):
         """
-        Decode a message from kafka that has been encoded for use with
-        the schema registry.
-        :param str|bytes or None message: message key or value to be decoded
-        :returns: Decoded message contents.
+        Decode a datum from kafka that has been encoded for use with the Confluent Schema Registry.
+        :param str|bytes or None datum: message key or value to be decoded.
+        :returns: Decoded message key or value contents.
         :rtype GenericAvroRecord:
         """

-        if message is None:
+        if datum is None:
             return None

-        if len(message) <= 5:
+        if len(datum) <= 5:
             raise SerializerError("message is too small to decode")

-        with ContextStringIO(message) as payload:
+        with ContextStringIO(datum) as payload:
             magic, schema_id = struct.unpack('>bI', payload.read(5))
             if magic != MAGIC_BYTE:
                 raise SerializerError("message does not start with magic byte", self.is_key)
@@ -247,7 +321,7 @@ def decoder(p):
             bin_decoder = avro.io.BinaryDecoder(p)
             return avro_reader.read(bin_decoder)

-        if writer_schema.get_prop('type') is 'record':
+        if writer_schema.type == 'record':
             self.codec_cache[schema_id] = record_decoder
         else:
             self.codec_cache[schema_id] = decoder
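
As a rough illustration of the three subject-name strategies added above (a sketch, not part of this commit; user_schema is the hypothetical example.User schema from the earlier sketch):

from confluent_kafka.avro.serializer import (TopicNameStrategy,
                                             RecordNameStrategy,
                                             TopicRecordNameStrategy)

TopicNameStrategy("users", is_key=False)              # -> "users-value"
TopicNameStrategy("users", is_key=True)               # -> "users-key"
RecordNameStrategy(schema=user_schema)                # -> "example.User"
TopicRecordNameStrategy("users", schema=user_schema)  # -> "users-example.User"

Only TopicNameStrategy ties compatibility checks to a single topic; the other two let one topic carry multiple record types, trading topic-level schema enforcement for flexibility.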

confluent_kafka/avro/serializer/message_serializer.py

Lines changed: 4 additions & 8 deletions
@@ -29,9 +29,7 @@
 import avro.io

 from confluent_kafka.avro import ClientError
-from confluent_kafka.avro.serializer import (SerializerError,
-                                             KeySerializerError,
-                                             ValueSerializerError)
+from confluent_kafka.avro.serializer import SerializerError

 log = logging.getLogger(__name__)

@@ -96,7 +94,6 @@ def encode_record_with_schema(self, topic, schema, record, is_key=False):
         :returns: Encoded record with schema ID as bytes
         :rtype: bytes
         """
-        serialize_err = KeySerializerError if is_key else ValueSerializerError

         subject_suffix = ('-key' if is_key else '-value')
         # get the latest schema for the subject
@@ -105,7 +102,7 @@ def encode_record_with_schema(self, topic, schema, record, is_key=False):
         schema_id = self.registry_client.register(subject, schema)
         if not schema_id:
             message = "Unable to retrieve schema id for subject %s" % (subject)
-            raise serialize_err(message)
+            raise SerializerError(message, is_key)

         # cache writer
         self.id_to_writers[schema_id] = self._get_encoder_func(schema)
@@ -122,7 +119,6 @@ def encode_record_with_schema_id(self, schema_id, record, is_key=False):
         :returns: decoder function
         :rtype: func
         """
-        serialize_err = KeySerializerError if is_key else ValueSerializerError

         # use slow avro
         if schema_id not in self.id_to_writers:
@@ -131,11 +127,11 @@ def encode_record_with_schema_id(self, schema_id, record, is_key=False):
             try:
                 schema = self.registry_client.get_by_id(schema_id)
                 if not schema:
-                    raise serialize_err("Schema does not exist")
+                    raise SerializerError("Schema does not exist", is_key)
                 self.id_to_writers[schema_id] = self._get_encoder_func(schema)
             except ClientError:
                 exc_type, exc_value, exc_traceback = sys.exc_info()
-                raise serialize_err(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
+                raise SerializerError(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), is_key)

         # get the writer
         writer = self.id_to_writers[schema_id]
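
Both serializer paths frame their output in the Confluent wire format written by struct.pack('>bI', MAGIC_BYTE, schema_id): one zero magic byte, a big-endian 4-byte schema id, then the Avro-encoded body, which is why the deserializer rejects payloads of 5 bytes or fewer. A standalone sketch of the framing (not code from this commit):

import struct

MAGIC_BYTE = 0  # the Confluent wire format reserves zero as the magic byte

def split_header(payload):
    # Return (schema_id, avro_body) from a Confluent-framed message.
    if payload is None or len(payload) <= 5:
        raise ValueError("message is too small to decode")
    magic, schema_id = struct.unpack('>bI', payload[:5])
    if magic != MAGIC_BYTE:
        raise ValueError("message does not start with magic byte")
    return schema_id, payload[5:]

framed = struct.pack('>bI', MAGIC_BYTE, 42) + b'\x02'  # 1-byte Avro body
print(split_header(framed))                            # -> (42, b'\x02')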

confluent_kafka/producer.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ class Producer(_impl):
     :param func key_serializer(topic, key): Converts key to bytes.
         **note** serializers are responsible for handling NULL keys
     :param func value_serializer(topic, value): Converts value to bytes.
-        **note** serializers are responsible for handling NULL keys
+        **note** serializers are responsible for handling NULL values
     :param func error_cb(kafka.KafkaError): Callback for generic/global error events.
     :param func stats_cb(json_str): Callback for statistics emitted every ``statistics.interval.ms``.
         See https://github.com/edenhill/librdkafka/wiki/Statistics for more information.
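
A hedged sketch of wiring the new serializers into this branch's Producer wrapper, whose key_serializer/value_serializer parameters are documented in the docstring above. The registry URL, topic name, and CachedSchemaRegistryClient configuration shown here are illustrative assumptions, not part of this commit:

from confluent_kafka import Producer
from confluent_kafka.avro import CachedSchemaRegistryClient
from confluent_kafka.avro.serializer import AvroSerializer, RecordNameStrategy

# Assumed registry client configuration; adjust to your deployment.
registry = CachedSchemaRegistryClient({'url': 'http://localhost:8081'})

producer = Producer({'bootstrap.servers': 'localhost:9092'},
                    key_serializer=AvroSerializer(registry, is_key=True),
                    value_serializer=AvroSerializer(
                        registry, subject_strategy=RecordNameStrategy))

# key/value would be GenericAvroRecord instances pairing datum and schema:
# producer.produce('users', key=user_record, value=user_record)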

0 commit comments