24
24
import traceback
25
25
26
26
from confluent_kafka .avro .schema import GenericAvroRecord , get_schema
27
- from confluent_kafka .avro import ClientError
27
+ from confluent_kafka .avro . error import ClientError
28
28
29
29
log = logging .getLogger (__name__ )
30
30
@@ -47,7 +47,7 @@ def __new__(cls, message, is_key=False):
47
47
return super (SerializerError , cls ).__new__ (KeySerializerError , message )
48
48
return super (SerializerError , cls ).__new__ (ValueSerializerError , message )
49
49
50
- def __init__ (self , message ):
50
+ def __init__ (self , message , is_key = False ):
51
51
self .message = message
52
52
53
53
def __repr__ (self ):
@@ -81,31 +81,101 @@ def __exit__(self, *args):
81
81
return False
82
82
83
83
84
def TopicNameStrategy(topic=None, is_key=False, schema=None):
    """
    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.

    TopicNameStrategy returns the schema's subject in the form of <topic>-key or <topic>-value.

    :param str topic: Topic name.
    :param bool is_key: True if subject is being registered for a message key.
    :param schema schema: Parsed Avro schema. *Note* Not used by TopicNameStrategy.
    :raises ValueError: If topic is unset.
    :returns: The subject name with which to register the schema.
    :rtype: str
    """
    if topic is None:
        raise ValueError("Topic must be set when using TopicNameStrategy")

    # Plain concatenation, not "-".join(): the suffixes already carry their
    # leading dash, so joining would yield "topic--key" / "topic--value".
    return topic + ('-key' if is_key else '-value')
101
+
102
+
103
def RecordNameStrategy(topic=None, is_key=False, schema=None):
    """
    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.

    RecordNameStrategy returns the fully-qualified record name regardless of the topic.

    Compatibility checks of the same record name across all topics.
    This strategy allows a topic to contain a mixture of different record types.

    :param str topic: Topic name. *Note* Not used by RecordNameStrategy.
    :param bool is_key: True if subject is being registered for a message key. *Note* Not used by RecordNameStrategy.
    :param schema schema: Parsed Avro schema.
    :raises ValueError: If schema is not set.
    :returns: The subject name with which to register the schema.
    :rtype: str
    """
    if schema is None:
        # Fixed typo in the message: "RecordNameStategy" -> "RecordNameStrategy".
        raise ValueError("Schema must be set when using RecordNameStrategy")

    return schema.fullname
123
+
124
+
125
def TopicRecordNameStrategy(topic=None, is_key=False, schema=None):
    """
    Constructs the subject name under which a schema is registered with the Confluent Schema Registry.

    TopicRecordNameStrategy returns the topic name appended by the fully-qualified record name.

    Compatibility checks are performed against all records of the same name within the same topic.
    Like the RecordNameStrategy mixed record types are allowed within a topic.
    This strategy is more flexible in that records needn't be compliant across the cluster.

    :param str topic: Topic name.
    :param bool is_key: True if used by a key_serializer. *Note* Not used by TopicRecordNameStrategy.
    :param schema schema: Parsed Avro schema.
    :raises ValueError: If topic or schema is not set.
    :returns: The subject name with which to register the schema.
    :rtype: str
    """
    # Both pieces are required. Checking with all() (the original used any(),
    # which only raised when BOTH were missing) guarantees a clear ValueError
    # instead of an AttributeError/TypeError when exactly one is provided.
    if not all([topic, schema]):
        raise ValueError("Both Topic and Schema must be set when using TopicRecordNameStrategy")
    return "-".join([topic, schema.fullname])
145
+
146
+
84
147
class AvroSerializer(object):
    """
    Encodes kafka messages as Avro; registering the schema with the Confluent Schema Registry.

    :param registry_client CachedSchemaRegistryClient: Instance of CachedSchemaRegistryClient.
    :param bool is_key: True if configured as a key_serializer.
    :param func(str, bool, schema) subject_strategy: Returns the subject name used when registering schemas.
    """

    __slots__ = ["registry_client", "codec_cache", "is_key", "subject_strategy"]

    def __init__(self, registry_client, is_key=False, subject_strategy=TopicNameStrategy):
        self.registry_client = registry_client
        # Maps schema_id -> encoder function so each schema is compiled once.
        self.codec_cache = {}
        self.is_key = is_key
        self.subject_strategy = subject_strategy
92
163
93
164
def __call__ (self , topic , record ):
94
165
"""
95
166
Given a parsed avro schema, encode a record for the given topic.
96
167
97
- The schema is registered with the subject of 'topic-value'
98
- :param str topic: Topic name
99
- :param GenericAvroRecord record: An object to serialize
100
- :returns: Encoded record with schema ID as bytes
168
+ The schema is registered with the subject of 'topic-value'.
169
+ :param str topic: Topic name.
170
+ :param GenericAvroRecord record: An object to serialize.
171
+ :returns: Encoded record with schema ID as bytes.
101
172
:rtype: bytes
102
173
"""
103
174
104
175
if record is None :
105
176
return None
106
177
107
- subject_suffix = '-key' if self .is_key else '-value'
108
- subject = topic + subject_suffix
178
+ subject = self .subject_strategy (topic , self .is_key , get_schema (record ))
109
179
110
180
schema_id = self .registry_client .register (subject , get_schema (record ))
111
181
if not schema_id :
@@ -115,20 +185,19 @@ def __call__(self, topic, record):
115
185
if schema_id not in self .codec_cache :
116
186
self .codec_cache [schema_id ] = self ._get_encoder_func (get_schema (record ))
117
187
118
- return self ._encode_record_with_schema_id (schema_id , record )
188
+ return self ._encode (schema_id , record )
119
189
120
190
def _get_encoder_func (self , writer_schema ):
121
191
if HAS_FAST :
122
192
return lambda record , fp : schemaless_writer (fp , writer_schema .to_json (), record )
123
193
writer = avro .io .DatumWriter (writer_schema )
124
194
return lambda record , fp : writer .write (record , avro .io .BinaryEncoder (fp ))
125
195
126
- def _encode_record_with_schema_id (self , schema_id , record ):
196
+ def _encode (self , schema_id , datum ):
127
197
"""
128
- Encode a record with a given schema id. The record must
129
- be a python dictionary.
198
+ Encode a datum with a given schema id.
130
199
:param int schema_id: integer ID
131
- :param dict record : An object to serialize
200
+ :param object datum : An object to serialize
132
201
:param bool is_key: If the record is a key
133
202
:param SerializerErr err_type: Error type to raise on serialization exception
134
203
:returns: decoder function
@@ -154,12 +223,18 @@ def _encode_record_with_schema_id(self, schema_id, record):
154
223
outf .write (struct .pack ('>bI' , MAGIC_BYTE , schema_id ))
155
224
156
225
# write the record to the rest of the buffer
157
- writer (record , outf )
226
+ writer (datum , outf )
158
227
return outf .getvalue ()
159
228
160
229
161
230
class AvroDeserializer (object ):
231
+ """
232
+ Decodes Kafka messages encoded by Confluent Schema Registry compliant Avro Serializers.
162
233
234
+ :param registry_client CachedSchemaRegistryClient: Instance of CachedSchemaRegistryClient.
235
+ :param bool is_key: True if configured as a key_serializer.
236
+ :param schema reader_schema: Optional reader schema to be used during deserialization.
237
+ """
163
238
__slots__ = ["registry_client" , "codec_cache" , "is_key" , "reader_schema" ]
164
239
165
240
def __init__ (self , registry_client , is_key = False , reader_schema = None ):
@@ -168,22 +243,21 @@ def __init__(self, registry_client, is_key=False, reader_schema=None):
168
243
self .is_key = is_key
169
244
self .reader_schema = reader_schema
170
245
171
- def __call__ (self , topic , message ):
246
+ def __call__ (self , topic , datum ):
172
247
"""
173
- Decode a message from kafka that has been encoded for use with
174
- the schema registry.
175
- :param str|bytes or None message: message key or value to be decoded
176
- :returns: Decoded message contents.
248
+ Decode a datum from kafka that has been encoded for use with the Confluent Schema Registry.
249
+ :param str|bytes or None datum: message key or value to be decoded.
250
+ :returns: Decoded message key or value contents.
177
251
:rtype GenericAvroRecord:
178
252
"""
179
253
180
- if message is None :
254
+ if datum is None :
181
255
return None
182
256
183
- if len (message ) <= 5 :
257
+ if len (datum ) <= 5 :
184
258
raise SerializerError ("message is too small to decode" )
185
259
186
- with ContextStringIO (message ) as payload :
260
+ with ContextStringIO (datum ) as payload :
187
261
magic , schema_id = struct .unpack ('>bI' , payload .read (5 ))
188
262
if magic != MAGIC_BYTE :
189
263
raise SerializerError ("message does not start with magic byte" , self .is_key )
@@ -247,7 +321,7 @@ def decoder(p):
247
321
bin_decoder = avro .io .BinaryDecoder (p )
248
322
return avro_reader .read (bin_decoder )
249
323
250
- if writer_schema .get_prop ( ' type' ) is 'record' :
324
+ if writer_schema .type is 'record' :
251
325
self .codec_cache [schema_id ] = record_decoder
252
326
else :
253
327
self .codec_cache [schema_id ] = decoder
0 commit comments