Commit 4e4146f

Merge pull request confluentinc#99 from axiom-data-science/avrocleanup
AVRO fix, some cleanup, and tox.ini changes
2 parents 956ff94 + af9a87e commit 4e4146f

14 files changed: 242 additions & 105 deletions

README.md

Lines changed: 6 additions & 4 deletions
@@ -157,16 +157,18 @@ Tests
 
 In order to run full test suite, simply execute:
 
-    $ tox
+    $ tox -r
 
-**NOTE**: Requires `tox` ( please install with `pip install tox` ) and several supported versions of Python.
+**NOTE**: Requires `tox` (please install with `pip install tox`), several supported versions of Python on your path, and `librdkafka` [installed](tools/bootstrap-librdkafka.sh) into `tmp-build`.
 
 
 **Run integration tests:**
 
-    $ examples/integration_test.py <kafka-broker> [<test-topic>]
+To run the integration tests, uncomment the following line from `tox.ini` and add the paths to your Kafka and Confluent Schema Registry instances. If no Schema Registry path is provided then no AVRO tests will be run. You can also run the integration tests outside of `tox` by running this command from the source root.
 
-**WARNING**: These tests require an active Kafka cluster and will make use of a topic named 'test'.
+    examples/integration_test.py <kafka-broker> [<test-topic>] [<schema-registry>]
+
+**WARNING**: These tests require an active Kafka cluster and will create new topics.
 
 
 
confluent_kafka/avro/__init__.py

Lines changed: 13 additions & 7 deletions
@@ -53,7 +53,9 @@ def __str__(self):
 
 
 from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
-from confluent_kafka.avro.serializer import SerializerError
+from confluent_kafka.avro.serializer import (SerializerError,
+                                             KeySerializerError,
+                                             ValueSerializerError)
 from confluent_kafka.avro.serializer.message_serializer import MessageSerializer
 
 

@@ -85,9 +87,9 @@ def produce(self, **kwargs):
         """
         Sends message to kafka by encoding with specified avro schema
         @:param: topic: topic name
-        @:param: value: A dictionary object
+        @:param: value: An object to serialize
         @:param: value_schema : Avro schema for value
-        @:param: key: A dictionary object
+        @:param: key: An object to serialize
         @:param: key_schema : Avro schema for key
         @:exception: SerializerError
         """

@@ -99,17 +101,19 @@ def produce(self, **kwargs):
             raise ClientError("Topic name not specified.")
         value = kwargs.pop('value', None)
         key = kwargs.pop('key', None)
+
         if value:
             if value_schema:
                 value = self._serializer.encode_record_with_schema(topic, value_schema, value)
             else:
-                raise SerializerError("Avro schema required for value")
+                raise ValueSerializerError("Avro schema required for values")
 
         if key:
             if key_schema:
                 key = self._serializer.encode_record_with_schema(topic, key_schema, key, True)
             else:
-                raise SerializerError("Avro schema required for key")
+                raise KeySerializerError("Avro schema required for key")
+
 
         super(AvroProducer, self).produce(topic, value, key, **kwargs)
 

@@ -133,16 +137,18 @@ def __init__(self, config):
         super(AvroConsumer, self).__init__(config)
         self._serializer = MessageSerializer(CachedSchemaRegistryClient(url=schem_registry_url))
 
-    def poll(self, timeout):
+    def poll(self, timeout=None):
         """
         This is an overriden method from confluent_kafka.Consumer class. This handles message
         deserialization using avro schema
 
         @:param timeout
         @:return message object with deserialized key and value as dict objects
         """
+        if timeout is None:
+            timeout = -1
         message = super(AvroConsumer, self).poll(timeout)
-        if not message:
+        if not message.value() and not message.key():
             return message
         if not message.error():
             if message.value() is not None:
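
Note: taken together, these changes mean `produce()` accepts any Avro-serializable object rather than only dicts, a missing schema now fails with the specific `KeySerializerError`/`ValueSerializerError` subclass, and `poll()` can be called with no timeout. A minimal usage sketch under those assumptions; the broker/registry addresses, topic name, and schema path below are placeholders, not part of the commit:

    from confluent_kafka import avro
    from confluent_kafka.avro import AvroProducer, AvroConsumer
    from confluent_kafka.avro.serializer import ValueSerializerError

    value_schema = avro.load('tests/avro/basic_schema.avsc')  # placeholder path
    p = AvroProducer({'bootstrap.servers': 'localhost:9092',
                      'schema.registry.url': 'http://localhost:8081'})

    try:
        # No value_schema given and no default configured: rejected
        # client-side before anything is sent to the broker.
        p.produce(topic='test', value={'name': 'abc'})
    except ValueSerializerError as e:
        print('value schema required: %s' % e)

    p.produce(topic='test', value={'name': 'abc'}, value_schema=value_schema)
    p.flush()

    c = AvroConsumer({'bootstrap.servers': 'localhost:9092',
                      'schema.registry.url': 'http://localhost:8081',
                      'group.id': 'example'})
    c.subscribe(['test'])
    msg = c.poll()  # timeout now defaults to None, which is mapped to -1 (block)
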
confluent_kafka/avro/serializer/__init__.py

Lines changed: 30 additions & 1 deletion

@@ -1,11 +1,40 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Confluent Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 class SerializerError(Exception):
     """Generic error from serializer package"""
 
     def __init__(self, message):
         self.message = message
 
     def __repr__(self):
-        return 'SerializerError(error={error})'.format(error=self.message)
+        return '{klass}(error={error})'.format(
+            klass=self.__class__.__name__,
+            error=self.message
+        )
 
     def __str__(self):
         return self.message
+
+
+class KeySerializerError(SerializerError):
+    pass
+
+
+class ValueSerializerError(SerializerError):
+    pass
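
Note: both new exception types subclass `SerializerError`, so existing handlers that catch the base class keep working unchanged, while callers can now distinguish key failures from value failures. A small illustration:

    from confluent_kafka.avro.serializer import (SerializerError,
                                                 KeySerializerError,
                                                 ValueSerializerError)

    # Pre-existing `except SerializerError` blocks still catch both variants.
    assert issubclass(KeySerializerError, SerializerError)
    assert issubclass(ValueSerializerError, SerializerError)

    # The reworked __repr__ reports the concrete class name:
    repr(KeySerializerError('Avro schema required for key'))
    # -> "KeySerializerError(error=Avro schema required for key)"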

confluent_kafka/avro/serializer/message_serializer.py

Lines changed: 16 additions & 12 deletions
@@ -29,7 +29,9 @@
 import avro.io
 
 from confluent_kafka.avro import ClientError
-from . import SerializerError
+from confluent_kafka.avro.serializer import (SerializerError,
+                                             KeySerializerError,
+                                             ValueSerializerError)
 
 log = logging.getLogger(__name__)
 

@@ -83,47 +85,49 @@ def encode_record_with_schema(self, topic, schema, record, is_key=False):
         The schema is registered with the subject of 'topic-value'
         @:param topic : Topic name
         @:param schema : Avro Schema
-        @:param record : A dictionary object
+        @:param record : An object to serialize
+        @:param is_key : If the record is a key
         @:returns : Encoded record with schema ID as bytes
         """
-        if not isinstance(record, dict):
-            raise SerializerError("record must be a dictionary")
+        serialize_err = KeySerializerError if is_key else ValueSerializerError
+
         subject_suffix = ('-key' if is_key else '-value')
         # get the latest schema for the subject
         subject = topic + subject_suffix
         # register it
         schema_id = self.registry_client.register(subject, schema)
         if not schema_id:
             message = "Unable to retrieve schema id for subject %s" % (subject)
-            raise SerializerError(message)
+            raise serialize_err(message)
 
         # cache writer
         self.id_to_writers[schema_id] = avro.io.DatumWriter(schema)
 
-        return self.encode_record_with_schema_id(schema_id, record)
+        return self.encode_record_with_schema_id(schema_id, record, is_key=is_key)
 
-    def encode_record_with_schema_id(self, schema_id, record):
+    def encode_record_with_schema_id(self, schema_id, record, is_key=False):
         """
         Encode a record with a given schema id. The record must
         be a python dictionary.
         @:param: schema_id : integer ID
-        @:param: record : A dictionary object
+        @:param: record : An object to serialize
+        @:param is_key : If the record is a key
         @:returns: decoder function
         """
-        if not isinstance(record, dict):
-            raise SerializerError("record must be a dictionary")
+        serialize_err = KeySerializerError if is_key else ValueSerializerError
+
         # use slow avro
         if schema_id not in self.id_to_writers:
             # get the writer + schema
 
             try:
                 schema = self.registry_client.get_by_id(schema_id)
                 if not schema:
-                    raise SerializerError("Schema does not exist")
+                    raise serialize_err("Schema does not exist")
                 self.id_to_writers[schema_id] = avro.io.DatumWriter(schema)
             except ClientError as e:
                 exc_type, exc_value, exc_traceback = sys.exc_info()
-                raise SerializerError("Error fetching schema from registry:" + repr(
+                raise serialize_err("Error fetching schema from registry:" + repr(
                     traceback.format_exception(exc_type, exc_value, exc_traceback)))
 
         # get the writer
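
Note: `is_key` now threads from `encode_record_with_schema()` into `encode_record_with_schema_id()`, so schema-registry failures on either path surface as the matching subclass. A hedged sketch of the key path; the registry URL and schema path are assumptions, and a registry must actually be reachable for the happy path:

    from confluent_kafka import avro
    from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
    from confluent_kafka.avro.serializer import KeySerializerError
    from confluent_kafka.avro.serializer.message_serializer import MessageSerializer

    # Hypothetical wiring, mirroring how AvroConsumer builds its serializer.
    serializer = MessageSerializer(CachedSchemaRegistryClient(url='http://localhost:8081'))
    key_schema = avro.load('tests/avro/primitive_string.avsc')

    try:
        encoded = serializer.encode_record_with_schema('test', key_schema,
                                                       'some-key', is_key=True)
    except KeySerializerError as e:
        # e.g. no schema id could be retrieved for subject 'test-key'
        print('key serialization failed: %s' % e)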

examples/integration_test.py

Lines changed: 90 additions & 7 deletions
@@ -20,11 +20,12 @@
 """ Test script for confluent_kafka module """
 
 import confluent_kafka
-import re
+import os
 import time
 import uuid
 import sys
 import json
+from copy import copy
 
 try:
     from progress.bar import Bar

@@ -35,6 +36,9 @@
 # Kafka bootstrap server(s)
 bootstrap_servers = None
 
+# Confluent schema-registry
+schema_registry = None
+
 # Topic to use
 topic = 'test'
 

@@ -50,7 +54,7 @@
 def error_cb (err):
     print('Error: %s' % err)
 
-
+
 class MyTestDr(object):
     """ Producer: Delivery report callback """
 

@@ -124,6 +128,83 @@ def verify_producer():
     p.flush()
 
 
+def verify_avro():
+    from confluent_kafka import avro
+    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')
+
+    # Producer config
+    conf = {'bootstrap.servers': bootstrap_servers,
+            'schema.registry.url': schema_registry,
+            'error_cb': error_cb,
+            'api.version.request': api_version_request,
+            'default.topic.config': {'produce.offset.report': True}}
+
+    # Create producer
+    p = avro.AvroProducer(conf)
+
+    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
+    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
+    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
+    str_value = 'abc'
+    float_value = 32.
+
+    combinations = [
+        dict(key=float_value, key_schema=prim_float),
+        dict(value=float_value, value_schema=prim_float),
+        dict(key={'name': 'abc'}, key_schema=basic),
+        dict(value={'name': 'abc'}, value_schema=basic),
+        dict(value={'name': 'abc'}, value_schema=basic, key=float_value, key_schema=prim_float),
+        dict(value={'name': 'abc'}, value_schema=basic, key=str_value, key_schema=prim_string),
+        dict(value=float_value, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
+        dict(value=float_value, value_schema=prim_float, key=str_value, key_schema=prim_string),
+        dict(value=str_value, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
+        dict(value=str_value, value_schema=prim_string, key=float_value, key_schema=prim_float),
+    ]
+
+    # Consumer config
+    cons_conf = {'bootstrap.servers': bootstrap_servers,
+                 'schema.registry.url': schema_registry,
+                 'group.id': 'test.py',
+                 'session.timeout.ms': 6000,
+                 'enable.auto.commit': False,
+                 'api.version.request': api_version_request,
+                 'on_commit': print_commit_result,
+                 'error_cb': error_cb,
+                 'default.topic.config': {
+                     'auto.offset.reset': 'earliest'
+                 }}
+
+    for i, combo in enumerate(combinations):
+        combo['topic'] = str(uuid.uuid4())
+        p.produce(**combo)
+        p.poll(0)
+        p.flush()
+
+        # Create consumer
+        c = avro.AvroConsumer(copy(cons_conf))
+        c.subscribe([combo['topic']])
+
+        while True:
+            msg = c.poll(0)
+            if msg is None:
+                continue
+
+            if msg.error():
+                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
+                    break
+                else:
+                    continue
+
+            tstype, timestamp = msg.timestamp()
+            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
+                  (msg.topic(), msg.partition(), msg.offset(),
+                   msg.key(), msg.value(), tstype, timestamp))
+
+            c.commit(msg, async=False)
+
+        # Close consumer
+        c.close()
+
 
 def verify_producer_performance(with_dr_cb=True):
     """ Time how long it takes to produce and delivery X messages """

@@ -285,8 +366,6 @@ def verify_consumer():
     c.close()
 
 
-
-
 def verify_consumer_performance():
     """ Verify Consumer performance """
 

@@ -450,8 +529,10 @@ def stats_cb(stats_json_str):
     bootstrap_servers = sys.argv[1]
     if len(sys.argv) > 2:
         topic = sys.argv[2]
+    if len(sys.argv) > 3:
+        schema_registry = sys.argv[3]
 else:
-    print('Usage: %s <broker> [<topic>]' % sys.argv[0])
+    print('Usage: %s <broker> [<topic>] [<schema_registry>]' % sys.argv[0])
     sys.exit(1)
 
 print('Using confluent_kafka module version %s (0x%x)' % confluent_kafka.version())

@@ -475,6 +556,8 @@ def stats_cb(stats_json_str):
     print('=' * 30, 'Verifying stats_cb', '=' * 30)
     verify_stats_cb()
 
-print('=' * 30, 'Done', '=' * 30)
-
+if schema_registry:
+    print('=' * 30, 'Verifying AVRO', '=' * 30)
+    topics = verify_avro()
 
+print('=' * 30, 'Done', '=' * 30)

setup.py

Lines changed: 1 addition & 1 deletion
@@ -24,5 +24,5 @@
     packages=find_packages(exclude=("tests",)),
     data_files = [('', ['LICENSE'])],
     extras_require={
-        'avro': ['fastavro', 'requests', avro, 'unittest2']
+        'avro': ['fastavro', 'requests', avro]
     })

tests/avro/primitive_float.avsc

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+{
+  "type": "float"
+}

tests/avro/primitive_string.avsc

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+{
+  "type": "string"
+}
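
Note: these primitive schemas exercise the new non-dict serialization path; the `isinstance(record, dict)` checks removed above previously rejected such values. A minimal sketch mirroring `verify_avro`, with placeholder broker/registry addresses:

    from confluent_kafka import avro
    from confluent_kafka.avro import AvroProducer

    prim_float = avro.load('tests/avro/primitive_float.avsc')
    p = AvroProducer({'bootstrap.servers': 'localhost:9092',
                      'schema.registry.url': 'http://localhost:8081'})

    # A bare float is now valid against the primitive schema.
    p.produce(topic='test', value=32., value_schema=prim_float)
    p.flush()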
