
Commit f821652

Producer: Header values may now be unicode strings (auto convert to UTF8)
.. and a bunch of header test fixes.
1 parent f669580 commit f821652

4 files changed: +115, -60 lines


confluent_kafka/src/Producer.c

Lines changed: 2 additions & 1 deletion
@@ -488,7 +488,7 @@ static PyObject *Producer_flush (Handle *self, PyObject *args,
 static PyMethodDef Producer_methods[] = {
         { "produce", (PyCFunction)Producer_produce,
           METH_VARARGS|METH_KEYWORDS,
-          ".. py:function:: produce(topic, [value], [key], [partition], [on_delivery], [timestamp])\n"
+          ".. py:function:: produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])\n"
           "\n"
           "  Produce message to topic.\n"
           "  This is an asynchronous operation, an application may use the "
@@ -510,6 +510,7 @@ static PyMethodDef Producer_methods[] = {
           "failed delivery\n"
           "  :param int timestamp: Message timestamp (CreateTime) in microseconds since epoch UTC (requires librdkafka >= v0.9.4, api.version.request=true, and broker >= 0.10.0.0). Default value is current time.\n"
           "\n"
+          "  :param headers dict|list: Message headers to set on the message. The header key must be a string while the value must be binary, unicode or None. Accepts a list of (key,value) or a dict. (Requires librdkafka >= v0.11.4 and broker version >= 0.11.0.0)\n"
           "  :rtype: None\n"
           "  :raises BufferError: if the internal producer message queue is "
           "full (``queue.buffering.max.messages`` exceeded)\n"

confluent_kafka/src/confluent_kafka.c

Lines changed: 76 additions & 47 deletions
@@ -968,8 +968,77 @@ rd_kafka_topic_partition_list_t *py_to_c_parts (PyObject *plist) {
 #ifdef RD_KAFKA_V_HEADERS
 
 
+/**
+ * @brief Translate Python \p key and \p value to C types and set on
+ *        provided \p rd_headers object.
+ *
+ * @returns 1 on success or 0 if an exception was raised.
+ */
+static int py_header_to_c (rd_kafka_headers_t *rd_headers,
+                           PyObject *key, PyObject *value) {
+        PyObject *ks, *ks8, *vo8 = NULL;
+        const char *k;
+        const void *v = NULL;
+        Py_ssize_t vsize = 0;
+        rd_kafka_resp_err_t err;
+
+        if (!(ks = cfl_PyObject_Unistr(key))) {
+                PyErr_SetString(PyExc_TypeError,
+                                "expected header key to be unicode "
+                                "string");
+                return 0;
+        }
+
+        k = cfl_PyUnistr_AsUTF8(ks, &ks8);
+
+        if (value != Py_None) {
+                if (cfl_PyBin(_Check(value))) {
+                        /* Proper binary */
+                        if (cfl_PyBin(_AsStringAndSize(value, (char **)&v,
+                                                       &vsize)) == -1) {
+                                Py_DECREF(ks);
+                                return 0;
+                        }
+                } else if (cfl_PyUnistr(_Check(value))) {
+                        /* Unicode string, translate to utf-8. */
+                        v = cfl_PyUnistr_AsUTF8(value, &vo8);
+                        if (!v) {
+                                Py_DECREF(ks);
+                                return 0;
+                        }
+                        vsize = (Py_ssize_t)strlen(v);
+                } else {
+                        PyErr_Format(PyExc_TypeError,
+                                     "expected header value to be "
+                                     "None, binary, or unicode string, not %s",
+                                     ((PyTypeObject *)PyObject_Type(value))->
+                                     tp_name);
+                        return 0;
+                }
+        }
+
+        if ((err = rd_kafka_header_add(rd_headers, k, -1, v, vsize))) {
+                cfl_PyErr_Format(err,
+                                 "Unable to add message header \"%s\": "
+                                 "%s",
+                                 k, rd_kafka_err2str(err));
+                Py_DECREF(ks);
+                Py_XDECREF(vo8);
+                return 0;
+        }
+
+        Py_DECREF(ks);
+        Py_XDECREF(vo8);
+
+        return 1;
+}
+
 /**
  * @brief Convert Python list of tuples to rd_kafka_headers_t
+ *
+ * Header names must be unicode strings.
+ * Header values may be None, binary or unicode string, the latter is
+ * automatically encoded as utf-8.
  */
 static rd_kafka_headers_t *py_headers_list_to_c (PyObject *hdrs) {
         int i, len;
@@ -979,28 +1048,19 @@ static rd_kafka_headers_t *py_headers_list_to_c (PyObject *hdrs) {
         rd_headers = rd_kafka_headers_new(len);
 
         for (i = 0; i < len; i++) {
-                rd_kafka_resp_err_t err;
-                const char *header_key, *header_value = NULL;
-                int header_key_len = 0, header_value_len = 0;
+                PyObject *tuple = PyList_GET_ITEM(hdrs, i);
 
-                if(!PyArg_ParseTuple(PyList_GET_ITEM(hdrs, i), "s#z#",
-                                     &header_key, &header_key_len,
-                                     &header_value, &header_value_len)){
+                if (!PyTuple_Check(tuple) || PyTuple_Size(tuple) != 2) {
                         rd_kafka_headers_destroy(rd_headers);
                         PyErr_SetString(PyExc_TypeError,
                                         "Headers are expected to be a "
-                                        "tuple of (key, value)");
+                                        "list of (key, value) tuples");
                         return NULL;
                 }
 
-                err = rd_kafka_header_add(rd_headers,
-                                          header_key, header_key_len,
-                                          header_value, header_value_len);
-                if (err) {
-                        cfl_PyErr_Format(err,
-                                         "Unable to add message header \"%s\": "
-                                         "%s",
-                                         header_key, rd_kafka_err2str(err));
+                if (!py_header_to_c(rd_headers,
+                                    PyTuple_GET_ITEM(tuple, 0),
+                                    PyTuple_GET_ITEM(tuple, 1))) {
                         rd_kafka_headers_destroy(rd_headers);
                         return NULL;
                 }
@@ -1022,42 +1082,11 @@ static rd_kafka_headers_t *py_headers_dict_to_c (PyObject *hdrs) {
         rd_headers = rd_kafka_headers_new(len);
 
         while (PyDict_Next(hdrs, &pos, &ko, &vo)) {
-                PyObject *ks, *ks8;
-                const char *k;
-                const void *v = NULL;
-                Py_ssize_t vsize = 0;
-                rd_kafka_resp_err_t err;
-
-                if (!(ks = cfl_PyObject_Unistr(ko))) {
-                        PyErr_SetString(PyExc_TypeError,
-                                        "expected header key to be unicode "
-                                        "string");
-                        rd_kafka_headers_destroy(rd_headers);
-                        return NULL;
-                }
 
-                k = cfl_PyUnistr_AsUTF8(ks, &ks8);
-
-                if (vo != Py_None) {
-                        if (cfl_PyBin(_AsStringAndSize(vo, (char **)&v,
-                                                       &vsize)) == -1) {
-                                Py_DECREF(ks);
-                                rd_kafka_headers_destroy(rd_headers);
-                                return NULL;
-                        }
-                }
-
-                if ((err = rd_kafka_header_add(rd_headers, k, -1, v, vsize))) {
-                        cfl_PyErr_Format(err,
-                                         "Unable to add message header \"%s\": "
-                                         "%s",
-                                         k, rd_kafka_err2str(err));
-                        Py_DECREF(ks);
+                if (!py_header_to_c(rd_headers, ko, vo)) {
                         rd_kafka_headers_destroy(rd_headers);
                         return NULL;
                 }
-
-                Py_DECREF(ks);
         }
 
         return rd_headers;
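In Python terms, py_header_to_c applies roughly the following per-header conversion. A minimal sketch under Python 3 semantics (coerce_header_value is a hypothetical name for illustration, not part of the library; the C macros additionally cover Python 2's str/unicode split):

    def coerce_header_value(key, value):
        # Keys must be unicode strings.
        if not isinstance(key, str):
            raise TypeError("expected header key to be unicode string")
        if value is None:
            return None                    # null header values are allowed
        if isinstance(value, bytes):
            return value                   # proper binary, passed through as-is
        if isinstance(value, str):
            return value.encode('utf-8')   # unicode string, translated to utf-8
        raise TypeError("expected header value to be None, binary, or "
                        "unicode string, not %s" % type(value).__name__)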

examples/integration_test.py

Lines changed: 27 additions & 11 deletions
@@ -61,6 +61,25 @@
 # global variable to be set by stats_cb call back function
 good_stats_cb_result = False
 
+# Shared between producer and consumer tests and used to verify
+# that consumed headers are what was actually produced.
+produce_headers = [('foo1', 'bar'),
+                   ('foo1', 'bar2'),
+                   ('foo2', b'1'),
+                   ('Jämtland', 'Härjedalen'),  # automatically utf-8 encoded
+                   ('nullheader', None),
+                   ('empty', ''),
+                   ('foobin', struct.pack('hhl', 10, 20, 30))]
+
+# Identical to produce_headers but with proper binary typing
+expected_headers = [('foo1', b'bar'),
+                    ('foo1', b'bar2'),
+                    ('foo2', b'1'),
+                    ('Jämtland', b'H\xc3\xa4rjedalen'),  # not automatically utf-8 decoded
+                    ('nullheader', None),
+                    ('empty', b''),
+                    ('foobin', struct.pack('hhl', 10, 20, 30))]
+
 
 def error_cb(err):
     print('Error: %s' % err)
@@ -126,8 +145,7 @@ def verify_producer():
     p = confluent_kafka.Producer(**conf)
     print('producer at %s' % p)
 
-    headers = [('foo1', 'bar'), ('foo1', 'bar2'), ('foo2', b'1'),
-               ('foobin', struct.pack('hhl', 10, 20, 30))]
+    headers = produce_headers
 
     # Produce some messages
     p.produce(topic, 'Hello Python!', headers=headers)
@@ -444,7 +462,7 @@ def verify_consumer_seek(c, seek_to_msg):
         msg = c.poll()
         assert msg is not None
         if msg.error():
-            print('seek: Ignoring non-message: %s' % msg)
+            print('seek: Ignoring non-message: %s' % msg.error())
             continue
 
         if msg.topic() != seek_to_msg.topic() or msg.partition() != seek_to_msg.partition():
@@ -489,7 +507,7 @@ def print_wmark(consumer, parts):
 
     first_msg = None
 
-    example_header = None
+    example_headers = None
 
     while True:
         # Consume until EOF or error
@@ -511,7 +529,7 @@ def print_wmark(consumer, parts):
             tstype, timestamp = msg.timestamp()
             headers = msg.headers()
             if headers:
-                example_header = headers
+                example_headers = headers
 
             msg.set_headers([('foo', 'bar')])
             assert msg.headers() == [('foo', 'bar')]
@@ -544,15 +562,13 @@ def print_wmark(consumer, parts):
             print('Sync committed offset: %s' % offsets)
 
         msgcnt += 1
-        if msgcnt >= max_msgcnt and example_header is not None:
+        if msgcnt >= max_msgcnt and example_headers is not None:
             print('max_msgcnt %d reached' % msgcnt)
             break
 
-    assert example_header, "We should have received at least one header"
-    assert example_header == [(u'foo1', 'bar'),
-                              (u'foo1', 'bar2'),
-                              (u'foo2', '1'),
-                              ('foobin', struct.pack('hhl', 10, 20, 30))]
+    assert example_headers, "We should have received at least one header"
+    assert example_headers == expected_headers, \
+        "example header mismatch:\n{}\nexpected:\n{}".format(example_headers, expected_headers)
 
     # Get current assignment
     assignment = c.assignment()
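As expected_headers documents, consumed header values always come back as raw bytes, even when they were produced as unicode strings; decoding is left to the application. A minimal consumer-side sketch (assumes msg is a Message returned by Consumer.poll() and that the values are UTF-8 text):

    for key, value in (msg.headers() or []):
        if value is not None:
            value = value.decode('utf-8')  # not automatically decoded by the client
        print('%s=%s' % (key, value))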

tests/test_Producer.py

Lines changed: 10 additions & 1 deletion
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 import pytest
 
 from confluent_kafka import Producer, KafkaError, KafkaException, libversion
@@ -79,15 +80,20 @@ def test_produce_headers():
                        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
                        [('key_with_null_value', None)],
                        [('binaryval', binval)],
+                       [('alreadyutf8', u'Småland'.encode('utf-8'))],
+                       [('isunicode', 'Jämtland')],
 
                        {'headerkey': 'headervalue'},
                        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
                        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
                        {'key_with_null_value': None},
-                       {'binaryval': binval}
+                       {'binaryval': binval},
+                       {'alreadyutf8': u'Småland'.encode('utf-8')},
+                       {'isunicode': 'Jämtland'}
                        ]
 
     for headers in headers_to_test:
+        print('headers', type(headers), headers)
         p.produce('mytopic', value='somedata', key='a key', headers=headers)
         p.produce('mytopic', value='somedata', headers=headers)
 
@@ -97,6 +103,9 @@ def test_produce_headers():
     with pytest.raises(TypeError):
         p.produce('mytopic', value='somedata', key='a key', headers=[('malformed_header')])
 
+    with pytest.raises(TypeError):
+        p.produce('mytopic', value='somedata', headers={'anint': 1234})
+
     p.flush()
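One caveat about the dict cases above: a Python dict literal silently keeps only the last value for a repeated key (hence the # noqa: F601 markers), so duplicate header keys can only be exercised through the list form. A quick illustration:

    as_list = [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')]  # both headers kept
    as_dict = {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'}      # noqa: F601
    print(as_dict)  # {'dupkey': 'diffvalue'} -- only the last value survives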