Skip to content

Fix utf8 string conversion memory leak on Python 2 (#198) #239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions confluent_kafka/src/Consumer.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,17 @@ static PyObject *Consumer_subscribe (Handle *self, PyObject *args,
topics = rd_kafka_topic_partition_list_new((int)PyList_Size(tlist));
for (pos = 0 ; pos < PyList_Size(tlist) ; pos++) {
PyObject *o = PyList_GetItem(tlist, pos);
PyObject *uo;
PyObject *uo, *uo8;
if (!(uo = cfl_PyObject_Unistr(o))) {
PyErr_Format(PyExc_TypeError,
"expected list of unicode strings");
rd_kafka_topic_partition_list_destroy(topics);
return NULL;
}
rd_kafka_topic_partition_list_add(topics,
cfl_PyUnistr_AsUTF8(uo),
cfl_PyUnistr_AsUTF8(uo, &uo8),
RD_KAFKA_PARTITION_UA);
Py_XDECREF(uo8);
Py_DECREF(uo);
}

Expand Down Expand Up @@ -284,6 +285,7 @@ static PyObject *Consumer_commit (Handle *self, PyObject *args,
return NULL;
} else if (msg) {
Message *m;
PyObject *uo8;

if (PyObject_Type((PyObject *)msg) !=
(PyObject *)&MessageType) {
Expand All @@ -296,8 +298,9 @@ static PyObject *Consumer_commit (Handle *self, PyObject *args,

c_offsets = rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_list_add(
c_offsets, cfl_PyUnistr_AsUTF8(m->topic),
c_offsets, cfl_PyUnistr_AsUTF8(m->topic, &uo8),
m->partition)->offset =m->offset + 1;
Py_XDECREF(uo8);

} else {
c_offsets = NULL;
Expand Down
55 changes: 40 additions & 15 deletions confluent_kafka/src/confluent_kafka.c
Original file line number Diff line number Diff line change
Expand Up @@ -664,19 +664,27 @@ static PyMemberDef TopicPartition_members[] = {


static PyObject *TopicPartition_str0 (TopicPartition *self) {
PyObject *errstr = self->error == Py_None ? NULL :
cfl_PyObject_Unistr(self->error);
PyObject *errstr = NULL;
PyObject *errstr8 = NULL;
const char *c_errstr = NULL;
PyObject *ret;
char offset_str[40];

snprintf(offset_str, sizeof(offset_str), "%"PRId64"", self->offset);

if (self->error != Py_None) {
errstr = cfl_PyObject_Unistr(self->error);
c_errstr = cfl_PyUnistr_AsUTF8(errstr, &errstr8);
}

ret = cfl_PyUnistr(
_FromFormat("TopicPartition{topic=%s,partition=%"PRId32
",offset=%s,error=%s}",
self->topic, self->partition,
offset_str,
errstr ? cfl_PyUnistr_AsUTF8(errstr) : "None"));
if (errstr)
Py_DECREF(errstr);
c_errstr ? c_errstr : "None"));
Py_XDECREF(errstr8);
Py_XDECREF(errstr);
return ret;
}

Expand Down Expand Up @@ -996,8 +1004,8 @@ static int populate_topic_conf (rd_kafka_topic_conf_t *tconf, const char *what,
}

while (PyDict_Next(dict, &pos, &ko, &vo)) {
PyObject *ks;
PyObject *vs;
PyObject *ks, *ks8;
PyObject *vs, *vs8;
const char *k;
const char *v;
char errstr[256];
Expand All @@ -1017,19 +1025,23 @@ static int populate_topic_conf (rd_kafka_topic_conf_t *tconf, const char *what,
return -1;
}

k = cfl_PyUnistr_AsUTF8(ks);
v = cfl_PyUnistr_AsUTF8(vs);
k = cfl_PyUnistr_AsUTF8(ks, &ks8);
v = cfl_PyUnistr_AsUTF8(vs, &vs8);

if (rd_kafka_topic_conf_set(tconf, k, v,
errstr, sizeof(errstr)) !=
RD_KAFKA_CONF_OK) {
cfl_PyErr_Format(RD_KAFKA_RESP_ERR__INVALID_ARG,
"%s: %s", what, errstr);
Py_XDECREF(ks8);
Py_XDECREF(vs8);
Py_DECREF(ks);
Py_DECREF(vs);
return -1;
}

Py_XDECREF(ks8);
Py_XDECREF(vs8);
Py_DECREF(ks);
Py_DECREF(vs);
}
Expand Down Expand Up @@ -1070,7 +1082,8 @@ static int producer_conf_set_special (Handle *self, rd_kafka_conf_t *conf,
if ((vs = cfl_PyObject_Unistr(valobj))) {
/* Use built-in C partitioners,
* based on their name. */
val = cfl_PyUnistr_AsUTF8(vs);
PyObject *vs8;
val = cfl_PyUnistr_AsUTF8(vs, &vs8);

if (!strcmp(val, "random"))
rd_kafka_topic_conf_set_partitioner_cb(
Expand All @@ -1087,10 +1100,12 @@ static int producer_conf_set_special (Handle *self, rd_kafka_conf_t *conf,
"unknown builtin partitioner: %s "
"(available: random, consistent, consistent_random)",
val);
Py_XDECREF(vs8);
Py_DECREF(vs);
return -1;
}

Py_XDECREF(vs8);
Py_DECREF(vs);

} else {
Expand Down Expand Up @@ -1210,8 +1225,8 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,

/* Convert kwargs dict to config key-value pairs. */
while (PyDict_Next(kwargs, &pos, &ko, &vo)) {
PyObject *ks;
PyObject *vs = NULL;
PyObject *ks, *ks8;
PyObject *vs = NULL, *vs8 = NULL;
const char *k;
const char *v;
char errstr[256];
Expand All @@ -1226,15 +1241,15 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
return NULL;
}

k = cfl_PyUnistr_AsUTF8(ks);
k = cfl_PyUnistr_AsUTF8(ks, &ks8);
if (!strcmp(k, "default.topic.config")) {
if (populate_topic_conf(tconf, k, vo) == -1) {
Py_DECREF(ks);
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
return NULL;
}

Py_XDECREF(ks8);
Py_DECREF(ks);
continue;

Expand All @@ -1245,6 +1260,7 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
"as a callable function");
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
Py_XDECREF(ks8);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it becomes inconsistent with the way ks is handled, but couldn't we just Py_XDECREF immediately so we only have it once and don't have to scatter it on all of these paths? In fact, it looks like we could do that for ks in some of the cases that have been converted here.

Copy link
Contributor Author

@edenhill edenhill Sep 5, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Python 2.x ks8 will point to a different object than ks, namely the UTF-8 representation of ks, and since k is a char pointer into that UTF-8 object's (ks8's) underlying memory we must retain the reference until all usage of k is over.

On Python 3 ks8 will always be NULL, thus causing XDECREF to be a no-op.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, makes sense. Kinda sucks that the API naming doesn't make it really obvious when you're getting internal data vs a completely new object.

Py_DECREF(ks);
return NULL;
}
Expand All @@ -1256,6 +1272,7 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
h->error_cb = vo;
Py_INCREF(h->error_cb);
}
Py_XDECREF(ks8);
Py_DECREF(ks);
continue;
} else if (!strcmp(k, "stats_cb")) {
Expand All @@ -1265,6 +1282,7 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
"as a callable function");
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
Py_XDECREF(ks8);
Py_DECREF(ks);
return NULL;
}
Expand All @@ -1277,6 +1295,7 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
h->stats_cb = vo;
Py_INCREF(h->stats_cb);
}
Py_XDECREF(ks8);
Py_DECREF(ks);
continue;
}
Expand All @@ -1288,6 +1307,7 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
r = consumer_conf_set_special(h, conf, tconf, k, vo);
if (r == -1) {
/* Error */
Py_XDECREF(ks8);
Py_DECREF(ks);
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
Expand All @@ -1312,10 +1332,11 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
"unicode string");
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
Py_XDECREF(ks8);
Py_DECREF(ks);
return NULL;
}
v = cfl_PyUnistr_AsUTF8(vs);
v = cfl_PyUnistr_AsUTF8(vs, &vs8);
}

if (rd_kafka_conf_set(conf, k, v, errstr, sizeof(errstr)) !=
Expand All @@ -1324,12 +1345,16 @@ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype,
"%s", errstr);
rd_kafka_topic_conf_destroy(tconf);
rd_kafka_conf_destroy(conf);
Py_XDECREF(vs8);
Py_XDECREF(vs);
Py_XDECREF(ks8);
Py_DECREF(ks);
return NULL;
}

Py_XDECREF(vs8);
Py_XDECREF(vs);
Py_XDECREF(ks8);
Py_DECREF(ks);
}

Expand Down
15 changes: 13 additions & 2 deletions confluent_kafka/src/confluent_kafka.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,15 @@

/**
* @returns Unicode Python object as char * in UTF-8 encoding
* @param uobjp might be set to NULL or a new object reference (depending
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Along the same lines as my other comment -- why not Py_XDECREF immediately inside these methods? Doesn't seem like there would ever be a case where we'd want to save this intermediate data, especially since it is only an issue in Py2.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment is misleading: it is not a new object reference to the original object, but a new object reference to a UTF-8 representation object of the original object. Do you object?

* on Python version) which needs to be cleaned up with
* Py_XDECREF() after finished use of the returned string.
*/
#define cfl_PyUnistr_AsUTF8(X) PyUnicode_AsUTF8(X)
static __inline const char *
cfl_PyUnistr_AsUTF8 (PyObject *o, PyObject **uobjp) {
        /* Fetch the UTF-8 string for the unicode object. */
        const char *utf8 = PyUnicode_AsUTF8(o);

        /* No intermediary object is needed in Py3: report NULL so that
         * the caller's Py_XDECREF() becomes a no-op. */
        *uobjp = NULL;

        return utf8;
}

/**
* @returns Unicode Python string object
Expand All @@ -77,7 +84,11 @@
/* See comments above */
#define cfl_PyBin(X) PyString ## X
#define cfl_PyUnistr(X) PyUnicode ## X
#define cfl_PyUnistr_AsUTF8(X) PyBytes_AsString(PyUnicode_AsUTF8String(X))
/**
 * @brief Py2 variant: encode unicode object \p o as UTF-8 and return a
 *        pointer to the encoded string.
 *
 * @param o      Unicode object to encode.
 * @param uobjp  Set to the intermediary UTF-8 object created by
 *               PyUnicode_AsUTF8String(), or to NULL if that call
 *               failed. The returned string is obtained from this
 *               object, so the caller must keep the reference until it
 *               is done with the string and then release it with
 *               Py_XDECREF().
 *
 * @returns the UTF-8 string, or NULL if \p o could not be encoded.
 */
static __inline const char *
cfl_PyUnistr_AsUTF8 (PyObject *o, PyObject **uobjp) {
        *uobjp = PyUnicode_AsUTF8String(o); /*UTF8 intermediary object on Py2*/
        if (!*uobjp)
                return NULL; /* Encoding failed: never pass a NULL object
                              * on to PyBytes_AsString(). */
        return PyBytes_AsString(*uobjp);
}
#define cfl_PyObject_Unistr(X) PyObject_Unicode(X)
#endif

Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy