From 4238f7e012c4683098f93eead6500569c536704e Mon Sep 17 00:00:00 2001 From: Rogdham Date: Sat, 10 May 2025 23:32:22 +0200 Subject: [PATCH 1/4] Remove EndlessZstdDecompressor remains --- Modules/_zstd/decompressor.c | 154 ++++++----------------------------- 1 file changed, 23 insertions(+), 131 deletions(-) diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 2ed88cd3f231f1..9d5f01e8c51a03 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -45,19 +45,10 @@ typedef struct { /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ char needs_input; - /* For decompress(), 0 or 1. - 1 when both input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor - just be initialized. */ - char at_frame_edge; - /* For ZstdDecompressor, 0 or 1. 1 means the end of the first frame has been reached. */ char eof; - /* Used for fast reset above three variables */ - char _unused_char_for_align; - /* __init__ has been called, 0 or 1. */ bool initialized; } ZstdDecompressor; @@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) return 0; } -typedef enum { - TYPE_DECOMPRESSOR, // , ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // , decompress() function -} decompress_type; - /* - Given the two types of decompressors (defined above), - decompress implementation for , , pseudo code: + Decompress implementation in pseudo code: initialize_output_buffer while True: decompress_data - set_object_flag # .eof for , .at_frame_edge for . + set_object_flag # .eof if output_buffer_exhausted: if output_buffer_reached_max_length: @@ -287,53 +272,17 @@ typedef enum { flushing to do to complete current frame. Note, decompressing "an empty input" in any case will make it > 0. - - supports multiple frames, has an .at_frame_edge flag, it means both the - input and output streams are at a frame edge. The flag can be set by this - statement: - - .at_frame_edge = (zstd_ret == 0) ? 1 : 0 - - But if decompressing "an empty input" at "a frame edge", zstd_ret will be - non-zero, then .at_frame_edge will be wrongly set to false. To solve this - problem, two AFE checks are needed to ensure that: when at "a frame edge", - empty input will not be decompressed. - - // AFE check - if (self->at_frame_edge && in->pos == in->size) { - finish - } - - In , if .at_frame_edge is eventually set to true, but input stream has - unconsumed data (in->pos < in->size), then the outer function - stream_decompress() will set .at_frame_edge to false. In this case, - although the output stream is at a frame edge, for the caller, the input - stream is not at a frame edge, see below diagram. This behavior does not - affect the next AFE check, since (in->pos < in->size). - - input stream: --------------|--- - ^ - output stream: ====================| - ^ */ static PyObject * decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type) + Py_ssize_t initial_size) { size_t zstd_ret; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; PyObject *ret; - /* The first AFE check for setting .at_frame_edge flag */ - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (self->at_frame_edge && in->pos == in->size) { - return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); - } - } - /* Initialize the output buffer */ if (initial_size >= 0) { if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { @@ -362,22 +311,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, goto error; } - /* Set .eof/.af_frame_edge flag */ - if (type == TYPE_DECOMPRESSOR) { - /* ZstdDecompressor class stops when a frame is decompressed */ - if (zstd_ret == 0) { - self->eof = 1; - break; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* decompress() function supports multiple frames */ - self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* The second AFE check for setting .at_frame_edge flag */ - if (self->at_frame_edge && in->pos == in->size) { - break; - } + /* Set .eof flag */ + if (zstd_ret == 0) { + /* Stop when a frame is decompressed */ + self->eof = 1; + break; } /* Need to check out before in. Maybe zstd's internal buffer still has @@ -415,8 +353,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, } static void -decompressor_reset_session(ZstdDecompressor *self, - decompress_type type) +decompressor_reset_session(ZstdDecompressor *self) { // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here // and ensure lock is always held @@ -425,56 +362,29 @@ decompressor_reset_session(ZstdDecompressor *self, self->in_begin = 0; self->in_end = 0; - if (type == TYPE_DECOMPRESSOR) { - Py_CLEAR(self->unused_data); - } + Py_CLEAR(self->unused_data); /* Reset variables in one operation */ self->needs_input = 1; - self->at_frame_edge = 1; self->eof = 0; - self->_unused_char_for_align = 0; /* Resetting session never fail */ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); } static PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length, - decompress_type type) +stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) { Py_ssize_t initial_buffer_size = -1; ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; - if (type == TYPE_DECOMPRESSOR) { - /* Check .eof flag */ - if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); - assert(ret == NULL); - goto success; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* Fast path for the first frame */ - if (self->at_frame_edge && self->in_begin == self->in_end) { - /* Read decompressed size */ - uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len); - - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) - - Use ZSTD_findFrameCompressedSize() to check complete frame, - prevent allocating too much memory for small input chunk. */ - - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && - !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) ) - { - initial_buffer_size = (Py_ssize_t) decompressed_size; - } - } + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); + assert(ret == NULL); + goto success; } /* Prepare input buffer w/wo unconsumed data */ @@ -562,29 +472,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length /* Decompress */ ret = decompress_impl(self, &in, - max_length, initial_buffer_size, - type); + max_length, initial_buffer_size); if (ret == NULL) { goto error; } /* Unconsumed input data */ if (in.pos == in.size) { - if (type == TYPE_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length || self->eof) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + else { + self->needs_input = 1; } if (use_input_buffer) { @@ -598,10 +497,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length self->needs_input = 0; - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - self->at_frame_edge = 0; - } - if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ @@ -638,7 +533,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length error: /* Reset decompressor's states/session */ - decompressor_reset_session(self, type); + decompressor_reset_session(self); Py_CLEAR(ret); success: @@ -668,9 +563,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* needs_input flag */ self->needs_input = 1; - /* at_frame_edge flag */ - self->at_frame_edge = 1; - /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { @@ -837,7 +729,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, /* Thread-safe code */ Py_BEGIN_CRITICAL_SECTION(self); - ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR); + ret = stream_decompress(self, data, max_length); Py_END_CRITICAL_SECTION(); return ret; } From 9e3ae05d9d2cffd5e07835aae84b94ae461a47bf Mon Sep 17 00:00:00 2001 From: Rogdham Date: Sun, 11 May 2025 00:17:20 +0200 Subject: [PATCH 2/4] First review --- Modules/_zstd/decompressor.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 9d5f01e8c51a03..9de353448ddf65 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -43,11 +43,11 @@ typedef struct { PyObject *unused_data; /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ - char needs_input; + bool needs_input; /* For ZstdDecompressor, 0 or 1. 1 means the end of the first frame has been reached. */ - char eof; + bool eof; /* __init__ has been called, 0 or 1. */ bool initialized; @@ -368,7 +368,7 @@ decompressor_reset_session(ZstdDecompressor *self) self->needs_input = 1; self->eof = 0; - /* Resetting session never fail */ + /* Resetting session is guaranteed to never fail */ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); } @@ -384,7 +384,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length if (self->eof) { PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); assert(ret == NULL); - goto success; + return ret; } /* Prepare input buffer w/wo unconsumed data */ @@ -471,8 +471,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, - max_length, initial_buffer_size); + ret = decompress_impl(self, &in, max_length, initial_buffer_size); if (ret == NULL) { goto error; } @@ -529,16 +528,14 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length } } - goto success; + return ret; error: /* Reset decompressor's states/session */ decompressor_reset_session(self); Py_CLEAR(ret); -success: - - return ret; + return NULL; } From 0ecb1ff4b50106fe9bd22061bd68d5a3e1a3fc22 Mon Sep 17 00:00:00 2001 From: Rogdham Date: Sun, 11 May 2025 00:18:33 +0200 Subject: [PATCH 3/4] Remove decompress_impl param max_length --- Modules/_zstd/decompressor.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 9de353448ddf65..e8c236a08767dd 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -275,8 +275,7 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) */ static PyObject * decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size) + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; @@ -284,15 +283,8 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, PyObject *ret; /* Initialize the output buffer */ - if (initial_size >= 0) { - if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { - goto error; - } - } - else { - if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { - goto error; - } + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; } assert(out.pos == 0); @@ -375,7 +367,6 @@ decompressor_reset_session(ZstdDecompressor *self) static PyObject * stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) { - Py_ssize_t initial_buffer_size = -1; ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; @@ -471,7 +462,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, max_length, initial_buffer_size); + ret = decompress_impl(self, &in, max_length); if (ret == NULL) { goto error; } From d676df192c711a6309a815104c255b55df1b0e6f Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sun, 11 May 2025 02:37:29 +0100 Subject: [PATCH 4/4] return NULL --- Modules/_zstd/decompressor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index e8c236a08767dd..14bf6ccbc49fe6 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -375,7 +375,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length if (self->eof) { PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); assert(ret == NULL); - return ret; + return NULL; } /* Prepare input buffer w/wo unconsumed data */ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy