From 060352856ce32910dbe7bc4f1b4a4cdeb7b856c1 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Tue, 18 Mar 2025 14:19:26 +0000 Subject: [PATCH] gh-91349: Expose the crc32 function from the lzma library --- Doc/library/lzma.rst | 17 ++++++ Lib/test/test_lzma.py | 41 +++++++++++++- ...5-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst | 1 + Modules/_lzmamodule.c | 30 ++++++++++ Modules/clinic/_lzmamodule.c.h | 56 ++++++++++++++++++- 5 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst index 69f7cb8d48d7ae..e91034d23627dd 100644 --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -311,6 +311,23 @@ Compressing and decompressing data in memory *preset* and *filters* arguments. +.. function:: crc32(data, value=0) + + .. index:: + single: Cyclic Redundancy Check + single: checksum; Cyclic Redundancy Check + + Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The + result is a non-negative integer less than :math:`2^{32}`. If *value* is present, it is used + as the starting value of the checksum; otherwise, a default value of 0 + is used. Passing in *value* allows computing a running checksum over the + concatenation of several inputs. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. Since + the algorithm is designed for use as a checksum algorithm, it is not suitable + for use as a general hash algorithm. + + .. versionadded:: next + .. 
function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None) Decompress *data* (a :class:`bytes` object), returning the uncompressed data diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index e93c3c37354e27..3a02362258965f 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1,4 +1,5 @@ import array +import binascii from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE import os import pickle @@ -8,7 +9,7 @@ import unittest from compression._common import _streams -from test.support import _4G, bigmemtest +from test.support import _1G, _4G, bigmemtest from test.support.import_helper import import_module from test.support.os_helper import ( TESTFN, unlink, FakePath @@ -17,6 +18,44 @@ lzma = import_module("lzma") from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile +class ChecksumTestCase(unittest.TestCase): + # checksum test cases + def test_crc32start(self): + self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0)) + self.assertTrue(lzma.crc32(b"abc", 0xffffffff)) + + def test_crc32empty(self): + self.assertEqual(lzma.crc32(b"", 0), 0) + self.assertEqual(lzma.crc32(b"", 1), 1) + self.assertEqual(lzma.crc32(b"", 432), 432) + + def test_penguins(self): + self.assertEqual(lzma.crc32(b"penguin", 0), 0x0e5c1a120) + self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94) + self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0)) + + def test_crc32_unsigned(self): + foo = b'abcdefghijklmnop' + # explicitly test signed behavior + self.assertEqual(lzma.crc32(foo), 2486878355) + self.assertEqual(lzma.crc32(b'spam'), 1138425661) + + def test_same_as_binascii_crc32(self): + foo = b'abcdefghijklmnop' + crc = 2486878355 + self.assertEqual(binascii.crc32(foo), crc) + self.assertEqual(lzma.crc32(foo), crc) + self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam')) + + +# GH-54485 - check that inputs >=4 GiB are handled correctly. 
+class ChecksumBigBufferTestCase(unittest.TestCase): + + @bigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(lzma.crc32(data), 1044521549) + class CompressorDecompressorTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst b/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst new file mode 100644 index 00000000000000..855da9d2673ca7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst @@ -0,0 +1 @@ +Expose the crc32 function from the lzma library as :func:`lzma.crc32`. diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index 462c2181fa6036..8b14472f3c9611 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -1602,10 +1602,40 @@ lzma_exec(PyObject *module) return 0; } +/*[clinic input] +_lzma.crc32 -> unsigned_int + + data: Py_buffer + value: unsigned_int(bitwise=True) = 0 + Starting value of the checksum. + / + +Compute a CRC-32 checksum of data. + +The returned checksum is an integer. 
+[clinic start generated code]*/ + +static unsigned int +_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) +/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/ +{ + /* Releasing the GIL for very small buffers is inefficient + and may lower performance */ + if (data->len > 1024*5) { + Py_BEGIN_ALLOW_THREADS + value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value); + Py_END_ALLOW_THREADS + } else { + value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value); + } + return value; +} + static PyMethodDef lzma_methods[] = { _LZMA_IS_CHECK_SUPPORTED_METHODDEF _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF + _LZMA_CRC32_METHODDEF {NULL} }; diff --git a/Modules/clinic/_lzmamodule.c.h b/Modules/clinic/_lzmamodule.c.h index ebdc81a0dac2f0..abad1082690785 100644 --- a/Modules/clinic/_lzmamodule.c.h +++ b/Modules/clinic/_lzmamodule.c.h @@ -333,4 +333,58 @@ _lzma__decode_filter_properties(PyObject *module, PyObject *const *args, Py_ssiz return return_value; } -/*[clinic end generated code: output=6386084cb43d2533 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_lzma_crc32__doc__, +"crc32($module, data, value=0, /)\n" +"--\n" +"\n" +"Compute a CRC-32 checksum of data.\n" +"\n" +" value\n" +" Starting value of the checksum.\n" +"\n" +"The returned checksum is an integer."); + +#define _LZMA_CRC32_METHODDEF \ + {"crc32", _PyCFunction_CAST(_lzma_crc32), METH_FASTCALL, _lzma_crc32__doc__}, + +static unsigned int +_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value); + +static PyObject * +_lzma_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer data = {NULL, NULL}; + unsigned int value = 0; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("crc32", nargs, 1, 2)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto 
skip_optional; + } + value = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (value == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + _return_value = _lzma_crc32_impl(module, &data, value); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} +/*[clinic end generated code: output=b6591cb074aa87b6 input=a9049054013a1b77]*/ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy