Skip to content

BUG: add bounds-checking to in-place string multiply #29074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
BUG: add bounds-checking to in-place string multiply (#29060)
* BUG: add bounds-checking to in-place string multiply

* MNT: check for overflow and raise OverflowError

* MNT: respond to review suggestion

* MNT: handle overflow in one more spot

* MNT: make test behave the same on all architectures

* MNT: reorder to avoid work in some cases
  • Loading branch information
ngoldbaum authored and charris committed May 27, 2025
commit ef57a14428ae4fe4693479701e09c2bb11882bfc
3 changes: 3 additions & 0 deletions doc/release/upcoming_changes/29060.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* Multiplication between a string and integer now raises OverflowError instead
of MemoryError if the result of the multiplication would create a string that
is too large to be represented. This follows Python's behavior.
12 changes: 12 additions & 0 deletions numpy/_core/src/umath/string_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,18 @@ struct Buffer {
return num_codepoints;
}

/*
 * Return the width of this buffer in units that depend on `enc`:
 * the raw `after - buf` distance for ASCII and UTF8, and that distance
 * divided by sizeof(npy_ucs4) for UTF32.
 *
 * NOTE(review): presumably `buf` and `after` are byte pointers delimiting
 * the buffer, so this is a byte count for ASCII/UTF8 and a code point
 * count for UTF32 -- confirm against the struct definition.
 */
inline size_t
buffer_width()
{
    const size_t span = static_cast<size_t>(after - buf);
    switch (enc) {
        case ENCODING::UTF32:
            return span / sizeof(npy_ucs4);
        case ENCODING::ASCII:
        case ENCODING::UTF8:
            break;
    }
    // Return after the switch so that no path can flow off the end of a
    // non-void function, even if `enc` holds an unexpected value.
    return span;
}

inline Buffer<enc>&
operator+=(npy_int64 rhs)
{
Expand Down
48 changes: 36 additions & 12 deletions numpy/_core/src/umath/string_ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "dtypemeta.h"
#include "convert_datatype.h"
#include "gil_utils.h"
#include "templ_common.h" /* for npy_mul_with_overflow_size_t */

#include "string_ufuncs.h"
#include "string_fastsearch.h"
Expand Down Expand Up @@ -166,26 +167,44 @@ string_add(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out)


template <ENCODING enc>
static inline void
static inline int
string_multiply(Buffer<enc> buf1, npy_int64 reps, Buffer<enc> out)
{
size_t len1 = buf1.num_codepoints();
if (reps < 1 || len1 == 0) {
out.buffer_fill_with_zeros_after_index(0);
return;
return 0;
}

if (len1 == 1) {
out.buffer_memset(*buf1, reps);
out.buffer_fill_with_zeros_after_index(reps);
return 0;
}
else {
for (npy_int64 i = 0; i < reps; i++) {
buf1.buffer_memcpy(out, len1);
out += len1;
}
out.buffer_fill_with_zeros_after_index(0);

size_t newlen;
if (NPY_UNLIKELY(npy_mul_with_overflow_size_t(&newlen, reps, len1) != 0) || newlen > PY_SSIZE_T_MAX) {
return -1;
}

size_t pad = 0;
size_t width = out.buffer_width();
if (width < newlen) {
reps = width / len1;
pad = width % len1;
}

for (npy_int64 i = 0; i < reps; i++) {
buf1.buffer_memcpy(out, len1);
out += len1;
}

buf1.buffer_memcpy(out, pad);
out += pad;

out.buffer_fill_with_zeros_after_index(0);

return 0;
}


Expand Down Expand Up @@ -238,7 +257,9 @@ string_multiply_strint_loop(PyArrayMethod_Context *context,
while (N--) {
Buffer<enc> buf(in1, elsize);
Buffer<enc> outbuf(out, outsize);
string_multiply<enc>(buf, *(npy_int64 *)in2, outbuf);
if (NPY_UNLIKELY(string_multiply<enc>(buf, *(npy_int64 *)in2, outbuf) < 0)) {
npy_gil_error(PyExc_OverflowError, "Overflow detected in string multiply");
}

in1 += strides[0];
in2 += strides[1];
Expand Down Expand Up @@ -267,7 +288,9 @@ string_multiply_intstr_loop(PyArrayMethod_Context *context,
while (N--) {
Buffer<enc> buf(in2, elsize);
Buffer<enc> outbuf(out, outsize);
string_multiply<enc>(buf, *(npy_int64 *)in1, outbuf);
if (NPY_UNLIKELY(string_multiply<enc>(buf, *(npy_int64 *)in1, outbuf) < 0)) {
npy_gil_error(PyExc_OverflowError, "Overflow detected in string multiply");
}

in1 += strides[0];
in2 += strides[1];
Expand Down Expand Up @@ -752,10 +775,11 @@ string_multiply_resolve_descriptors(
if (given_descrs[2] == NULL) {
PyErr_SetString(
PyExc_TypeError,
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.");
"The 'out' kwarg is necessary when using the string multiply ufunc "
"directly. Use numpy.strings.multiply to multiply strings without "
"specifying 'out'.");
return _NPY_ERROR_OCCURRED_IN_CAST;
}

loop_descrs[0] = NPY_DT_CALL_ensure_canonical(given_descrs[0]);
if (loop_descrs[0] == NULL) {
return _NPY_ERROR_OCCURRED_IN_CAST;
Expand Down
12 changes: 6 additions & 6 deletions numpy/_core/src/umath/stringdtype_ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@ static int multiply_loop_core(
size_t newsize;
int overflowed = npy_mul_with_overflow_size_t(
&newsize, cursize, factor);
if (overflowed) {
npy_gil_error(PyExc_MemoryError,
"Failed to allocate string in string multiply");
if (overflowed || newsize > PY_SSIZE_T_MAX) {
npy_gil_error(PyExc_OverflowError,
"Overflow encountered in string multiply");
goto fail;
}

Expand Down Expand Up @@ -1748,9 +1748,9 @@ center_ljust_rjust_strided_loop(PyArrayMethod_Context *context,
width - num_codepoints);
newsize += s1.size;

if (overflowed) {
npy_gil_error(PyExc_MemoryError,
"Failed to allocate string in %s", ufunc_name);
if (overflowed || newsize > PY_SSIZE_T_MAX) {
npy_gil_error(PyExc_OverflowError,
"Overflow encountered in %s", ufunc_name);
goto fail;
}

Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def multiply(a, i):

# Ensure we can do a_len * i without overflow.
if np.any(a_len > sys.maxsize / np.maximum(i, 1)):
raise MemoryError("repeated string is too long")
raise OverflowError("Overflow encountered in string multiply")

buffersizes = a_len * i
out_dtype = f"{a.dtype.char}{buffersizes.max()}"
Expand Down
4 changes: 2 additions & 2 deletions numpy/_core/tests/test_stringdtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def test_null_roundtripping():

def test_string_too_large_error():
arr = np.array(["a", "b", "c"], dtype=StringDType())
with pytest.raises(MemoryError):
arr * (2**63 - 2)
with pytest.raises(OverflowError):
arr * (sys.maxsize + 1)


@pytest.mark.parametrize(
Expand Down
13 changes: 12 additions & 1 deletion numpy/_core/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,20 @@ def test_multiply_raises(self, dt):
with pytest.raises(TypeError, match="unsupported type"):
np.strings.multiply(np.array("abc", dtype=dt), 3.14)

with pytest.raises(MemoryError):
with pytest.raises(OverflowError):
np.strings.multiply(np.array("abc", dtype=dt), sys.maxsize)

# Checks in-place string multiplication (`arr *= n`) for each string dtype.
def test_inplace_multiply(self, dt):
arr = np.array(['foo ', 'bar'], dtype=dt)
arr *= 2
# For every dtype other than "T" the expected elements keep the original
# 4-character width, so the doubled strings are cut off
# ('barbar' -> 'barb'); for "T" the full repeated strings are expected.
if dt != "T":
assert_array_equal(arr, np.array(['foo ', 'barb'], dtype=dt))
else:
assert_array_equal(arr, ['foo foo ', 'barbar'])

# Multiplying by sys.maxsize must raise OverflowError for every dtype.
with pytest.raises(OverflowError):
arr *= sys.maxsize

@pytest.mark.parametrize("i_dt", [np.int8, np.int16, np.int32,
np.int64, np.int_])
def test_multiply_integer_dtypes(self, i_dt, dt):
Expand Down
3 changes: 2 additions & 1 deletion numpy/typing/tests/data/pass/ma.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
MAR_M_dt64: MaskedArray[np.datetime64] = np.ma.MaskedArray([np.datetime64(1, "D")])
MAR_S: MaskedArray[np.bytes_] = np.ma.MaskedArray([b'foo'], dtype=np.bytes_)
MAR_U: MaskedArray[np.str_] = np.ma.MaskedArray(['foo'], dtype=np.str_)
MAR_T = cast(np.ma.MaskedArray[Any, np.dtypes.StringDType], np.ma.MaskedArray(["a"], "T"))
MAR_T = cast(np.ma.MaskedArray[Any, np.dtypes.StringDType],
np.ma.MaskedArray(["a"], dtype="T"))

AR_b: npt.NDArray[np.bool] = np.array([True, False, True])

Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy