From 51ba378108a87d17a62745b0107b3bba11a41305 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 4 Sep 2022 17:21:18 +0100 Subject: [PATCH] gh-95778: Correctly pre-check for int-to-str conversion (GH-96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. * Issue: gh-95778 Co-authored-by: Gregory P. 
Smith [Google LLC] (cherry picked from commit b126196838bbaf5f4d35120e0e6bcde435b0b480) Co-authored-by: Mark Dickinson --- Include/internal/pycore_long.h | 4 +- Lib/test/test_int.py | 82 +++++++++++++++++++ ...08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Objects/longobject.c | 26 +++++- 4 files changed, 107 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ef57a60bd5302b..0f466eb60feadb 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -18,9 +18,9 @@ extern "C" { * everyone's existing deployed numpy test suite passes before * https://github.com/numpy/numpy/issues/22098 is widely available. * - * $ python -m timeit -s 's = * "1"*4300' 'int(s)' + * $ python -m timeit -s 's = "1"*4300' 'int(s)' * 2000 loops, best of 5: 125 usec per loop - * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)' + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' * 1000 loops, best of 5: 311 usec per loop * (zen2 cloud VM) * diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index e9561b02fcac7b..800c0b006cdc6b 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,5 @@ import sys +import time import unittest from test import support @@ -632,6 +633,87 @@ def test_max_str_digits(self): with self.assertRaises(ValueError): str(i) + def test_denial_of_service_prevented_int_to_str(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 50_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits. 
+ digits = 78_268 + with support.adjust_int_max_str_digits(digits): + start = get_time() + huge_decimal = str(huge_int) + seconds_to_convert = get_time() - start + self.assertEqual(len(huge_decimal), digits) + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + # We test with the limit almost at the size needed to check performance. + # The performant limit check is slightly fuzzy, give it some room. + with support.adjust_int_max_str_digits(int(.995 * digits)): + with self.assertRaises(ValueError) as err: + start = get_time() + str(huge_int) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds said Zen based cloud VM. + str(extra_huge_int) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_denial_of_service_prevented_str_to_int(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 100_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + digits = 133700 + huge = '8'*digits + with support.adjust_int_max_str_digits(digits): + start = get_time() + int(huge) + seconds_to_convert = get_time() - start + # Ensuring that we chose a slow enough conversion to measure. 
+ # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + with support.adjust_int_max_str_digits(digits - 1): + with self.assertRaises(ValueError) as err: + start = get_time() + int(huge) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge = '7'*1_200_000 + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + int(extra_huge) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" maxdigits = sys.get_int_max_str_digits() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index ea3b85d632e083..8eb8a34884dced 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -11,4 +11,4 @@ limitation <int_max_str_digits>` documentation. The default limit is 4300 digits in string form. Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback -from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. +from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson. 
diff --git a/Objects/longobject.c b/Objects/longobject.c index 0701c494133711..17274d01cec4be 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -36,7 +36,8 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" static inline void _Py_DECREF_INT(PyLongObject *op) @@ -1726,6 +1727,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1791,8 +1809,8 @@ long_to_decimal_string_internal(PyObject *aa, Py_ssize_t strlen_nosign = strlen - negative; if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - max_str_digits, strlen_nosign); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); return -1; } } @@ -2466,7 +2484,7 @@ digit beyond the first. 
PyInterpreterState *interp = _PyInterpreterState_GET(); int max_str_digits = interp->int_max_str_digits; if ((max_str_digits > 0) && (digits > max_str_digits)) { - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, max_str_digits, digits); return NULL; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy