diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ef57a60bd5302b..0f466eb60feadb 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -18,9 +18,9 @@ extern "C" { * everyone's existing deployed numpy test suite passes before * https://github.com/numpy/numpy/issues/22098 is widely available. * - * $ python -m timeit -s 's = * "1"*4300' 'int(s)' + * $ python -m timeit -s 's = "1"*4300' 'int(s)' * 2000 loops, best of 5: 125 usec per loop - * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)' + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' * 1000 loops, best of 5: 311 usec per loop * (zen2 cloud VM) * diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index e9561b02fcac7b..800c0b006cdc6b 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,5 @@ import sys +import time import unittest from test import support @@ -632,6 +633,87 @@ def test_max_str_digits(self): with self.assertRaises(ValueError): str(i) + def test_denial_of_service_prevented_int_to_str(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 50_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits. + digits = 78_268 + with support.adjust_int_max_str_digits(digits): + start = get_time() + huge_decimal = str(huge_int) + seconds_to_convert = get_time() - start + self.assertEqual(len(huge_decimal), digits) + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + # We test with the limit almost at the size needed to check performance. + # The performant limit check is slightly fuzzy, give it a some room. + with support.adjust_int_max_str_digits(int(.995 * digits)): + with self.assertRaises(ValueError) as err: + start = get_time() + str(huge_int) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds said Zen based cloud VM. + str(extra_huge_int) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_denial_of_service_prevented_str_to_int(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 100_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + digits = 133700 + huge = '8'*digits + with support.adjust_int_max_str_digits(digits): + start = get_time() + int(huge) + seconds_to_convert = get_time() - start + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + with support.adjust_int_max_str_digits(digits - 1): + with self.assertRaises(ValueError) as err: + start = get_time() + int(huge) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge = '7'*1_200_000 + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + int(extra_huge) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" maxdigits = sys.get_int_max_str_digits() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index ea3b85d632e083..8eb8a34884dced 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -11,4 +11,4 @@ limitation ` documentation. The default limit is 4300 digits in string form. Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback -from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. +from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson. diff --git a/Objects/longobject.c b/Objects/longobject.c index 0701c494133711..17274d01cec4be 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -36,7 +36,8 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" static inline void _Py_DECREF_INT(PyLongObject *op) @@ -1726,6 +1727,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1791,8 +1809,8 @@ long_to_decimal_string_internal(PyObject *aa, Py_ssize_t strlen_nosign = strlen - negative; if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - max_str_digits, strlen_nosign); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); return -1; } } @@ -2466,7 +2484,7 @@ digit beyond the first. PyInterpreterState *interp = _PyInterpreterState_GET(); int max_str_digits = interp->int_max_str_digits; if ((max_str_digits > 0) && (digits > max_str_digits)) { - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, max_str_digits, digits); return NULL; }