diff --git a/lib/ultrajson.h b/lib/ultrajson.h
index a0744fae..170214ff 100644
--- a/lib/ultrajson.h
+++ b/lib/ultrajson.h
@@ -332,6 +332,8 @@ typedef struct __JSONObjectDecoder
   JSOBJ (*newInt)(void *prv, JSINT32 value);
   JSOBJ (*newLong)(void *prv, JSINT64 value);
   JSOBJ (*newUnsignedLong)(void *prv, JSUINT64 value);
+  /* Receives the raw text of an integer the decoder could not store in a C integer; value is length bytes long and is not NUL-terminated. */
+  JSOBJ (*newIntegerFromString)(void *prv, char *value, size_t length);
   JSOBJ (*newDouble)(void *prv, double value);
   void (*releaseObject)(void *prv, JSOBJ obj);
   JSPFN_MALLOC malloc;
diff --git a/lib/ultrajsondec.c b/lib/ultrajsondec.c
index 1d647b26..caf15ccc 100644
--- a/lib/ultrajsondec.c
+++ b/lib/ultrajsondec.c
@@ -173,7 +173,12 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds
       {
         if (hasError)
         {
-          return SetError(ds, -1, intNeg == 1 ? "Value is too big" : "Value is too small");
+          // Instead of raising a range error, hand the text between ds->start
+          // and offset to the binding so it can build the integer itself.
+          char *strStart = ds->start;
+          ds->lastType = JT_INT;
+          ds->start = offset;
+          return ds->dec->newIntegerFromString(ds->prv, strStart, offset - strStart);
         }
         goto BREAK_INT_LOOP;
         break;
diff --git a/python/JSONtoObj.c b/python/JSONtoObj.c
index 5b94dc36..55000b58 100644
--- a/python/JSONtoObj.c
+++ b/python/JSONtoObj.c
@@ -119,6 +119,26 @@ static JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value)
   return PyLong_FromUnsignedLongLong (value);
 }
 
+// Builds a Python int from the base-10 text in value[0..length).
+// Returns the object produced by PyLong_FromString, or NULL with a Python
+// exception set if the scratch buffer cannot be allocated or parsing fails.
+static JSOBJ Object_newIntegerFromString(void *prv, char *value, size_t length)
+{
+  // PyLong_FromString requires a NUL-terminated string in CPython, contrary to the documentation: https://github.com/python/cpython/issues/59200
+  PyObject *result;
+  char *buf = PyObject_Malloc(length + 1);
+  if (!buf)
+  {
+    return PyErr_NoMemory();
+  }
+  memcpy(buf, value, length);
+  buf[length] = '\0';
+  result = PyLong_FromString(buf, NULL, 10);
+  // The copy is only needed for the duration of the parse; release it on every path.
+  PyObject_Free(buf);
+  return result;
+}
+
 static JSOBJ Object_newDouble(void *prv, double value)
 {
   return PyFloat_FromDouble(value);
@@ -152,6 +172,7 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs)
     Object_newInteger,
     Object_newLong,
     Object_newUnsignedLong,
+    Object_newIntegerFromString,
     Object_newDouble,
     Object_releaseObject,
     PyObject_Malloc,
diff --git a/python/objToJSON.c b/python/objToJSON.c
index d0aa005c..fd0d6c1b 100644
--- a/python/objToJSON.c
+++ b/python/objToJSON.c
@@ -100,6 +100,26 @@ static void *PyLongToUINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, siz
   return NULL;
 }
 
+// Emits an int as its base-10 text and reports the text length through _outLen.
+// The str returned by PyNumber_ToBase is a new reference; it is stored in the
+// type context's rawJSONValue so the returned pointer stays valid after this
+// call and the reference can be dropped with the context rather than leaked.
+static void *PyLongToINTSTR(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
+{
+  PyObject *obj = GET_TC(tc)->rawJSONValue;
+  if (!obj)
+  {
+    obj = PyNumber_ToBase(_obj, 10);
+    if (!obj)
+    {
+      return NULL;
+    }
+    GET_TC(tc)->rawJSONValue = obj;
+  }
+  *_outLen = PyUnicode_GET_LENGTH(obj);
+  return PyUnicode_1BYTE_DATA(obj);
+}
+
 static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
 {
   PyObject *obj = (PyObject *) _obj;
@@ -508,6 +528,17 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
       exc = PyErr_Occurred();
     }
 
+    if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
+    {
+      PyErr_Clear();
+      pc->PyTypeToJSON = PyLongToINTSTR;
+      tc->type = JT_RAW;
+      // Overwritten by PyLong_* due to the union, which would lead to a DECREF in endTypeContext.
+      // Reset it to NULL; PyLongToINTSTR stores the decimal string here when it runs.
+      GET_TC(tc)->rawJSONValue = NULL;
+      return;
+    }
+
     if (exc)
     {
       PRINTMARK();
diff --git a/tests/test_ujson.py b/tests/test_ujson.py
index 266ae7ce..25447c32 100644
--- a/tests/test_ujson.py
+++ b/tests/test_ujson.py
@@ -600,6 +600,32 @@ def test_decode_numeric_int_exp(test_input):
     assert output == json.loads(test_input)
 
 
+@pytest.mark.parametrize(
+    "i",
+    [
+        -(10**25),  # very negative
+        -(2**64),  # too large in magnitude for a uint64
+        -(2**63) - 1,  # too small for an int64
+        2**64,  # too large for a uint64
+        10**25,  # very positive
+    ],
+)
+@pytest.mark.parametrize("mode", ["encode", "decode"])
+def test_encode_decode_big_int(i, mode):
+    # Test ints that are too large to be represented by a C integer type
+    for python_object in (i, [i], {"i": i}):
+        json_string = json.dumps(python_object, separators=(",", ":"))
+        if mode == "encode":
+            if hasattr(sys, "pypy_version_info"):
+                # https://foss.heptapod.net/pypy/pypy/-/issues/3765
+                pytest.skip("PyPy can't serialise big ints")
+            assert ujson.encode(python_object) == json_string
+            if isinstance(python_object, dict):
+                assert ujson.encode(python_object, sort_keys=True) == json_string
+        else:
+            assert ujson.decode(json_string) == python_object
+
+
 @pytest.mark.parametrize(
     "test_input, expected",
     [
@@ -636,15 +662,7 @@ def test_decode_range_raises(test_input, expected):
         ("[,31337]", ujson.JSONDecodeError),  # array leading comma fail
         ("[,]", ujson.JSONDecodeError),  # array only comma fail
         ("[]]", ujson.JSONDecodeError),  # array unmatched bracket fail
-        ("18446744073709551616", ujson.JSONDecodeError),  # too big value
-        ("-90223372036854775809", ujson.JSONDecodeError),  # too small value
-        ("-23058430092136939529", ujson.JSONDecodeError),  # too small value
-        ("-11529215046068469760", ujson.JSONDecodeError),  # too small value
-        ("18446744073709551616", ujson.JSONDecodeError),  # very too big value
-        ("23058430092136939529", ujson.JSONDecodeError),  # too big value
-        ("-90223372036854775809", ujson.JSONDecodeError),  # very too small value
         ("{}\n\t a", ujson.JSONDecodeError),  # with trailing non whitespaces
-        ("[18446744073709551616]", ujson.JSONDecodeError),  # array with big int
         ('{"age", 44}', ujson.JSONDecodeError),  # read bad object syntax
     ],
 )
@@ -718,11 +736,6 @@ def test_dumps_raises(test_input, expected_exception, expected_message):
         (float("nan"), OverflowError),
         (float("inf"), OverflowError),
         (-float("inf"), OverflowError),
-        (12839128391289382193812939, OverflowError),
-        ([12839128391289382193812939], OverflowError),
-        ([12839128391289382193812939, 42], OverflowError),
-        ({"a": 12839128391289382193812939}, OverflowError),
-        ({"a": 12839128391289382193812939, "b": 42}, OverflowError),
     ],
 )
 def test_encode_raises_allow_nan(test_input, expected_exception):