Add support for arbitrary size integers #548
Merged

6 commits
- aa068e3 Add support for arbitrary size integers (JustAnotherArchivist)
- a8b35ef Fix tests for large ints (JustAnotherArchivist)
- 655e146 Skip big integer encoding tests on PyPy for now (JustAnotherArchivist)
- e2097ab Refactor test_encode_decode_big_int per Brénainn's suggestion (JustAnotherArchivist)
- 773c04a Add big int test with dict sorting (JustAnotherArchivist)
- 99709df More descriptive variable names (JustAnotherArchivist)
---

The `memcpy` here is certainly not ideal. It would probably be faster to buffer the next byte, replace it with a NUL, feed the char array through `PyLong_FromString`, and then restore it. I think that should always be possible/safe without buffer overruns, but I haven't given it a great deal of thought yet. Do you think it's worth exploring that sort of hack?
---
Ooh, now that's a disgusting but tantalising thing to do. Two ways I imagine that causing damage are:

1. Writing the NUL out of bounds if the number sits at the very end of the input buffer.
2. Another thread reading the buffer while the byte is temporarily swapped out.

1 is moot if the string we're parsing is NULL terminated (which I'm fairly certain it already is). 2 feels like a very nasty way to derail something.
---
The relevant part of the decoder boils down to:

- `PyUnicode_AsUTF8String` to convert to `bytes`.
- `PyBytes_AsString`, which returns a NULL-terminated `char*` that is the internal buffer of the `bytes` object.

So no out-of-bounds issue. As for multithreading, yes, that could be an issue (if the user passes `bytes`). But on the other hand, ujson already isn't thread-safe as far as I can tell. For example, modifying a dict from another thread while encoding it should crash the encoder.
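For illustration, a minimal sketch of that conversion path using the CPython C API (not ujson's actual code; the helper name is made up):

```c
#include <Python.h>

/* Sketch: get a NUL-terminated char* for a str input by encoding it to a
   new bytes object and borrowing that object's internal buffer. */
static const char *as_terminated_buffer(PyObject *input, PyObject **owner)
{
    *owner = PyUnicode_AsUTF8String(input);   /* str -> bytes (UTF-8) */
    if (*owner == NULL)
        return NULL;                          /* encoding failed */
    /* The internal buffer of a bytes object is always NUL-terminated;
       no copy is made, and it stays valid while *owner is alive. */
    return PyBytes_AsString(*owner);
}
```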
---
I ran some tests on this, parsing an array equivalent to `[2**64 + i for i in range(1000)]`, i.e. 1000 ints all using this code path. On my machine, parsing such a string averages 128 μs with `memcpy` and 108 μs with the suggested hack.

Compared to the other code paths for smaller ints (note that the number of digits obviously has a direct impact on throughput as well):

- `2**64 + i`, `memcpy`: 128 μs
- `2**64 + i`, buffer hack: 108 μs
- `2**62 + i` (new `UnsignedLong`)
- `-2**63 + i` (new `Long`)
- `2**30 + i` (new `Int`)
- `-2**31 + i` (new `Int`)

Command, for reference: `python3 -m timeit -s 'import ujson; s = "[" + ",".join(str(2**64 + i) for i in range(1000)) + "]"' 'ujson.loads(s)'`

Implementation of the hack:
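A sketch along those lines, assuming `start` points at the first digit and `end` one past the last digit of the number (names illustrative, not ujson's actual code):

```c
#include <Python.h>

/* Parse the digit run [start, end) without copying: buffer the byte after
   the digits, overwrite it with a NUL so PyLong_FromString stops there,
   then restore it. Requires a writable buffer (e.g. the internals of a
   bytes object we own). */
static PyObject *parse_big_int_in_place(char *start, char *end)
{
    char saved = *end;   /* buffer the next byte */
    *end = '\0';         /* temporarily NUL-terminate the digits */
    PyObject *value = PyLong_FromString(start, NULL, 10);
    *end = saved;        /* restore the original byte */
    return value;        /* NULL with an exception set on failure */
}
```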
---
That's a pretty small speedup really. On my machine it's even less (115 μs vs 110 μs). Definitely not worth the hack in my mind.
---
Yeah, agree. It's basically only relevant if all you ever do is parse huge integers. I don't think that's common enough to warrant the ugly hack.