Skip to content

Commit

Permalink
Merge pull request #548 from JustAnotherArchivist/arbitrary-ints
Browse files Browse the repository at this point in the history
Add support for arbitrary size integers
  • Loading branch information
hugovk committed Jun 18, 2022
2 parents 4b5cccc + 99709df commit cc70119
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 14 deletions.
1 change: 1 addition & 0 deletions lib/ultrajson.h
Expand Up @@ -332,6 +332,7 @@ typedef struct __JSONObjectDecoder
JSOBJ (*newInt)(void *prv, JSINT32 value);
JSOBJ (*newLong)(void *prv, JSINT64 value);
JSOBJ (*newUnsignedLong)(void *prv, JSUINT64 value);
JSOBJ (*newIntegerFromString)(void *prv, char *value, size_t length);
JSOBJ (*newDouble)(void *prv, double value);
void (*releaseObject)(void *prv, JSOBJ obj);
JSPFN_MALLOC malloc;
Expand Down
5 changes: 4 additions & 1 deletion lib/ultrajsondec.c
Expand Up @@ -173,7 +173,10 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds
{
if (hasError)
{
return SetError(ds, -1, intNeg == 1 ? "Value is too big" : "Value is too small");
char *strStart = ds->start;
ds->lastType = JT_INT;
ds->start = offset;
return ds->dec->newIntegerFromString(ds->prv, strStart, offset - strStart);
}
goto BREAK_INT_LOOP;
break;
Expand Down
10 changes: 10 additions & 0 deletions python/JSONtoObj.c
Expand Up @@ -119,6 +119,15 @@ static JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value)
return PyLong_FromUnsignedLongLong (value);
}

static JSOBJ Object_newIntegerFromString(void *prv, char *value, size_t length)
{
// PyLong_FromString requires a NUL-terminated string in CPython, contrary to the documentation: https://github.com/python/cpython/issues/59200
char *buf = PyObject_Malloc(length + 1);
memcpy(buf, value, length);
buf[length] = '\0';
return PyLong_FromString(buf, NULL, 10);
}

static JSOBJ Object_newDouble(void *prv, double value)
{
return PyFloat_FromDouble(value);
Expand Down Expand Up @@ -152,6 +161,7 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs)
Object_newInteger,
Object_newLong,
Object_newUnsignedLong,
Object_newIntegerFromString,
Object_newDouble,
Object_releaseObject,
PyObject_Malloc,
Expand Down
21 changes: 21 additions & 0 deletions python/objToJSON.c
Expand Up @@ -100,6 +100,17 @@ static void *PyLongToUINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, siz
return NULL;
}

/*
 * Encoder string callback for Python ints handled as raw JSON text: converts
 * _obj to its base-10 string form and hands back a pointer to the characters.
 *
 * _obj     - the Python integer being encoded.
 * tc       - type context of the value; used to keep the temporary string alive.
 * outValue - unused.
 * _outLen  - receives the number of characters in the returned text.
 *
 * Returns a pointer into the temporary string object, or NULL if the
 * conversion failed (a Python exception is then set by PyNumber_ToBase).
 */
static void *PyLongToINTSTR(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
  PyObject *obj = PyNumber_ToBase(_obj, 10);
  if (!obj)
  {
    return NULL;
  }
  // PyNumber_ToBase returns a new reference and the pointer returned below
  // aliases its storage, so it cannot be released here. Park it in the type
  // context instead of leaking it.
  // NOTE(review): relies on endTypeContext DECREF'ing rawJSONValue for JT_RAW
  // values, as the comment in Object_beginTypeContext indicates - confirm.
  GET_TC(tc)->rawJSONValue = obj;
  *_outLen = PyUnicode_GET_LENGTH(obj);
  return PyUnicode_1BYTE_DATA(obj);
}

static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
PyObject *obj = (PyObject *) _obj;
Expand Down Expand Up @@ -508,6 +519,16 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
exc = PyErr_Occurred();
}

if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
{
PyErr_Clear();
pc->PyTypeToJSON = PyLongToINTSTR;
tc->type = JT_RAW;
// rawJSONValue shares a union with the integer fields that the PyLong_* conversions write to, so it may hold garbage here; reset it so that endTypeContext does not DECREF an invalid pointer.
GET_TC(tc)->rawJSONValue = NULL;
return;
}

if (exc)
{
PRINTMARK();
Expand Down
39 changes: 26 additions & 13 deletions tests/test_ujson.py
Expand Up @@ -604,6 +604,32 @@ def test_decode_numeric_int_exp(test_input):
assert output == json.loads(test_input)


@pytest.mark.parametrize(
    "i",
    [
        -(10**25),  # very negative
        -(2**64),  # too large in magnitude for a uint64
        -(2**63) - 1,  # too small for a int64
        2**64,  # too large for a uint64
        10**25,  # very positive
    ],
)
@pytest.mark.parametrize("mode", ["encode", "decode"])
def test_encode_decode_big_int(i, mode):
    """Round-trip ints that do not fit in any C integer type.

    Each value is checked bare, inside a list, and as a dict value, using the
    standard library's compact JSON output as the reference text.
    """
    encoding = mode == "encode"
    if encoding and hasattr(sys, "pypy_version_info"):
        # https://foss.heptapod.net/pypy/pypy/-/issues/3765
        pytest.skip("PyPy can't serialise big ints")

    for value in (i, [i], {"i": i}):
        expected = json.dumps(value, separators=(",", ":"))
        if not encoding:
            assert ujson.decode(expected) == value
            continue
        assert ujson.encode(value) == expected
        if isinstance(value, dict):
            # Sorting keys must not change how the big int is rendered.
            assert ujson.encode(value, sort_keys=True) == expected


@pytest.mark.parametrize(
"test_input, expected",
[
Expand Down Expand Up @@ -640,15 +666,7 @@ def test_decode_range_raises(test_input, expected):
("[,31337]", ujson.JSONDecodeError), # array leading comma fail
("[,]", ujson.JSONDecodeError), # array only comma fail
("[]]", ujson.JSONDecodeError), # array unmatched bracket fail
("18446744073709551616", ujson.JSONDecodeError), # too big value
("-90223372036854775809", ujson.JSONDecodeError), # too small value
("-23058430092136939529", ujson.JSONDecodeError), # too small value
("-11529215046068469760", ujson.JSONDecodeError), # too small value
("18446744073709551616", ujson.JSONDecodeError), # very too big value
("23058430092136939529", ujson.JSONDecodeError), # too big value
("-90223372036854775809", ujson.JSONDecodeError), # very too small value
("{}\n\t a", ujson.JSONDecodeError), # with trailing non whitespaces
("[18446744073709551616]", ujson.JSONDecodeError), # array with big int
('{"age", 44}', ujson.JSONDecodeError), # read bad object syntax
],
)
Expand Down Expand Up @@ -722,11 +740,6 @@ def test_dumps_raises(test_input, expected_exception, expected_message):
(float("nan"), OverflowError),
(float("inf"), OverflowError),
(-float("inf"), OverflowError),
(12839128391289382193812939, OverflowError),
([12839128391289382193812939], OverflowError),
([12839128391289382193812939, 42], OverflowError),
({"a": 12839128391289382193812939}, OverflowError),
({"a": 12839128391289382193812939, "b": 42}, OverflowError),
],
)
def test_encode_raises_allow_nan(test_input, expected_exception):
Expand Down

0 comments on commit cc70119

Please sign in to comment.