Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support parsing NaN, Infinity and -Infinity #514

Merged
merged 7 commits into from Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/ultrajson.h
Expand Up @@ -166,6 +166,9 @@ enum JSTYPES
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
JT_NAN, // Not A Number
JT_POS_INF, // Positive infinity
JT_NEG_INF, // Negative infinity
};

typedef void * JSOBJ;
Expand Down Expand Up @@ -321,6 +324,9 @@ typedef struct __JSONObjectDecoder
JSOBJ (*newTrue)(void *prv);
JSOBJ (*newFalse)(void *prv);
JSOBJ (*newNull)(void *prv);
JSOBJ (*newNaN)(void *prv);
JSOBJ (*newPosInf)(void *prv);
JSOBJ (*newNegInf)(void *prv);
JSOBJ (*newObject)(void *prv);
JSOBJ (*newArray)(void *prv);
JSOBJ (*newInt)(void *prv, JSINT32 value);
Expand Down
58 changes: 54 additions & 4 deletions lib/ultrajsondec.c
Expand Up @@ -98,10 +98,16 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds

JSUINT64 overflowLimit = LLONG_MAX;

if (*(offset) == '-')
{
offset ++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
} else if (*(offset) == 'N') {
goto DECODE_NAN;
} else if (*(offset) == '-') {
offset++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
}
overflowLimit = LLONG_MIN;
}

Expand Down Expand Up @@ -189,6 +195,48 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds
{
return ds->dec->newInt(ds->prv, (JSINT32) (intValue * intNeg));
}

DECODE_NAN:
offset++;
if (*(offset++) != 'a') goto SET_NAN_ERROR;
if (*(offset++) != 'N') goto SET_NAN_ERROR;

ds->lastType = JT_NAN;
ds->start = offset;
return ds->dec->newNaN(ds->prv);

SET_NAN_ERROR:
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");

DECODE_INF:
offset++;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'f') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 't') goto SET_INF_ERROR;
if (*(offset++) != 'y') goto SET_INF_ERROR;

ds->start = offset;

if (intNeg == 1) {
ds->lastType = JT_POS_INF;
return ds->dec->newPosInf(ds->prv);
} else {
ds->lastType = JT_NEG_INF;
return ds->dec->newNegInf(ds->prv);
}

SET_INF_ERROR:
if (intNeg == 1) {
const char *msg = "Unexpected character found when decoding 'Infinity'";
return SetError(ds, -1, msg);
} else {
const char *msg = "Unexpected character found when decoding '-Infinity'";
return SetError(ds, -1, msg);
}

}

static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds)
Expand Down Expand Up @@ -732,6 +780,8 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds)
case '7':
case '8':
case '9':
case 'I':
case 'N':
case '-':
return decode_numeric (ds);

Expand Down
18 changes: 18 additions & 0 deletions python/JSONtoObj.c
Expand Up @@ -79,6 +79,21 @@ static JSOBJ Object_newNull(void *prv)
Py_RETURN_NONE;
}

/*
 * Decoder callback invoked for a bare NaN token.
 *
 * prv is the opaque pointer the decoder hands to every callback; it is
 * not used here.
 *
 * Returns whatever PyFloat_FromDouble(Py_NAN) yields: a Python float
 * holding NaN (per the CPython API, NULL if the float cannot be created).
 */
static JSOBJ Object_newNaN(void *prv)
{
  JSOBJ nanFloat = PyFloat_FromDouble(Py_NAN);
  return nanFloat;
}

/*
 * Decoder callback invoked for a bare Infinity token.
 *
 * prv is the opaque pointer the decoder hands to every callback; it is
 * not used here.
 *
 * Returns whatever PyFloat_FromDouble(Py_HUGE_VAL) yields: a Python float
 * holding positive infinity (per the CPython API, NULL on failure).
 */
static JSOBJ Object_newPosInf(void *prv)
{
  JSOBJ posInfFloat = PyFloat_FromDouble(Py_HUGE_VAL);
  return posInfFloat;
}

/*
 * Decoder callback invoked for a bare -Infinity token.
 *
 * prv is the opaque pointer the decoder hands to every callback; it is
 * not used here.
 *
 * Returns whatever PyFloat_FromDouble(-Py_HUGE_VAL) yields: a Python float
 * holding negative infinity (per the CPython API, NULL on failure).
 */
static JSOBJ Object_newNegInf(void *prv)
{
  JSOBJ negInfFloat = PyFloat_FromDouble(-Py_HUGE_VAL);
  return negInfFloat;
}

static JSOBJ Object_newObject(void *prv)
{
return PyDict_New();
Expand Down Expand Up @@ -129,6 +144,9 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs)
Object_newTrue,
Object_newFalse,
Object_newNull,
Object_newNaN,
Object_newPosInf,
Object_newNegInf,
Object_newObject,
Object_newArray,
Object_newInteger,
Expand Down
73 changes: 73 additions & 0 deletions tests/test_ujson.py
Expand Up @@ -670,6 +670,79 @@ def test_encode_raises_allow_nan(test_input, expected_exception):
ujson.dumps(test_input, allow_nan=False)


def test_nan_inf_support():
    """Bare NaN/Infinity/-Infinity decode to floats; quoted ones stay strings.

    Test ported from pandas.
    """
    text = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
    data = ujson.loads(text)
    expected = [
        "a",
        float("nan"),
        "NaN",
        float("inf"),
        "Infinity",
        -float("inf"),
        "-Infinity",
    ]
    # zip() stops at the shorter sequence, so without this check a decoder
    # that dropped trailing elements would still pass the loop below.
    assert len(data) == len(expected)
    for a, b in zip(data, expected):
        # NaN never compares equal to itself, so fall back to isnan() on both.
        assert a == b or (math.isnan(a) and math.isnan(b))


def test_special_singletons():
    """Each special token decodes on its own as a top-level document."""
    tokens = ("Infinity", "-Infinity", "NaN", "null")
    decoded = {token: ujson.loads(token) for token in tokens}

    positive = decoded["Infinity"]
    assert math.isinf(positive) and positive > 0

    negative = decoded["-Infinity"]
    assert math.isinf(negative) and negative < 0

    assert math.isnan(decoded["NaN"])
    assert decoded["null"] is None


@pytest.mark.parametrize(
    "test_input, expected_exception, expected_message",
    [
        ("n", ujson.JSONDecodeError, "Unexpected character .* 'null'"),
        ("N", ujson.JSONDecodeError, "Unexpected character .* 'NaN'"),
        ("NA", ujson.JSONDecodeError, "Unexpected character .* 'NaN'"),
        ("Na N", ujson.JSONDecodeError, "Unexpected character .* 'NaN'"),
        ("nan", ujson.JSONDecodeError, "Unexpected character .* 'null'"),
        ("none", ujson.JSONDecodeError, "Unexpected character .* 'null'"),
        ("i", ujson.JSONDecodeError, "Expected object or value"),
        ("I", ujson.JSONDecodeError, "Unexpected character .* 'Infinity'"),
        ("Inf", ujson.JSONDecodeError, "Unexpected character .* 'Infinity'"),
        ("InfinitY", ujson.JSONDecodeError, "Unexpected character .* 'Infinity'"),
        ("-i", ujson.JSONDecodeError, "Trailing data"),
        ("-I", ujson.JSONDecodeError, "Unexpected character .* '-Infinity'"),
        ("-Inf", ujson.JSONDecodeError, "Unexpected character .* '-Infinity'"),
        ("-InfinitY", ujson.JSONDecodeError, "Unexpected character .* '-Infinity'"),
        ("- i", ujson.JSONDecodeError, "Trailing data"),
        ("- I", ujson.JSONDecodeError, "Trailing data"),
        ("- Inf", ujson.JSONDecodeError, "Trailing data"),
        ("- InfinitY", ujson.JSONDecodeError, "Trailing data"),
    ],
)
def test_incomplete_special_inputs(test_input, expected_exception, expected_message):
    """Truncated, mis-cased or split special tokens are rejected.

    Each row gives an input, the exception type ``ujson.loads`` must raise
    for it, and a regex the exception message must match.
    """
    # Keep only the call under test inside the raises block; pytest already
    # reports the parametrized input in the test id, so no debug print is needed.
    with pytest.raises(expected_exception, match=expected_message):
        ujson.loads(test_input)


@pytest.mark.parametrize(
    "test_input, expected_exception, expected_message",
    [
        # Every case is a complete special token followed by extra characters,
        # and every case expects the same exception type and message.
        (text, ujson.JSONDecodeError, "Trailing data")
        for text in (
            "NaNaNaN",
            "Infinity and Beyond",
            "-Infinity-and-Beyond",
            "NaN!",
            "Infinity!",
            "-Infinity!",
        )
    ],
)
def test_overcomplete_special_inputs(test_input, expected_exception, expected_message):
    """A valid special token followed by extra characters is rejected."""
    with pytest.raises(expected_exception, match=expected_message):
        ujson.loads(test_input)
Erotemic marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.parametrize(
"test_input",
[
Expand Down