Skip to content

Commit

Permalink
Merge pull request #514 from Erotemic/nan_loads
Browse files Browse the repository at this point in the history
  • Loading branch information
hugovk committed Apr 4, 2022
2 parents beb3296 + 2421524 commit 881ee93
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 4 deletions.
6 changes: 6 additions & 0 deletions lib/ultrajson.h
Expand Up @@ -166,6 +166,9 @@ enum JSTYPES
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
JT_NAN, // Not A Number
JT_POS_INF, // Positive infinity
JT_NEG_INF, // Negative infinity
};

typedef void * JSOBJ;
Expand Down Expand Up @@ -321,6 +324,9 @@ typedef struct __JSONObjectDecoder
JSOBJ (*newTrue)(void *prv);
JSOBJ (*newFalse)(void *prv);
JSOBJ (*newNull)(void *prv);
JSOBJ (*newNaN)(void *prv);
JSOBJ (*newPosInf)(void *prv);
JSOBJ (*newNegInf)(void *prv);
JSOBJ (*newObject)(void *prv);
JSOBJ (*newArray)(void *prv);
JSOBJ (*newInt)(void *prv, JSINT32 value);
Expand Down
58 changes: 54 additions & 4 deletions lib/ultrajsondec.c
Expand Up @@ -98,10 +98,16 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds

JSUINT64 overflowLimit = LLONG_MAX;

if (*(offset) == '-')
{
offset ++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
} else if (*(offset) == 'N') {
goto DECODE_NAN;
} else if (*(offset) == '-') {
offset++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
}
overflowLimit = LLONG_MIN;
}

Expand Down Expand Up @@ -189,6 +195,48 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds
{
return ds->dec->newInt(ds->prv, (JSINT32) (intValue * intNeg));
}

DECODE_NAN:
offset++;
if (*(offset++) != 'a') goto SET_NAN_ERROR;
if (*(offset++) != 'N') goto SET_NAN_ERROR;

ds->lastType = JT_NAN;
ds->start = offset;
return ds->dec->newNaN(ds->prv);

SET_NAN_ERROR:
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");

DECODE_INF:
offset++;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'f') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 't') goto SET_INF_ERROR;
if (*(offset++) != 'y') goto SET_INF_ERROR;

ds->start = offset;

if (intNeg == 1) {
ds->lastType = JT_POS_INF;
return ds->dec->newPosInf(ds->prv);
} else {
ds->lastType = JT_NEG_INF;
return ds->dec->newNegInf(ds->prv);
}

SET_INF_ERROR:
if (intNeg == 1) {
const char *msg = "Unexpected character found when decoding 'Infinity'";
return SetError(ds, -1, msg);
} else {
const char *msg = "Unexpected character found when decoding '-Infinity'";
return SetError(ds, -1, msg);
}

}

static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds)
Expand Down Expand Up @@ -732,6 +780,8 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds)
case '7':
case '8':
case '9':
case 'I':
case 'N':
case '-':
return decode_numeric (ds);

Expand Down
18 changes: 18 additions & 0 deletions python/JSONtoObj.c
Expand Up @@ -79,6 +79,21 @@ static JSOBJ Object_newNull(void *prv)
Py_RETURN_NONE;
}

/*
 * Decoder callback for a bare NaN literal.
 *
 * Returns whatever PyFloat_FromDouble() yields for Py_NAN; the result is
 * passed back unchanged. The prv argument is not used.
 */
static JSOBJ Object_newNaN(void *prv)
{
  const double nan_value = Py_NAN;

  (void) prv;
  return PyFloat_FromDouble(nan_value);
}

/*
 * Decoder callback for a bare Infinity literal.
 *
 * Returns whatever PyFloat_FromDouble() yields for Py_HUGE_VAL; the result
 * is passed back unchanged. The prv argument is not used.
 */
static JSOBJ Object_newPosInf(void *prv)
{
  const double positive_infinity = Py_HUGE_VAL;

  (void) prv;
  return PyFloat_FromDouble(positive_infinity);
}

/*
 * Decoder callback for a bare -Infinity literal.
 *
 * Returns whatever PyFloat_FromDouble() yields for the negated Py_HUGE_VAL;
 * the result is passed back unchanged. The prv argument is not used.
 */
static JSOBJ Object_newNegInf(void *prv)
{
  const double negative_infinity = -Py_HUGE_VAL;

  (void) prv;
  return PyFloat_FromDouble(negative_infinity);
}

static JSOBJ Object_newObject(void *prv)
{
return PyDict_New();
Expand Down Expand Up @@ -129,6 +144,9 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs)
Object_newTrue,
Object_newFalse,
Object_newNull,
Object_newNaN,
Object_newPosInf,
Object_newNegInf,
Object_newObject,
Object_newArray,
Object_newInteger,
Expand Down
73 changes: 73 additions & 0 deletions tests/test_ujson.py
Expand Up @@ -670,6 +670,79 @@ def test_encode_raises_allow_nan(test_input, expected_exception):
ujson.dumps(test_input, allow_nan=False)


def test_nan_inf_support():
    """Decode an array mixing bare NaN/Infinity/-Infinity with quoted copies.

    The bare literals must decode to the matching special floats, while the
    quoted look-alikes must remain ordinary strings.
    """
    # Test ported from pandas
    text = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
    data = ujson.loads(text)
    expected = [
        "a",
        float("nan"),
        "NaN",
        float("inf"),
        "Infinity",
        -float("inf"),
        "-Infinity",
    ]
    # zip() stops at the shorter input, so pin the length first; otherwise a
    # truncated decode result would pass the element-wise loop unnoticed.
    assert len(data) == len(expected)
    for a, b in zip(data, expected):
        # NaN never compares equal to itself, hence the isnan() fallback.
        assert a == b or (math.isnan(a) and math.isnan(b))


def test_special_singletons():
    """Each special literal decodes on its own as a top-level document."""
    documents = ("Infinity", "-Infinity", "NaN", "null")
    # Decode in the listed order and unpack the four results by position.
    plus_inf, minus_inf, not_a_number, nothing = (
        ujson.loads(document) for document in documents
    )

    assert math.isinf(plus_inf) and plus_inf > 0
    assert math.isinf(minus_inf) and minus_inf < 0
    assert math.isnan(not_a_number)
    assert nothing is None


@pytest.mark.parametrize(
    "test_input, expected_message",
    [
        ("n", "Unexpected character .* 'null'"),
        ("N", "Unexpected character .* 'NaN'"),
        ("NA", "Unexpected character .* 'NaN'"),
        ("Na N", "Unexpected character .* 'NaN'"),
        ("nan", "Unexpected character .* 'null'"),
        ("none", "Unexpected character .* 'null'"),
        ("i", "Expected object or value"),
        ("I", "Unexpected character .* 'Infinity'"),
        ("Inf", "Unexpected character .* 'Infinity'"),
        ("InfinitY", "Unexpected character .* 'Infinity'"),
        ("-i", "Trailing data"),
        ("-I", "Unexpected character .* '-Infinity'"),
        ("-Inf", "Unexpected character .* '-Infinity'"),
        ("-InfinitY", "Unexpected character .* '-Infinity'"),
        ("- i", "Trailing data"),
        ("- I", "Trailing data"),
        ("- Inf", "Trailing data"),
        ("- InfinitY", "Trailing data"),
    ],
)
def test_incomplete_special_inputs(test_input, expected_message):
    """Truncated or misspelled special literals are rejected.

    Each input must raise ``ujson.JSONDecodeError`` whose message matches the
    paired regular expression.
    """
    # Only the call under test lives inside the raises block; pytest already
    # reports the failing parameter set, so no debug output is needed here.
    with pytest.raises(ujson.JSONDecodeError, match=expected_message):
        ujson.loads(test_input)


@pytest.mark.parametrize(
    "test_input, expected_message",
    [
        ("NaNaNaN", "Trailing data"),
        ("Infinity and Beyond", "Trailing data"),
        ("-Infinity-and-Beyond", "Trailing data"),
        ("NaN!", "Trailing data"),
        ("Infinity!", "Trailing data"),
        ("-Infinity!", "Trailing data"),
    ],
)
def test_overcomplete_special_inputs(test_input, expected_message):
    """A valid special literal followed by extra characters is rejected.

    Each input must raise ``ujson.JSONDecodeError`` whose message matches the
    paired regular expression.
    """
    expecting_decode_error = pytest.raises(
        ujson.JSONDecodeError, match=expected_message
    )
    with expecting_decode_error:
        ujson.loads(test_input)


@pytest.mark.parametrize(
"test_input",
[
Expand Down

0 comments on commit 881ee93

Please sign in to comment.