Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Python's Negative Exponent Padding Idiosyncrasy #620

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ Controls whether indentation ("pretty output") is enabled. Default is `0` (disab
}
```

#### zero_pad_negative_9_to_5_exponent

If true, zero-pads the exponent in scientific notation to two digits when it is
between `-9` and `-5` (both inclusive), which replicates the behavior of the
Python standard library's `json` module. Default is `False`:

```pycon
>>> ujson.dumps([1e-10, 1e-9, 1e-5, 1e-4])
'[1e-10,1e-9,1e-5,0.0001]'
>>> ujson.dumps([1e-10, 1e-9, 1e-5, 1e-4], zero_pad_negative_9_to_5_exponent=True)
'[1e-10,1e-09,1e-05,0.0001]'
```

## Benchmarks

*UltraJSON* calls/sec compared to other popular JSON parsers with performance gain
Expand Down
5 changes: 3 additions & 2 deletions lib/dconv_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ namespace double_conversion
int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode)
int max_trailing_padding_zeroes_in_precision_mode,
int min_exponent_width)
{
*d2s = new DoubleToStringConverter(flags, infinity_symbol, nan_symbol,
exponent_character, decimal_in_shortest_low,
decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode);
max_trailing_padding_zeroes_in_precision_mode, min_exponent_width);
}

int dconv_d2s(void *d2s, double value, char* buf, int buflen, int* strlength)
Expand Down
9 changes: 8 additions & 1 deletion lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@ typedef struct __JSONObjectEncoder
If true, bytes are rejected. */
int rejectBytes;

/*
If true, zero-pads the exponent in scientific notation to two digits when it is
between -9 and -5 (both inclusive), which replicates the behavior of the Python
standard library's json module, e.g. 1e-5 becomes 1e-05. */
int zeroPadNegative9to5Exponent;

/*
Configuration for item and key separators, e.g. "," and ":" for a compact representation or ", " and ": " to match the Python standard library's defaults. */
size_t itemSeparatorLength;
Expand Down Expand Up @@ -382,7 +388,8 @@ void dconv_d2s_init(void **d2s,
int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode);
int max_trailing_padding_zeroes_in_precision_mode,
int min_exponent_width);
int dconv_d2s(void *d2s, double value, char* buf, int buflen, int* strlength);
void dconv_d2s_free(void **d2s);

Expand Down
14 changes: 11 additions & 3 deletions python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)

PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", "separators", NULL };
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", "separators", "zero_pad_negative_9_to_5_exponent", NULL };

char buffer[65536];
char *ret;
Expand All @@ -676,9 +676,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *separatorsItemBytes = NULL;
PyObject *oseparatorsKey = NULL;
PyObject *separatorsKeyBytes = NULL;
PyObject *ozero_pad_negative_9_to_5_exponent = NULL;
int allowNan = -1;
int orejectBytes = -1;
size_t retLen;
int minExponentWidth = 0;

JSONObjectEncoder encoder =
{
Expand All @@ -704,6 +706,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
0, //indent
1, //allowNan
1, //rejectBytes
0, //zeroPadNegative9to5Exponent
0, //itemSeparatorLength
NULL, //itemSeparatorChars
0, //keySeparatorLength
Expand All @@ -714,7 +717,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)

PRINTMARK();

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiOO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn, &oseparators))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiOOO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn, &oseparators, &ozero_pad_negative_9_to_5_exponent))
{
return NULL;
}
Expand Down Expand Up @@ -816,9 +819,14 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
}
}

if (ozero_pad_negative_9_to_5_exponent != NULL && PyObject_IsTrue(ozero_pad_negative_9_to_5_exponent))
{
minExponentWidth = 2;
}

encoder.d2s = NULL;
dconv_d2s_init(&encoder.d2s, DCONV_D2S_EMIT_TRAILING_DECIMAL_POINT | DCONV_D2S_EMIT_TRAILING_ZERO_AFTER_POINT | DCONV_D2S_EMIT_POSITIVE_EXPONENT_SIGN,
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0, minExponentWidth);

PRINTMARK();
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer), &retLen);
Expand Down
3 changes: 2 additions & 1 deletion python/ujson.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ PyObject* JSONDecodeError;
"Set encode_html_chars=True to encode < > & as unicode escape sequences. "\
"Set escape_forward_slashes=False to prevent escaping / characters." \
"Set allow_nan=False to raise an exception when NaN or Infinity would be serialized." \
"Set reject_bytes=True to raise TypeError on bytes."
"Set reject_bytes=True to raise TypeError on bytes." \
"Set zero_pad_negative_9_to_5_exponent=True to add 0-pad for exponents -9 to -5."

static PyMethodDef ujsonMethods[] = {
{"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},
Expand Down
7 changes: 7 additions & 0 deletions tests/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ def __call__(self, parser, namespace, values, option_string=None):
help="Sets the escape_forward_slashes option to ujson.dumps(). "
"May be 0 or 1 or 0,1 to test both.",
)
parser.add_argument(
"--zero_pad_negative_9_to_5_exponent",
default=(0, 1),
action=ListOption,
help="Sets the zero_pad_negative_9_to_5_exponent option to ujson.dumps(). "
"May be 0 or 1 or 0,1 to test both.",
)
parser.add_argument(
"--dump-python",
action="store_true",
Expand Down
9 changes: 8 additions & 1 deletion tests/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,19 @@ def test_double_long_decimal_issue():
assert sut == decoded


# NOTE: can't match exponents -9 to -5; Python 0-pads
# NOTE: The default behaviour can't match exponents -9 to -5; Python 0-pads
@pytest.mark.parametrize("val", [1e-10, 1e-4, 1e10, 1e15, 1e16, 1e30])
def test_encode_float_string_rep(val):
    """Default ujson float formatting agrees with the standard json module for these values."""
    expected = json.dumps(val)
    actual = ujson.dumps(val)
    assert actual == expected


# Exercises every exponent in the -9..-5 range (where Python zero-pads to two
# digits), plus neighbouring values on both sides of that range.
@pytest.mark.parametrize(
    "val",
    [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e10, 1e15, 1e16, 1e30],
)
def test_encode_float_string_replicate_python(val):
    """With zero_pad_negative_9_to_5_exponent=True, ujson output must equal json.dumps."""
    assert ujson.dumps(val, zero_pad_negative_9_to_5_exponent=True) == json.dumps(val)


def test_encode_decode_long_decimal():
sut = {"a": -528656961.4399388}
encoded = ujson.dumps(sut)
Expand Down