Skip to content

Commit

Permalink
Support Python's Negative Exponent Padding Idiosyncrasy
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-varo-hx committed Jan 24, 2024
1 parent 0d2d8c5 commit 0ed0811
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 8 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ Controls whether indentation ("pretty output") is enabled. Default is `0` (disab
}
```

#### zero_pad_negative_9_to_5_exponent

If true, zero-pads the exponent in scientific notation to two digits when it
is between `-9` and `-5` (both inclusive), which replicates the behavior of the
Python standard library's `json` module. Default is `False`:

```pycon
>>> ujson.dumps([1e-10, 1e-9, 1e-5, 1e-4])
'[1e-10,1e-9,1e-5,0.0001]'
>>> ujson.dumps([1e-10, 1e-9, 1e-5, 1e-4], zero_pad_negative_9_to_5_exponent=True)
'[1e-10,1e-09,1e-05,0.0001]'
```

## Benchmarks

*UltraJSON* calls/sec compared to other popular JSON parsers with performance gain
Expand Down
5 changes: 3 additions & 2 deletions lib/dconv_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ namespace double_conversion
int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode)
int max_trailing_padding_zeroes_in_precision_mode,
int min_exponent_width)
{
*d2s = new DoubleToStringConverter(flags, infinity_symbol, nan_symbol,
exponent_character, decimal_in_shortest_low,
decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode);
max_trailing_padding_zeroes_in_precision_mode, min_exponent_width);
}

int dconv_d2s(void *d2s, double value, char* buf, int buflen, int* strlength)
Expand Down
9 changes: 8 additions & 1 deletion lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@ typedef struct __JSONObjectEncoder
If true, bytes are rejected. */
int rejectBytes;

/*
If true, zero-pads the exponent in scientific notation to two digits when it
is between -9 and -5 (both inclusive), which replicates the Python standard
library's JSON behavior, e.g. 1e-5 becomes 1e-05. */
int zeroPadNegative9to5Exponent;

/*
Configuration for item and key separators, e.g. "," and ":" for a compact representation or ", " and ": " to match the Python standard library's defaults. */
size_t itemSeparatorLength;
Expand Down Expand Up @@ -382,7 +388,8 @@ void dconv_d2s_init(void **d2s,
int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode);
int max_trailing_padding_zeroes_in_precision_mode,
int min_exponent_width);
int dconv_d2s(void *d2s, double value, char* buf, int buflen, int* strlength);
void dconv_d2s_free(void **d2s);

Expand Down
14 changes: 11 additions & 3 deletions python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)

PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", "separators", NULL };
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", "separators", "zero_pad_negative_9_to_5_exponent", NULL };

char buffer[65536];
char *ret;
Expand All @@ -676,9 +676,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *separatorsItemBytes = NULL;
PyObject *oseparatorsKey = NULL;
PyObject *separatorsKeyBytes = NULL;
PyObject *ozero_pad_negative_9_to_5_exponent = NULL;
int allowNan = -1;
int orejectBytes = -1;
size_t retLen;
int minExponentWidth = 0;

JSONObjectEncoder encoder =
{
Expand All @@ -704,6 +706,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
0, //indent
1, //allowNan
1, //rejectBytes
0, //zeroPadNegative9to5Exponent
0, //itemSeparatorLength
NULL, //itemSeparatorChars
0, //keySeparatorLength
Expand All @@ -714,7 +717,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)

PRINTMARK();

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiOO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn, &oseparators))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiOOO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn, &oseparators, &ozero_pad_negative_9_to_5_exponent))
{
return NULL;
}
Expand Down Expand Up @@ -816,9 +819,14 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
}
}

if (ozero_pad_negative_9_to_5_exponent != NULL && PyObject_IsTrue(ozero_pad_negative_9_to_5_exponent))
{
minExponentWidth = 2;
}

encoder.d2s = NULL;
dconv_d2s_init(&encoder.d2s, DCONV_D2S_EMIT_TRAILING_DECIMAL_POINT | DCONV_D2S_EMIT_TRAILING_ZERO_AFTER_POINT | DCONV_D2S_EMIT_POSITIVE_EXPONENT_SIGN,
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0, minExponentWidth);

PRINTMARK();
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer), &retLen);
Expand Down
3 changes: 2 additions & 1 deletion python/ujson.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ PyObject* JSONDecodeError;
"Set encode_html_chars=True to encode < > & as unicode escape sequences. "\
"Set escape_forward_slashes=False to prevent escaping / characters." \
"Set allow_nan=False to raise an exception when NaN or Infinity would be serialized." \
"Set reject_bytes=True to raise TypeError on bytes."
"Set reject_bytes=True to raise TypeError on bytes." \
"Set zero_pad_negative_9_to_5_exponent=True to add 0-pad for exponents -9 to -5."

static PyMethodDef ujsonMethods[] = {
{"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},
Expand Down
9 changes: 8 additions & 1 deletion tests/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,19 @@ def test_double_long_decimal_issue():
assert sut == decoded


# NOTE: can't match exponents -9 to -5; Python 0-pads
# NOTE: The default behaviour can't match exponents -9 to -5; Python 0-pads
@pytest.mark.parametrize("val", [1e-10, 1e-4, 1e10, 1e15, 1e16, 1e30])
def test_encode_float_string_rep(val):
    """With default options, ujson's float text must equal the stdlib json text.

    The parameter list only holds values whose exponent is outside -9..-5,
    since the stdlib zero-pads those exponents and the default ujson output
    does not.
    """
    encoded = ujson.dumps(val)
    reference = json.dumps(val)
    assert encoded == reference


@pytest.mark.parametrize(
    "val", [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e10, 1e15, 1e16, 1e30]
)
def test_encode_float_string_replicate_python(val):
    """With zero_pad_negative_9_to_5_exponent=True, ujson must match stdlib json.

    Every exponent in the padded range (-9 through -5, including -7) is
    exercised, together with values on either side of it (1e-10, 1e-4) and
    large positive exponents, which must stay unchanged.
    """
    assert ujson.dumps(val, zero_pad_negative_9_to_5_exponent=True) == json.dumps(val)


def test_encode_decode_long_decimal():
sut = {"a": -528656961.4399388}
encoded = ujson.dumps(sut)
Expand Down

0 comments on commit 0ed0811

Please sign in to comment.