Skip to content

Commit

Permalink
Debug code
Browse files Browse the repository at this point in the history
  • Loading branch information
Erotemic committed Apr 11, 2022
1 parent e2e1b64 commit 029e694
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 32 deletions.
4 changes: 2 additions & 2 deletions lib/ultrajson.h
Expand Up @@ -258,7 +258,7 @@ typedef struct __JSONObjectEncoder

/*
Configuration for spaces of indent */
int indent;
int indentLength;
const char* indentChars;

/*
Expand Down Expand Up @@ -312,7 +312,7 @@ Life cycle of the provided buffer must still be handled by caller.
If the return value doesn't equal the specified buffer caller must release the memory using
JSONObjectEncoder.free or free() as specified when calling this function.
*/
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer);
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer, size_t* oRetLength);

typedef struct __JSONObjectDecoder
{
Expand Down
39 changes: 21 additions & 18 deletions lib/ultrajsonenc.c
Expand Up @@ -544,26 +544,26 @@ static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, ch

/*
Appends the line break that precedes an indented line.

Nothing is written when indentLength is negative (the "no indentation"
setting); an indentLength of 0 still produces the newline.

"Unchecked": no capacity check is made here, so the caller is expected to
have reserved at least one byte in the output buffer beforehand. */
static void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc)
{
  if (enc->indentLength > -1)
  {
    Buffer_AppendCharUnchecked(enc, '\n');
  }
}

/*
Appends `value` levels of indentation to the output buffer.

Each level is the first indentLength characters of indentChars. When
indentChars is NULL, each level is indentLength spaces instead, so a
purely numeric indent setting never dereferences a NULL pointer.

Nothing is written when indentLength is negative (indentation disabled).

"Unchecked": no capacity check is made here, so the caller is expected to
have reserved indentLength * value bytes beforehand. */
static void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value)
{
  int i;
  char ch;

  if (enc->indentLength < 0)
  {
    return;
  }

  while (value-- > 0)
  {
    for (i = 0; i < enc->indentLength; i++)
    {
      /* Pick the character first so the append call gets a plain value. */
      ch = (enc->indentChars != NULL) ? enc->indentChars[i] : ' ';
      Buffer_AppendCharUnchecked(enc, ch);
    }
  }
}

Expand Down Expand Up @@ -666,7 +666,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
Buffer_AppendCharUnchecked(enc, '\"');

Buffer_AppendCharUnchecked (enc, ':');
if (enc->indent)
if (enc->indentLength)
{
Buffer_AppendCharUnchecked (enc, ' ');
}
Expand Down Expand Up @@ -709,7 +709,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while (enc->iterNext(obj, &tc))
{
// The extra 2 bytes cover the comma and (optional) newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);

if (count > 0)
{
Expand All @@ -736,7 +736,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c

if (count > 0) {
// Reserve space for the indentation plus the newline.
Buffer_Reserve (enc, enc->indent * enc->level + 1);
Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level);
}
Expand All @@ -754,7 +754,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while ((res = enc->iterNext(obj, &tc)))
{
// The extra 2 bytes cover the comma and optional newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);

if(res < 0)
{
Expand Down Expand Up @@ -789,7 +789,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
enc->iterEnd(obj, &tc);

if (count > 0) {
Buffer_Reserve (enc, enc->indent * enc->level + 1);
Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level);
}
Expand Down Expand Up @@ -916,7 +916,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
enc->level--;
}

char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer)
char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer, size_t *oRetLength)
{
enc->malloc = enc->malloc ? enc->malloc : malloc;
enc->free = enc->free ? enc->free : free;
Expand Down Expand Up @@ -959,5 +959,8 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t
}
Buffer_AppendCharUnchecked(enc, '\0');

// Is this the right way to get the length of the string?
// Offset in char array positions?
*oRetLength = enc->offset - enc->start - 1;
return enc->start;
}
51 changes: 39 additions & 12 deletions python/objToJSON.c
Expand Up @@ -746,22 +746,24 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
}


static const char *_PyUnicodeToChars(PyObject *obj, size_t *_outLen)
static const char *_PyUnicodeToChars(PyObject *obj, int *_outLen)
{
// helper for indent only
// an error occurs when the return is NULL and _outLen is 0
PyObject *newObj;
#ifndef Py_LIMITED_API
/*#ifndef Py_LIMITED_API*/
if (PyUnicode_IS_COMPACT_ASCII(obj))
{
Py_ssize_t len;
Py_ssize_t len = 0;
const char *data = PyUnicode_AsUTF8AndSize(obj, &len);
*_outLen = len;
return data;
}
#endif
/*#endif*/
newObj = PyUnicode_AsUTF8String(obj);
if(!newObj)
{
*_outLen = 0;
return NULL;
}

Expand Down Expand Up @@ -848,21 +850,32 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
// Handle multiple input types
if (oindent == Py_None)
{
encoder.indent = 0;
encoder.indentLength = -1;
/*sprintf(encoder.indentChars, ""); // how to do this right in C?*/
}
else if (PyLong_Check(oindent))
{
encoder.indent = PyLong_AsLong(oindent);
encoder.indentLength = PyLong_AsLong(oindent);
sprintf(encoder.indentChars, " "); // how to do this right in C?
}
else if (PyUnicode_Check(oindent))
{
// set a custom indent string
size_t olen = 0;
int olen = -1;

printf("\nIndent Print: '''\n");
PyObject_Print(oindent, stdout, 0);
printf("\n'''\n");
printf("before olen = %d\n", olen);
encoder.indentChars = _PyUnicodeToChars(oindent, &olen);
encoder.indent = (int) olen;
if(encoder.indentChars == NULL)
printf("after olen = %d\n", olen);
encoder.indentLength = (int) olen;
printf("encoder.indentChars = '%s'\n", encoder.indentChars);
printf("encoder.indentLength = %d\n", encoder.indentLength);

if(encoder.indentChars == NULL && encoder.indentLength == -1)
{
PyErr_SetString(PyExc_ValueError, "indent was malformed");
PyErr_SetString(PyExc_ValueError, "malformed indent");
return NULL;
}
}
Expand Down Expand Up @@ -900,16 +913,23 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);

PRINTMARK();
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer));
size_t RetLength;
printf("a RetLength = %d\n", RetLength);
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer), &RetLength);
printf("b RetLength = %d\n", RetLength);

printf("a\n");
PRINTMARK();

dconv_d2s_free(&encoder.d2s);

printf("a\n");
if (PyErr_Occurred())
{
return NULL;
}

printf("a\n");
if (encoder.errorMsg)
{
if (ret != buffer)
Expand All @@ -921,7 +941,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
return NULL;
}

newobj = PyUnicode_FromString (ret);
printf("a\n");
/*Py_ssize_t size = retLength;*/
newobj = PyUnicode_FromStringAndSize(ret, (Py_ssize_t) RetLength);
// newobj = PyUnicode_FromString (ret); Cant use because we might have a null byte in the output

if (ret != buffer)
{
Expand All @@ -930,6 +953,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)

PRINTMARK();

printf("\newobj : '''\n");
PyObject_Print(newobj, stdout, 0);
printf("\n'''\n");

return newobj;
}

Expand Down
162 changes: 162 additions & 0 deletions tests/test_json_compat.py
@@ -0,0 +1,162 @@
r"""
Compare cases
python -c "import json ; print(repr(json.dumps([1], indent='\x000')))"
python -c "import ujson as json; print(repr(json.dumps([1], indent='\x000')))"
python -c "import json ; print(repr(json.dumps([1, 2], indent='a \x000 b')))"
python -c "import ujson as json; print(repr(json.dumps([1, 2], indent='a \x000 b')))"
python -c "import json ; print(repr(json.dumps([1], indent='\udfff')))"
python -c "import ujson as json; print(repr(json.dumps([1, 2, 3], indent='\udfff')))"
"""

import ujson
import json as pjson
import itertools as it
from collections import defaultdict

# Maps a short implementation name to the module that provides ``dumps``.
# The names are used as the values of the 'module' test parameter.
JSON_IMPLS = {
'ujson': ujson,
'pjson': pjson,
}


def group_items(items, key):
    """
    Bucket ``items`` by the group id that ``key`` returns for each item.

    Adapted from ubelt.

    Args:
        items: iterable of objects to group.
        key: callable mapping an item to a hashable group id.

    Returns:
        defaultdict(list): group id -> list of the items with that id, kept
        in the order they were encountered.
    """
    buckets = defaultdict(list)
    for element in items:
        buckets[key(element)].append(element)
    return buckets


def named_product(basis):
    """
    Yield one dict per point in the Cartesian product of the ``basis`` values.

    Adapted from ubelt.

    Args:
        basis: mapping from parameter name to an iterable of candidate values.

    Yields:
        dict: parameter name -> one chosen value, for every combination.
    """
    names = tuple(basis.keys())
    for combo in it.product(*basis.values()):
        yield dict(zip(names, combo))


def test_dumps_compatability():
    """
    Test the difference between Python's json module (pjson) and ultrajson
    (ujson) under a grid of different parameters.

    Every parameter combination is encoded with both implementations. The
    two rows for a combination agree when both implementations raised, or
    when both outputs decode (with pjson) to equal values. Each combination
    is counted exactly once, as either an agreement or a disagreement.

    The function only prints its findings; it makes no assertion about the
    number of disagreements.
    """
    import difflib

    # Define the data we will test
    # data = {'a': [1, 2, 3, named_product]}
    data = {'a': [1, 2, 3]}

    # Define the parameters we will test
    NULL_CHAR = '\x00'
    UTF_SURROGATE0000 = '\udc80'
    UTF_SURROGATE1024 = '\udfff'
    param_basis = {
        'indent': [
            # -1,
            # -2,
            # ' ',
            # ' ab ',
            # 4, 0,
            # None,
            # '\t',
            # NULL_CHAR,
            UTF_SURROGATE0000,
            # UTF_SURROGATE1024,
        ],
        'ensure_ascii': [False],
        # 'ensure_ascii': [True, False, None],
        # 'sort_keys': [True, False, None],
        # 'default': [None, str],
        'module': list(JSON_IMPLS.keys()),
    }
    # Only the keys that are both dumps() keyword arguments and present in
    # the grid are forwarded to the implementation under test.
    kwargs_keys = ['indent', 'default', 'ensure_ascii', 'sort_keys']
    kwargs_keys = [k for k in kwargs_keys if k in param_basis]
    param_grid = named_product(param_basis)
    results = []
    for params in param_grid:
        params_key = pjson.dumps(params, default=str)
        module = JSON_IMPLS[params['module']]
        kwargs = {k: params[k] for k in kwargs_keys if k in params}
        try:
            result = module.dumps(data, **kwargs)
        except Exception as ex:
            error = ex
            result = None
        else:
            error = 0
        row = {
            'params_key': params_key,
            **params,
            'data': data,
            'result': result,
            'error': error,
        }
        results.append(row)

    print(pjson.dumps(results, indent=' ', default=repr))

    def grouper(row):
        # Rows that share the same dumps() kwargs belong to the same group,
        # regardless of which module produced them.
        return tuple([(k, row[k]) for k in kwargs_keys])

    grouped_results = group_items(results, key=grouper)

    agree_keys = []
    disagree_keys = []

    for group_key, group in grouped_results.items():
        # Exactly one row per implementation is expected in each group.
        assert len(group) == 2
        module_to_row = {r['module']: r for r in group}
        assert len(module_to_row) == 2

        ujson_row = module_to_row['ujson']
        pjson_row = module_to_row['pjson']

        if ujson_row['error'] and pjson_row['error']:
            # Both implementations errored
            agree_keys.append(group_key)
            continue

        # Check if the results from all implementations are the same
        u_result = ujson_row['result']
        p_result = pjson_row['result']

        # Decode both outputs with the reference decoder. A result that
        # cannot be decoded (including the None left by a failed dumps) is
        # replaced by the repr of the decoding error.
        try:
            p_val = pjson.loads(p_result)
        except Exception as ex:
            p_val = repr(ex)

        try:
            u_val = pjson.loads(u_result)
        except Exception as ex:
            u_val = repr(ex)

        if p_val != u_val:
            print(f'Disagree on {group_key}')
            print(' * p_result = {!r}'.format(p_result))
            print(' * u_result = {!r}'.format(u_result))
            print(''.join(list(difflib.ndiff([str(p_val)], [str(u_val)]))))
            disagree_keys.append(group_key)
        else:
            agree_keys.append(group_key)

    print('Num Agree: {}'.format(len(agree_keys)))
    print('Num Disagree: {}'.format(len(disagree_keys)))


if __name__ == '__main__':
    # CommandLine:
    #     python ~/code/ultrajson/tests/test_json_compat.py
    test_dumps_compatability()

0 comments on commit 029e694

Please sign in to comment.