Debug code

ultrajson · Apr 11, 2022 · 029e694 · 029e694
1 parent e2e1b64
commit 029e694
Show file tree

Hide file tree

Showing 4 changed files with 224 additions and 32 deletions.
diff --git a/lib/ultrajson.h b/lib/ultrajson.h
@@ -258,7 +258,7 @@ typedef struct __JSONObjectEncoder
 
   /*
   Configuration for spaces of indent */
-  int indent;
+  int indentLength;
   const char* indentChars;
 
   /*
@@ -312,7 +312,7 @@ Life cycle of the provided buffer must still be handled by caller.
 If the return value doesn't equal the specified buffer caller must release the memory using
 JSONObjectEncoder.free or free() as specified when calling this function.
 */
-EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer);
+EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer, size_t* oRetLength);
 
 typedef struct __JSONObjectDecoder
 {

diff --git a/lib/ultrajsonenc.c b/lib/ultrajsonenc.c
@@ -544,26 +544,26 @@ static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, ch
 
 static void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc)
 {
-  if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n');
+  if (enc->indentLength > -1) Buffer_AppendCharUnchecked(enc, '\n');
 }
 
 static void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value)
 {
   int i;
-  if (enc->indent > 0)
+  if (enc->indentLength > -1)
   {
-    if (enc->indentChars == NULL)
-    {
-      while (value-- > 0)
-        for (i = 0; i < enc->indent; i++)
-          Buffer_AppendCharUnchecked(enc, ' ');
-    }
-    else
-    {
+    /*if (enc->indentChars == NULL)              */
+    /*{                                          */
+    /*  while (value-- > 0)                      */
+    /*    for (i = 0; i < enc->indentLength; i++)*/
+    /*      Buffer_AppendCharUnchecked(enc, ' ');*/
+    /*}                                          */
+    /*else                                       */
+    /*{                                          */
       while (value-- > 0)
-        for (i = 0; i < enc->indent; i++)
+        for (i = 0; i < enc->indentLength; i++)
           Buffer_AppendCharUnchecked(enc, enc->indentChars[i]);
-    }
+    /*}*/
   }
 }
 
@@ -666,7 +666,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
     Buffer_AppendCharUnchecked(enc, '\"');
 
     Buffer_AppendCharUnchecked (enc, ':');
-    if (enc->indent)
+    if (enc->indentLength)
     {
       Buffer_AppendCharUnchecked (enc, ' ');
     }
@@ -709,7 +709,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
       while (enc->iterNext(obj, &tc))
       {
         // The extra 2 bytes cover the comma and (optional) newline.
-        Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
+        Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);
 
         if (count > 0)
         {
@@ -736,7 +736,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
 
       if (count > 0) {
         // Reserve space for the indentation plus the newline.
-        Buffer_Reserve (enc, enc->indent * enc->level + 1);
+        Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
         Buffer_AppendIndentNewlineUnchecked (enc);
         Buffer_AppendIndentUnchecked (enc, enc->level);
       }
@@ -754,7 +754,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
       while ((res = enc->iterNext(obj, &tc)))
       {
         // The extra 2 bytes cover the comma and optional newline.
-        Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
+        Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);
 
         if(res < 0)
         {
@@ -789,7 +789,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
       enc->iterEnd(obj, &tc);
 
       if (count > 0) {
-        Buffer_Reserve (enc, enc->indent * enc->level + 1);
+        Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
         Buffer_AppendIndentNewlineUnchecked (enc);
         Buffer_AppendIndentUnchecked (enc, enc->level);
       }
@@ -916,7 +916,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
   enc->level--;
 }
 
-char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer)
+char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer, size_t *oRetLength)
 {
   enc->malloc = enc->malloc ? enc->malloc : malloc;
   enc->free =  enc->free ? enc->free : free;
@@ -959,5 +959,8 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t
   }
   Buffer_AppendCharUnchecked(enc, '\0');
 
+  // TODO(review): is this the right way to get the length of the string?
+  // Is the offset measured in char array positions?
+  *oRetLength = enc->offset - enc->start - 1;
   return enc->start;
 }
diff --git a/python/objToJSON.c b/python/objToJSON.c
@@ -746,22 +746,24 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
 }
 
 
-static const char *_PyUnicodeToChars(PyObject *obj, size_t *_outLen)
+static const char *_PyUnicodeToChars(PyObject *obj, int *_outLen)
 {
   // helper for indent only
+  // on error, returns NULL and sets *_outLen to 0
   PyObject *newObj;
-#ifndef Py_LIMITED_API
+/*#ifndef Py_LIMITED_API*/
   if (PyUnicode_IS_COMPACT_ASCII(obj))
   {
-    Py_ssize_t len;
+    Py_ssize_t len = 0;
     const char *data = PyUnicode_AsUTF8AndSize(obj, &len);
     *_outLen = len;
     return data;
   }
-#endif
+/*#endif*/
   newObj = PyUnicode_AsUTF8String(obj);
   if(!newObj)
   {
+    *_outLen = 0;
     return NULL;
   }
 
@@ -848,21 +850,32 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
     // Handle multiple input types
     if (oindent == Py_None)
     {
-        encoder.indent = 0;
+        encoder.indentLength = -1;
+        /*sprintf(encoder.indentChars, "");  // how to do this right in C?*/
     }
     else if (PyLong_Check(oindent))
     {
-        encoder.indent = PyLong_AsLong(oindent);
+        encoder.indentLength = PyLong_AsLong(oindent);
+        sprintf(encoder.indentChars, " ");  // how to do this right in C?
     }
     else if (PyUnicode_Check(oindent))
     {
         // set a custom indent string
-        size_t olen = 0;
+        int olen = -1;
+
+        printf("\nIndent Print: '''\n");
+        PyObject_Print(oindent, stdout, 0);
+        printf("\n'''\n");
+        printf("before olen = %d\n", olen);
         encoder.indentChars = _PyUnicodeToChars(oindent, &olen);
-        encoder.indent = (int) olen;
-        if(encoder.indentChars == NULL)
+        printf("after olen = %d\n", olen);
+        encoder.indentLength = (int) olen;
+        printf("encoder.indentChars = '%s'\n", encoder.indentChars);
+        printf("encoder.indentLength = %d\n", encoder.indentLength);
+
+        if(encoder.indentChars == NULL && encoder.indentLength == -1)
         {
-            PyErr_SetString(PyExc_ValueError, "indent was malformed");
+            PyErr_SetString(PyExc_ValueError, "malformed indent");
             return NULL;
         }
     }
@@ -900,16 +913,23 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
                  csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);
 
   PRINTMARK();
-  ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer));
+  size_t RetLength;
+  printf("a RetLength = %d\n", RetLength);
+  ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer), &RetLength);
+  printf("b RetLength = %d\n", RetLength);
+
+  printf("a\n");
   PRINTMARK();
 
   dconv_d2s_free(&encoder.d2s);
 
+  printf("a\n");
   if (PyErr_Occurred())
   {
     return NULL;
   }
 
+  printf("a\n");
   if (encoder.errorMsg)
   {
     if (ret != buffer)
@@ -921,7 +941,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
     return NULL;
   }
 
-  newobj = PyUnicode_FromString (ret);
+  printf("a\n");
+  /*Py_ssize_t size = retLength;*/
+  newobj = PyUnicode_FromStringAndSize(ret, (Py_ssize_t) RetLength);
+  // newobj = PyUnicode_FromString (ret); Can't use this because the output might contain a null byte
 
   if (ret != buffer)
   {
@@ -930,6 +953,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
 
   PRINTMARK();
 
+  printf("\newobj : '''\n");
+  PyObject_Print(newobj, stdout, 0);
+  printf("\n'''\n");
+
   return newobj;
 }
 

diff --git a/tests/test_json_compat.py b/tests/test_json_compat.py
@@ -0,0 +1,162 @@
+r"""
+
+Compare cases
+
+python -c "import json         ; print(repr(json.dumps([1], indent='\x000')))"
+python -c "import ujson as json; print(repr(json.dumps([1], indent='\x000')))"
+
+python -c "import json         ; print(repr(json.dumps([1, 2], indent='a \x000 b')))"
+python -c "import ujson as json; print(repr(json.dumps([1, 2], indent='a \x000 b')))"
+
+python -c "import json         ; print(repr(json.dumps([1], indent='\udfff')))"
+python -c "import ujson as json; print(repr(json.dumps([1, 2, 3], indent='\udfff')))"
+
+
+"""
+
+import ujson
+import json as pjson
+import itertools as it
+from collections import defaultdict
+
+JSON_IMPLS = {
+    'ujson': ujson,
+    'pjson': pjson,
+}
+
+
+def group_items(items, key):
+    """
+    Groups a list of items by group id. (from ubelt)
+    """
+    pair_list = ((key(item), item) for item in items)
+    # Initialize a dict of lists
+    id_to_items = defaultdict(list)
+    # Insert each item into the correct group
+    for groupid, item in pair_list:
+        id_to_items[groupid].append(item)
+    return id_to_items
+
+
+def named_product(basis):
+    # Implementation from ubelt
+    keys = list(basis.keys())
+    for vals in it.product(*basis.values()):
+        kw = dict(zip(keys, vals))
+        yield kw
+
+
+def test_dumps_compatability():
+    """
+    Test the difference between Python's json module (pjson) and ultrajson
+    (ujson) under a grid of different parameters.
+    """
+
+    # Define the data we will test
+    # data = {'a': [1, 2, 3, named_product]}
+    data = {'a': [1, 2, 3]}
+
+    # Define the parameters we will test
+    NULL_CHAR = '\x00'
+    UTF_SURROGATE0000 = '\udc80'
+    UTF_SURROGATE1024 = '\udfff'
+    param_basis = {
+        'indent': [
+            # -1,
+            # -2,
+            # '    ',
+            # ' ab ',
+            # 4, 0,
+            # None,
+            # '\t',
+            # NULL_CHAR,
+            UTF_SURROGATE0000,
+            # UTF_SURROGATE1024,
+        ],
+        'ensure_ascii': [False],
+        # 'ensure_ascii': [True, False, None],
+        # 'sort_keys': [True, False, None],
+        # 'default': [None, str],
+        'module': list(JSON_IMPLS.keys()),
+    }
+    kwargs_keys = ['indent', 'default', 'ensure_ascii', 'sort_keys']
+    kwargs_keys = [k for k in kwargs_keys if k in param_basis]
+    param_grid = named_product(param_basis)
+    results = []
+    for params in param_grid:
+        params_key = pjson.dumps(params, default=str)
+        module = JSON_IMPLS[params['module']]
+        kwargs = {k: params[k] for k in kwargs_keys if k in params}
+        try:
+            result = module.dumps(data, **kwargs)
+        except Exception as ex:
+            error = ex
+            result = None
+        else:
+            error = 0
+        row = {
+            'params_key': params_key,
+            **params,
+            'data': data,
+            'result': result,
+            'error': error,
+        }
+        results.append(row)
+
+    print(pjson.dumps(results, indent='    ', default=repr))
+
+    def grouper(row):
+        return tuple([(k, row[k]) for k in kwargs_keys])
+
+    grouped_results = group_items(results, key=grouper)
+
+    agree_keys = []
+    diagree_keys = []
+
+    for group_key, group in grouped_results.items():
+        assert len(group) == 2
+        module_to_row = {r['module']: r for r in group}
+        assert len(module_to_row) == 2
+
+        ujson_row = module_to_row['ujson']
+        pjson_row = module_to_row['pjson']
+
+        if ujson_row['error'] and pjson_row['error']:
+            # Both implementations errored
+            agree_keys.append(group_key)
+        else:
+            # Check if the results from all implementations are the same
+            agree_keys.append(group_key)
+            u_result = ujson_row['result']
+            p_result = pjson_row['result']
+
+            try:
+                p_val = pjson.loads(p_result)
+            except Exception as ex:
+                p_val = repr(ex)
+
+            try:
+                u_val = pjson.loads(u_result)
+            except Exception as ex:
+                u_val = repr(ex)
+
+            if p_val != u_val:
+                import difflib
+                print(f'Disagree on {group_key}')
+                print(' * p_result = {!r}'.format(p_result))
+                print(' * u_result = {!r}'.format(u_result))
+                print(''.join(list(difflib.ndiff([str(p_val)], [str(u_val)]))))
+                diagree_keys.append(group_key)
+            else:
+                agree_keys.append(group_key)
+
+    print('Num Agree: {}'.format(len(agree_keys)))
+    print('Num Disagree: {}'.format(len(diagree_keys)))
+
+
+if __name__ == '__main__':
+    """
+    CommandLine:
+        python ~/code/ultrajson/tests/test_json_compat.py
+    """
+    test_dumps_compatability()