From 60929c281b356fd71b8061921cb2cd1ac4f54b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= Date: Wed, 9 Feb 2022 20:44:01 +0000 Subject: [PATCH 01/11] Fix unchecked buffer overflows (CVE-2021-45958). Add a few extra memory reserve calls to account for the extra space that indentation needs. These kinds of memory issues are hard to spot because the buffer is resized in powers of 2 meaning that a miscalculation would only show any symptoms if the required buffer size is estimated to be just below a 2 power but is actually just above. Add a debug mode which replaces the 2 power scheme with reserving only the memory explicitly requested and adds some overflow checks. --- .github/workflows/test.yml | 8 + lib/ultrajsonenc.c | 64 ++- tests/334-reproducer.json | 857 +++++++++++++++++++++++++++++++++++++ tests/test_ujson.py | 24 ++ 4 files changed, 944 insertions(+), 9 deletions(-) create mode 100644 tests/334-reproducer.json diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f185a450..5c3c3a1d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,9 +31,17 @@ jobs: python -m pip install -U pip python -m pip install -U pytest python -m pip install . + env: + CFLAGS: '-DDEBUG' - name: Tests run: | + pytest -s + + - name: Test without debug mode + run: | + git clean -Xfd + python -m pip install --force-reinstall . pytest - name: Test with coverage diff --git a/lib/ultrajsonenc.c b/lib/ultrajsonenc.c index 0c6c3ade..7c21b8e5 100644 --- a/lib/ultrajsonenc.c +++ b/lib/ultrajsonenc.c @@ -41,6 +41,7 @@ Numeric decoder derived from from TCL library #include #include #include +#include #include #include @@ -113,14 +114,25 @@ FIXME: Keep track of how big these get across several encoder calls and try to m That way we won't run our head into the wall each call */ static void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) { + size_t free_space = enc->end - enc->offset; + if (free_space >= cbNeeded) + { + return; + } size_t curSize = enc->end - enc->start; - size_t newSize = curSize * 2; + size_t newSize = curSize; size_t offset = enc->offset - enc->start; +#ifdef DEBUG + // In debug mode, allocate only what is requested so that any miscalculation + // shows up plainly as a crash. + newSize = (enc->offset - enc->start) + cbNeeded; +#else while (newSize < curSize + cbNeeded) { newSize *= 2; } +#endif if (enc->heap) { @@ -147,6 +159,12 @@ static void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) enc->end = enc->start + newSize; } +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value) { *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; @@ -261,11 +279,19 @@ static int Buffer_EscapeStringUnvalidated (JSONObjectEncoder *enc, const char *i static int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) { + Buffer_Reserve(enc, RESERVE_STRING(end - io)); + JSUTF32 ucs; char *of = (char *) enc->offset; for (;;) { +#ifdef DEBUG + if ((io < end) && (enc->end - of < RESERVE_STRING(1))) { + fprintf(stderr, "Ran out of buffer space during Buffer_EscapeStringValidated()\n"); + abort(); + } +#endif JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; switch (utflen) @@ -487,15 +513,28 @@ static int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, cons } } -#define Buffer_Reserve(__enc, __len) \ - if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ - { \ - Buffer_Realloc((__enc), (__len));\ - } \ - -#define Buffer_AppendCharUnchecked(__enc, __chr) \ - *((__enc)->offset++) = __chr; \ +static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendCharUnchecked(JSONObjectEncoder *enc, char chr) +{ +#ifdef DEBUG + if (enc->end <= enc->offset) + { + fprintf(stderr, "Overflow writing byte %d '%c'. The last few characters were:\n'''", chr, chr); + char * recent = enc->offset - 1000; + if (enc->start > recent) + { + recent = enc->start; + } + for (; recent < enc->offset; recent++) + { + fprintf(stderr, "%c", *recent); + } + fprintf(stderr, "'''\n"); + abort(); + } +#endif + *(enc->offset++) = chr; +} static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end) { @@ -670,6 +709,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c iterObj = enc->iterGetValue(obj, &tc); enc->level ++; + Buffer_Reserve (enc, enc->indent * enc->level); Buffer_AppendIndentUnchecked (enc, enc->level); encode (iterObj, enc, NULL, 0); if (enc->errorMsg) @@ -685,6 +725,9 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c enc->iterEnd(obj, &tc); if (count > 0) { + // Reserve space for the indentation plus the newline and the closing + // bracket. + Buffer_Reserve (enc, enc->indent * enc->level + 2); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); } @@ -700,6 +743,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c while ((res = enc->iterNext(obj, &tc))) { + Buffer_Reserve (enc, 3 + (enc->indent * (enc->level + 1))); if(res < 0) { enc->iterEnd(obj, &tc); @@ -721,6 +765,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c objName = enc->iterGetName(obj, &tc, &szlen); enc->level ++; + Buffer_Reserve (enc, enc->indent * enc->level); Buffer_AppendIndentUnchecked (enc, enc->level); encode (iterObj, enc, objName, szlen); if (enc->errorMsg) @@ -736,6 +781,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c enc->iterEnd(obj, &tc); if (count > 0) { + Buffer_Reserve (enc, enc->indent * enc->level + 4); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); } diff --git a/tests/334-reproducer.json b/tests/334-reproducer.json new file mode 100644 index 00000000..b53fcd5c --- /dev/null +++ b/tests/334-reproducer.json @@ -0,0 +1,857 @@ +{ + "ak.somestring.internal.Shadow": { + "id": 33300002, + "init_state": "(bk.action.array.Make, (bk.action.i32.Const, 0))", + "child": { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Collection": { + "id": 33300001, + "snap": "center", + "direction": "row", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#2c8932" + } + } + } + }, + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.Image": { + "media_id": "10156403921218138", + "preview_url": "https://scontent.xx.whoaa.net/v/t1.0-9/51099660_10156403921233138_3677795704043995136_n.jpg?_nc_cat=102&_nc_log=1&_nc_oc=AQk3Td-w9KpopLL2N1jgZ4WDMuxUyuGY3ZvY4mDSCk8W9-GjsFPi2S4gVQk0Y3A5ZaaQf7ASvQ2s_eR85kTmFvr0&_nc_ad=z-m&_nc_cid=0&_nc_zor=9&_nc_ht=scontent.xx&oh=fb16b0d60b13817a505f583cc9dad1eb&oe=5CBCDB46", + "height": 278, + "width": 156 + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "children": [ + { + "ak.somestring.Flexbox": { + "id": 33300004, + "_style": { + "flex": { + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "height": "2dp", + "margin_left": "4dp" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "left": "0dp", + "top": "10dp", + "margin_top": "10dp", + "right": "0dp", + "height": "2dp", + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "align_items": "flex_start", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "corner_radius": "17dp" + } + }, + "children": [ + { + "ls.components.Image": { + "media_id": "10156403921218138", + "preview_url": "https://scontent.xx.whoaa.net/v/t1.0-9/51099660_10156403921233138_3677795704043995136_n.jpg?_nc_cat=102&_nc_log=1&_nc_oc=AQk3Td-w9KpopLL2N1jgZ4WDMuxUyuGY3ZvY4mDSCk8W9-GjsFPi2S4gVQk0Y3A5ZaaQf7ASvQ2s_eR85kTmFvr0&_nc_ad=z-m&_nc_cid=0&_nc_zor=9&_nc_ht=scontent.xx&oh=fb16b0d60b13817a505f583cc9dad1eb&oe=5CBCDB46", + "height": 34, + "width": 34, + "_style": { + "flex": { + "width": "34dp", + "height": "34dp" + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "12dp", + "width": "34dp", + "height": "34dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "flex_start", + "children": [ + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "eric", + "text_size": "15sp", + "text_style": "bold", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "margin_bottom": "2dp", + "width": "100%" + } + } + } + }, + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "8h", + "text_size": "13sp", + "text_style": "normal", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "top": "30dp", + "left": "10dp", + "height": "48dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.StoriesReplyBar": {} + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "45dp", + "margin_top": "auto", + "margin_bottom": "auto" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "width": "100%", + "height": "100%", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.Image": { + "media_id": "10101230968216658", + "preview_url": "https://scontent.xx.whoaa.net/v/t1.0-9/50800535_10101230968226638_6755212111762161664_n.jpg?_nc_cat=101&_nc_log=1&_nc_oc=AQmKcqYvt6DI7aeGk3k_oF6RHSVZkUg7f9hnBCWilyaOGdCWO0-u9_zssC5qGvca6wqsrz3AP0y1RPLPiZj8ycCv&_nc_ad=z-m&_nc_cid=0&_nc_zor=9&_nc_ht=scontent.xx&oh=2fffbab8f0a102d196454ee0138c1850&oe=5CC15206", + "height": 278, + "width": 156 + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "children": [ + { + "ak.somestring.Flexbox": { + "id": 33300005, + "_style": { + "flex": { + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#cccccc" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "height": "2dp", + "margin_left": "4dp" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "left": "0dp", + "top": "10dp", + "margin_top": "10dp", + "right": "0dp", + "height": "2dp", + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "align_items": "flex_start", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "corner_radius": "17dp" + } + }, + "children": [ + { + "ls.components.Image": { + "media_id": "10101230968216658", + "preview_url": "https://scontent.xx.whoaa.net/v/t1.0-9/50800535_10101230968226638_6755212111762161664_n.jpg?_nc_cat=101&_nc_log=1&_nc_oc=AQmKcqYvt6DI7aeGk3k_oF6RHSVZkUg7f9hnBCWilyaOGdCWO0-u9_zssC5qGvca6wqsrz3AP0y1RPLPiZj8ycCv&_nc_ad=z-m&_nc_cid=0&_nc_zor=9&_nc_ht=scontent.xx&oh=2fffbab8f0a102d196454ee0138c1850&oe=5CC15206", + "height": 34, + "width": 34, + "_style": { + "flex": { + "width": "34dp", + "height": "34dp" + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "12dp", + "width": "34dp", + "height": "34dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "flex_start", + "children": [ + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "eric", + "text_size": "15sp", + "text_style": "bold", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "margin_bottom": "2dp", + "width": "100%" + } + } + } + }, + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "2h", + "text_size": "13sp", + "text_style": "normal", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "top": "30dp", + "left": "10dp", + "height": "48dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.StoriesReplyBar": {} + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "45dp", + "margin_top": "auto", + "margin_bottom": "auto" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "width": "100%", + "height": "100%", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.Video": { + "media_id": "10156395664922983", + "video_url": "https://video.xx.whoaa.net/v/t42.9040-2/51636103_316525608877874_407931582842667008_n.mp4?_nc_cat=109&efg=eyJ2ZW5jb2RlX3RhZyI6InN2ZV9oZCJ9&_nc_log=1&_nc_oc=AQm6aMctRAFdMe3C66upF2JulQP4mV3Hd4THkueZex952PR389F6Ay9XHm1S40dV1x7M1I-fAW5y3iH7JlQ3MgDM&_nc_ht=video.xx&oh=e17b1f7ec67619d57a5b1cda5e076fef&oe=5C587F7D", + "preview_url": "https://scontent.xx.whoaa.net/v/t15.5256-10/s960x960/51767715_10156395667952983_4168426706077483008_n.jpg?_nc_cat=104&_nc_log=1&_nc_oc=AQnVwEZk2vG8Q3TcoR0SxdXSi8rL_GaST2aH3i9auDcDnJNTRKvuYEFfd_qKGBhmD4-bo-f8BY5j9jHyit765O7P&_nc_ad=z-m&_nc_cid=0&_nc_zor=9&_nc_ht=scontent.xx&oh=9a17e4bcf8a2a9aabc21d2ecf9f8611b&oe=5CB3D14B", + "show_media_play_button": false, + "media_height": 960, + "media_width": 540 + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "children": [ + { + "ak.somestring.Flexbox": { + "flex_direction": "row", + "align_items": "stretch", + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#ffffff" + } + } + } + }, + "children": [ + { + "ak.somestring.Flexbox": { + "id": 33300006, + "_style": { + "flex": { + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#cccccc" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "background": { + "ak.somestring.ColorDrawable": { + "color": "#cccccc" + } + } + } + }, + "_style": { + "flex": { + "margin_right": "4dp", + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "height": "2dp", + "margin_left": "4dp" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "left": "0dp", + "top": "10dp", + "margin_top": "10dp", + "right": "0dp", + "height": "2dp", + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "grow": 1 + } + } + } + }, + { + "ak.somestring.Flexbox": { + "align_items": "flex_start", + "children": [ + { + "ak.somestring.Flexbox": { + "decoration": { + "ak.somestring.BoxDecoration": { + "corner_radius": "17dp" + } + }, + "children": [ + { + "ls.components.Image": { + "media_id": "10156395664922983", + "height": 34, + "width": 34, + "_style": { + "flex": { + "width": "34dp", + "height": "34dp" + } + } + } + } + ], + "_style": { + "flex": { + "margin_right": "12dp", + "width": "34dp", + "height": "34dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "flex_direction": "column", + "align_items": "flex_start", + "children": [ + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "eric", + "text_size": "15sp", + "text_style": "bold", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "margin_bottom": "2dp", + "width": "100%" + } + } + } + }, + { + "ak.somestring.RichText": { + "children": [ + { + "ak.somestring.TextSpan": { + "text": "20h", + "text_size": "13sp", + "text_style": "normal", + "text_color": "#ffffff" + } + } + ], + "_style": { + "flex": { + "width": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "top": "30dp", + "left": "10dp", + "height": "48dp" + } + } + } + }, + { + "ak.somestring.Flexbox": { + "children": [ + { + "ls.components.StoriesReplyBar": {} + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "45dp", + "margin_top": "auto", + "margin_bottom": "auto" + } + } + } + } + ], + "_style": { + "flex": { + "position_type": "absolute", + "width": "100%", + "height": "100%", + "grow": 1 + } + } + } + } + ], + "_style": { + "flex": { + "width": "100%", + "height": "100%" + } + } + } + } + ], + "_style": { + "flex": { + "height": "100%" + } + } + } + } + ] + } + } + } +} diff --git a/tests/test_ujson.py b/tests/test_ujson.py index 56e7664b..b5ae6547 100644 --- a/tests/test_ujson.py +++ b/tests/test_ujson.py @@ -7,6 +7,7 @@ import sys import uuid from collections import OrderedDict +from pathlib import Path import pytest import ujson @@ -947,6 +948,29 @@ def default(value): ujson.dumps(unjsonable_obj, default=default) +def test_dump_huge_indent(): + ujson.encode({"a": True}, indent=65539) + + +def test_dump_long_string(): + ujson.dumps(["aaaa", "\x00" * 10921]) + + +def test_dump_indented_nested_list(): + a = _a = [] + for i in range(20): + _a.append(list(range(i))) + _a = _a[-1] + ujson.dumps(a, indent=i) + + +@pytest.mark.parametrize("indent", [0, 1, 2, 4, 5, 8, 49]) +def test_issue_334(indent): + path = Path(__file__).with_name("334-reproducer.json") + a = ujson.loads(path.read_bytes()) + ujson.dumps(a, indent=indent) + + """ def test_decode_numeric_int_frc_overflow(): input = "X.Y" From 62e48535e81ff7ddf55c4806b11f68b76efa8fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= Date: Fri, 11 Feb 2022 22:48:15 +0000 Subject: [PATCH 02/11] Add a fuzzing test to search for segfaults in encoding. --- tests/fuzz.py | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 tests/fuzz.py diff --git a/tests/fuzz.py b/tests/fuzz.py new file mode 100644 index 00000000..da7dcb1d --- /dev/null +++ b/tests/fuzz.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +""" +A brute force fuzzer for detecting memory issues in ujson.dumps(). To use, first +compile ujson in debug mode: + + CFLAGS='-DDEBUG' python setup.py -q build_ext --inplace -f + +Then run without arguments: + + python tests/fuzz.py + +If it crashes, the last line of output is the arguments to reproduce the +failure. + + python tests/fuzz.py {{ last line of output before crash }} + +Adding --dump-python or --dump-json will print the object it intends to +serialise as either a Python literal or in JSON. + +""" + +import argparse +import itertools +import json +import math +import random +import re +from pprint import pprint + +import ujson + + +class FuzzGenerator: + """A random JSON serialisable object generator.""" + + def __init__(self, seed=None): + self._randomizer = random.Random(seed) + self._shrink = 1 + + def key(self): + key_types = [self.int, self.float, self.string, self.null, self.bool] + return self._randomizer.choice(key_types)() + + def item(self): + if self._randomizer.random() > 0.8: + return self.key() + return self._randomizer.choice([self.list, self.dict])() + + def int(self): + return int(self.float()) + + def float(self): + sign = self._randomizer.choice([-1, 1, 0]) + return sign * math.exp(self._randomizer.uniform(-40, 40)) + + def string(self): + characters = ["\x00", "\t", "a", "\U0001f680", "<>", "\u1234"] + return self._randomizer.choice(characters) * self.length() + + def bool(self): + return self._randomizer.random() < 0.5 + + def null(self): + return None + + def list(self): + return [self.item() for i in range(self.length())] + + def dict(self): + return {self.key(): self.item() for i in range(self.length())} + + def length(self): + self._shrink *= 0.99 + return int(math.exp(self._randomizer.uniform(-0.5, 5)) * self._shrink) + + +def random_object(seed=None): + return FuzzGenerator(seed).item() + + +class RangeOption(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + values = re.findall("[^: ]+", values) + if len(values) == 1: + values = (int(values[0]),) + else: + values = range(*map(int, values)) + setattr(namespace, self.dest, values) + + +class ListOption(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + values = tuple(map(int, re.findall("[^, ]+", values))) + setattr(namespace, self.dest, values) + + +parser = argparse.ArgumentParser( + epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter +) +parser.add_argument( + "--seed", + default=range(100), + action=RangeOption, + dest="seeds", + help="A seed or range of seeds (in the form start:end[:step]) " + "to initialise the randomizer.", +) +parser.add_argument( + "--indent", + default=(0, 1, 2, 3, 4, 5, 12, 100, 1000), + action=ListOption, + help="A comma separated sequence of indentation lengths to test.", +) +parser.add_argument( + "--ensure_ascii", + default=(0, 1), + action=ListOption, + help="Sets the ensure_ascii option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to testboth.", +) +parser.add_argument( + "--encode_html_chars", + default=(0, 1), + action=ListOption, + help="Sets the encode_html_chars option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to test both.", +) +parser.add_argument( + "--escape_forward_slashes", + default=(0, 1), + action=ListOption, + help="Sets the escape_forward_slashes option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to test both.", +) +parser.add_argument( + "--dump-python", + action="store_true", + help="Print the randomly generated object as a Python literal and exit.", +) +parser.add_argument( + "--dump-json", + action="store_true", + help="Print the randomly generated object in JSON format and exit.", +) + + +def cli(args=None): + options = dict(parser.parse_args(args)._get_kwargs()) + if options.pop("dump_json"): + print(json.dumps(random_object(options["seeds"][0]), indent=2)) + elif options.pop("dump_python"): + pprint(random_object(options["seeds"][0])) + else: + fuzz(**options) + + +def fuzz(seeds, **options): + try: + for seed in seeds: + data = random_object(seed) + for permutation in itertools.product(*options.values()): + _options = dict(zip(options.keys(), permutation)) + print(f"--seed {seed}", *(f"--{k} {v}" for (k, v) in _options.items())) + ujson.dumps(data, **_options) + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + cli() From 22ca5dc0c9fb8bdceadb3d6f7d70a309f8a79be6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= Date: Sat, 12 Feb 2022 10:23:27 +0000 Subject: [PATCH 03/11] Remove the hidden JSON_NO_EXTRA_WHITESPACE compile knob. Unsetting it can lead to seg-faults. I don't think it's worth having to fix and then test this undocumented permutation. --- lib/ultrajson.h | 3 --- lib/ultrajsonenc.c | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/lib/ultrajson.h b/lib/ultrajson.h index 8334fcae..c5d75b17 100644 --- a/lib/ultrajson.h +++ b/lib/ultrajson.h @@ -56,9 +56,6 @@ tree doesn't have cyclic references. #include #include -// Don't output any extra whitespaces when encoding -#define JSON_NO_EXTRA_WHITESPACE - // Max decimals to encode double floating point numbers with #ifndef JSON_DOUBLE_MAX_DECIMALS #define JSON_DOUBLE_MAX_DECIMALS 15 diff --git a/lib/ultrajsonenc.c b/lib/ultrajsonenc.c index 7c21b8e5..f8827316 100644 --- a/lib/ultrajsonenc.c +++ b/lib/ultrajsonenc.c @@ -662,14 +662,10 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c Buffer_AppendCharUnchecked(enc, '\"'); Buffer_AppendCharUnchecked (enc, ':'); -#ifdef JSON_NO_EXTRA_WHITESPACE if (enc->indent) { Buffer_AppendCharUnchecked (enc, ' '); } -#else - Buffer_AppendCharUnchecked (enc, ' '); -#endif } tc.encoder_prv = enc->prv; @@ -700,9 +696,6 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c if (count > 0) { Buffer_AppendCharUnchecked (enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); -#endif } Buffer_AppendIndentNewlineUnchecked (enc); @@ -755,9 +748,6 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c if (count > 0) { Buffer_AppendCharUnchecked (enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); -#endif } Buffer_AppendIndentNewlineUnchecked (enc); From 3151be24c413ef06605cfa71d48e09de1f96cd0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= Date: Fri, 11 Feb 2022 23:05:22 +0000 Subject: [PATCH 04/11] Fix some more seg-faults on encoding. --- lib/ultrajsonenc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ultrajsonenc.c b/lib/ultrajsonenc.c index f8827316..cc65b0a0 100644 --- a/lib/ultrajsonenc.c +++ b/lib/ultrajsonenc.c @@ -175,6 +175,7 @@ static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnche static int Buffer_EscapeStringUnvalidated (JSONObjectEncoder *enc, const char *io, const char *end) { + Buffer_Reserve(enc, RESERVE_STRING(end - io)); char *of = (char *) enc->offset; for (;;) @@ -720,7 +721,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c if (count > 0) { // Reserve space for the indentation plus the newline and the closing // bracket. - Buffer_Reserve (enc, enc->indent * enc->level + 2); + Buffer_Reserve (enc, enc->indent * enc->level + 4); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); } From fdfb9f424f25c98b78f11bbe3bb416cf3ef3035c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= Date: Sun, 13 Feb 2022 14:06:25 +0000 Subject: [PATCH 05/11] Add fuzz test to CI/CD. --- .github/workflows/fuzz.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..4d8b3781 --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,19 @@ +name: Fuzz + +on: [push, pull_request, workflow_dispatch] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install + run: | + python -m pip install -U pip + python -m pip install . + env: + CFLAGS: '-DDEBUG' + + - name: Fuzz + run: python tests/fuzz.py --seed=0:1000 From 7039d60bebcac501d9f369cdd7cb6af0485513de Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 02:27:42 +0000 Subject: [PATCH 06/11] Refactor buffer reservations to ensure sufficient space on all additions * Removed the reservations in Buffer_EscapeStringUnvalidated and Buffer_EscapeStringValidated as those are not needed and may hide other bugs. * Debug check in Buffer_EscapeStringValidated was triggering incorrectly. * The reservation on JT_RAW was much larger than necessary; the value is copied directly, so the factor six is not needed, and this may hide other bugs. * Explicit accurate reservations everywhere else. --- lib/ultrajsonenc.c | 47 ++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/lib/ultrajsonenc.c b/lib/ultrajsonenc.c index cc65b0a0..c8756136 100644 --- a/lib/ultrajsonenc.c +++ b/lib/ultrajsonenc.c @@ -175,7 +175,6 @@ static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnche static int Buffer_EscapeStringUnvalidated (JSONObjectEncoder *enc, const char *io, const char *end) { - Buffer_Reserve(enc, RESERVE_STRING(end - io)); char *of = (char *) enc->offset; for (;;) @@ -280,15 +279,14 @@ static int Buffer_EscapeStringUnvalidated (JSONObjectEncoder *enc, const char *i static int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) { - Buffer_Reserve(enc, RESERVE_STRING(end - io)); - JSUTF32 ucs; char *of = (char *) enc->offset; for (;;) { #ifdef DEBUG - if ((io < end) && (enc->end - of < RESERVE_STRING(1))) { + // 6 is the maximum length of a single character (cf. RESERVE_STRING). + if ((io < end) && (enc->end - of < 6)) { fprintf(stderr, "Ran out of buffer space during Buffer_EscapeStringValidated()\n"); abort(); } @@ -628,14 +626,6 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c return; } - /* - This reservation must hold - - length of _name as encoded worst case + - maxLength of double to string OR maxLength of JSLONG to string - */ - - Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); if (enc->errorMsg) { return; @@ -643,6 +633,8 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c if (name) { + // 2 extra for the colon and optional space after it + Buffer_Reserve(enc, RESERVE_STRING(cbName) + 2); Buffer_AppendCharUnchecked(enc, '\"'); if (enc->forceASCII) @@ -672,6 +664,17 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c tc.encoder_prv = enc->prv; enc->beginTypeContext(obj, &tc, enc); + /* + This reservation covers any additions on non-variable parts below, specifically: + - Opening brackets for JT_ARRAY and JT_OBJECT + - Number representation for JT_LONG, JT_ULONG, JT_INT, and JT_DOUBLE + - Constant value for JT_TRUE, JT_FALSE, JT_NULL + + The length of 128 is the worst case length of the Buffer_AppendDoubleDconv addition. + The other types above all have smaller representations. + */ + Buffer_Reserve (enc, 128); + switch (tc.type) { case JT_INVALID: @@ -694,6 +697,9 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c while (enc->iterNext(obj, &tc)) { + // The extra 2 bytes cover the comma and (optional) newline. + Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2); + if (count > 0) { Buffer_AppendCharUnchecked (enc, ','); @@ -703,7 +709,6 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c iterObj = enc->iterGetValue(obj, &tc); enc->level ++; - Buffer_Reserve (enc, enc->indent * enc->level); Buffer_AppendIndentUnchecked (enc, enc->level); encode (iterObj, enc, NULL, 0); if (enc->errorMsg) @@ -719,12 +724,12 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c enc->iterEnd(obj, &tc); if (count > 0) { - // Reserve space for the indentation plus the newline and the closing - // bracket. - Buffer_Reserve (enc, enc->indent * enc->level + 4); + // Reserve space for the indentation plus the newline. + Buffer_Reserve (enc, enc->indent * enc->level + 1); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); } + Buffer_Reserve (enc, 1); Buffer_AppendCharUnchecked (enc, ']'); break; } @@ -737,7 +742,9 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c while ((res = enc->iterNext(obj, &tc))) { - Buffer_Reserve (enc, 3 + (enc->indent * (enc->level + 1))); + // The extra 2 bytes cover the comma and optional newline. + Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2); + if(res < 0) { enc->iterEnd(obj, &tc); @@ -756,7 +763,6 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c objName = enc->iterGetName(obj, &tc, &szlen); enc->level ++; - Buffer_Reserve (enc, enc->indent * enc->level); Buffer_AppendIndentUnchecked (enc, enc->level); encode (iterObj, enc, objName, szlen); if (enc->errorMsg) @@ -772,10 +778,11 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c enc->iterEnd(obj, &tc); if (count > 0) { - Buffer_Reserve (enc, enc->indent * enc->level + 4); + Buffer_Reserve (enc, enc->indent * enc->level + 1); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); } + Buffer_Reserve (enc, 1); Buffer_AppendCharUnchecked (enc, '}'); break; } @@ -880,7 +887,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c return; } - Buffer_Reserve(enc, RESERVE_STRING(szlen)); + Buffer_Reserve(enc, szlen); if (enc->errorMsg) { enc->endTypeContext(obj, &tc); From 36ffcc8c40241bc8bd1f1d81893b03e175c5854c Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 02:35:57 +0000 Subject: [PATCH 07/11] Widen tests to cover more possible buffer overflows If the default output format changes in the future (e.g. `separators` as in the standard library), these tests would otherwise become irrelevant. --- tests/test_ujson.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/test_ujson.py b/tests/test_ujson.py index b5ae6547..b0b39479 100644 --- a/tests/test_ujson.py +++ b/tests/test_ujson.py @@ -948,12 +948,15 @@ def default(value): ujson.dumps(unjsonable_obj, default=default) -def test_dump_huge_indent(): - ujson.encode({"a": True}, indent=65539) +@pytest.mark.parametrize("indent", list(range(65537, 65542))) +def test_dump_huge_indent(indent): + ujson.encode({"a": True}, indent=indent) -def test_dump_long_string(): - ujson.dumps(["aaaa", "\x00" * 10921]) +@pytest.mark.parametrize("first_length", list(range(2, 7))) +@pytest.mark.parametrize("second_length", list(range(10919, 10924))) +def test_dump_long_string(first_length, second_length): + ujson.dumps(["a" * first_length, "\x00" * second_length]) def test_dump_indented_nested_list(): From 968fb162ab3e93a38040cc956b1562fbcdc86963 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 17:41:10 +0000 Subject: [PATCH 08/11] actions/checkout@v3 Co-authored-by: Hugo van Kemenade --- .github/workflows/fuzz.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 4d8b3781..23bcad52 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -6,7 +6,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install run: | From 3944dae13e245e098dfacf39bd9582de82cc756c Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 17:41:43 +0000 Subject: [PATCH 09/11] Clearer pytest command Co-authored-by: Hugo van Kemenade --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5c3c3a1d..0e36cee6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,7 +36,7 @@ jobs: - name: Tests run: | - pytest -s + python -m pytest --capture=no - name: Test without debug mode run: | From 9b5012184f03a1f186b2470c3dda592133cd709e Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 17:41:54 +0000 Subject: [PATCH 10/11] python -m pytest Co-authored-by: Hugo van Kemenade --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0e36cee6..11919ec7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,7 +42,7 @@ jobs: run: | git clean -Xfd python -m pip install --force-reinstall . - pytest + python -m pytest - name: Test with coverage if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.python-version == '3.9' }} From 7dd742998b6ef7088e11869db7d03c371bab3b11 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 5 Apr 2022 17:42:02 +0000 Subject: [PATCH 11/11] Remove shebang Co-authored-by: Hugo van Kemenade --- tests/fuzz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fuzz.py b/tests/fuzz.py index da7dcb1d..043ce0f1 100644 --- a/tests/fuzz.py +++ b/tests/fuzz.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ A brute force fuzzer for detecting memory issues in ujson.dumps(). To use, first compile ujson in debug mode: