From 6deed01a90b1d5da335dc3a8ac72419b3c908b95 Mon Sep 17 00:00:00 2001 From: liu Date: Fri, 28 Oct 2022 16:40:14 +0800 Subject: [PATCH] opt: reduce memory pool size (#302) * fix: reduce state stack size * fix types * opt: shrink encoder stack size * opt: reduce decoder stack size * add bench.py loops Co-authored-by: liuqiang Co-authored-by: duanyi.aster --- bench.py | 2 +- decode_test.go | 14 +++++++--- decoder/pools.go | 2 +- encoder/encoder_test.go | 4 +-- encoder/pools.go | 2 +- internal/native/avx/native_amd64.s | 14 +++++----- internal/native/avx2/native_amd64.s | 14 +++++----- internal/native/sse/native_amd64.s | 14 +++++----- internal/native/types/types.go | 5 +++- issue_test/pretouch_test.go | 2 +- native/native.h | 33 +----------------------- native/types.h | 40 +++++++++++++++++++++++++++++ 12 files changed, 82 insertions(+), 64 deletions(-) create mode 100644 native/types.h diff --git a/bench.py b/bench.py index 86bf2899c..fc95d32d9 100755 --- a/bench.py +++ b/bench.py @@ -19,7 +19,7 @@ import subprocess import argparse -repeat_time = 10 +repeat_time = 100 gbench_prefix = "SONIC_NO_ASYNC_GC=1 go test -benchmem -run=none -count=%d "%(repeat_time) def run(cmd): diff --git a/decode_test.go b/decode_test.go index 635696568..58c3fc316 100644 --- a/decode_test.go +++ b/decode_test.go @@ -38,6 +38,7 @@ import ( `unicode/utf8` `github.com/bytedance/sonic/decoder` + `github.com/bytedance/sonic/internal/native/types` `github.com/davecgh/go-spew/spew` ) @@ -2361,6 +2362,11 @@ func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { } func TestUnmarshalMaxDepth(t *testing.T) { + const ( + _MaxDepth = types.MAX_RECURSE + _OverMaxDepth = types.MAX_RECURSE + 1 + _UnderMaxDepth = types.MAX_RECURSE - 2 + ) testcases := []struct { name string data string @@ -2368,12 +2374,12 @@ func TestUnmarshalMaxDepth(t *testing.T) { }{ { name: "ArrayUnderMaxNestingDepth", - data: `{"a":` + strings.Repeat(`[`, 65534) + `0` + strings.Repeat(`]`, 65534) + `}`, + data: `{"a":` + 
strings.Repeat(`[`, _UnderMaxDepth) + `0` + strings.Repeat(`]`, _UnderMaxDepth) + `}`, errMaxDepth: false, }, { name: "ArrayOverMaxNestingDepth", - data: `{"a":` + strings.Repeat(`[`, 65537) + `0` + strings.Repeat(`]`, 65537) + `}`, + data: `{"a":` + strings.Repeat(`[`, _OverMaxDepth) + `0` + strings.Repeat(`]`, _OverMaxDepth) + `}`, errMaxDepth: true, }, { @@ -2383,12 +2389,12 @@ func TestUnmarshalMaxDepth(t *testing.T) { }, { name: "ObjectUnderMaxNestingDepth", - data: `{"a":` + strings.Repeat(`{"a":`, 65534) + `0` + strings.Repeat(`}`, 65534) + `}`, + data: `{"a":` + strings.Repeat(`{"a":`, _UnderMaxDepth) + `0` + strings.Repeat(`}`, _UnderMaxDepth) + `}`, errMaxDepth: false, }, { name: "ObjectOverMaxNestingDepth", - data: `{"a":` + strings.Repeat(`{"a":`, 65537) + `0` + strings.Repeat(`}`, 65537) + `}`, + data: `{"a":` + strings.Repeat(`{"a":`, _OverMaxDepth) + `0` + strings.Repeat(`}`, _OverMaxDepth) + `}`, errMaxDepth: true, }, { diff --git a/decoder/pools.go b/decoder/pools.go index a2e6db63e..a11bced7b 100644 --- a/decoder/pools.go +++ b/decoder/pools.go @@ -28,7 +28,7 @@ import ( const ( _MinSlice = 16 - _MaxStack = 65536 // 64k slots + _MaxStack = 4096 // 4k slots _MaxStackBytes = _MaxStack * _PtrBytes _MaxDigitNums = 800 // used in atof fallback algorithm ) diff --git a/encoder/encoder_test.go b/encoder/encoder_test.go index 7b86d74ea..5399a0811 100644 --- a/encoder/encoder_test.go +++ b/encoder/encoder_test.go @@ -524,7 +524,7 @@ type f64Bench struct { name string float float64 } -func BenchmarkEncoder_Float64(b *testing.B) { +func BenchmarkEncode_Float64(b *testing.B) { var bench = []f64Bench{ {"Zero", 0}, {"ShortDecimal", 1000}, @@ -564,7 +564,7 @@ type f32Bench struct { name string float float32 } -func BenchmarkEncoder_Float32(b *testing.B) { +func BenchmarkEncode_Float32(b *testing.B) { var bench = []f32Bench{ {"Zero", 0}, {"ShortDecimal", 1000}, diff --git a/encoder/pools.go b/encoder/pools.go index aedb16155..8214e1b77 100644 --- 
a/encoder/pools.go +++ b/encoder/pools.go @@ -27,7 +27,7 @@ import ( ) const ( - _MaxStack = 65536 // 64k states + _MaxStack = 4096 // 4k states _MaxBuffer = 1048576 // 1MB buffer size _StackSize = unsafe.Sizeof(_Stack{}) diff --git a/internal/native/avx/native_amd64.s b/internal/native/avx/native_amd64.s index 20f385972..5b79cd9b5 100644 --- a/internal/native/avx/native_amd64.s +++ b/internal/native/avx/native_amd64.s @@ -6259,7 +6259,7 @@ LBB19_29: LONG $0x0001fce9; BYTE $0x00 // jmp LBB19_75, $508(%rip) LBB19_30: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x02028f0f; WORD $0x0000 // jg LBB19_58, $514(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x8941; BYTE $0x07 // movl %eax, (%r15) @@ -6267,7 +6267,7 @@ LBB19_30: LONG $0xfffe04e9; BYTE $0xff // jmp LBB19_55, $-508(%rip) LBB19_32: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x01e28f0f; WORD $0x0000 // jg LBB19_58, $482(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x8941; BYTE $0x07 // movl %eax, (%r15) @@ -6299,7 +6299,7 @@ LBB19_37: WORD $0x854d; BYTE $0xed // testq %r13, %r13 LONG $0x016d880f; WORD $0x0000 // js LBB19_75, $365(%rip) WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x01708f0f; WORD $0x0000 // jg LBB19_58, $368(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -6329,7 +6329,7 @@ LBB19_41: LBB19_43: WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x01088f0f; WORD $0x0000 // jg LBB19_58, $264(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -6387,7 +6387,7 @@ LBB19_52: LBB19_53: WORD $0x6349; BYTE $0x07 
// movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x002d8f0f; WORD $0x0000 // jg LBB19_58, $45(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -10089,7 +10089,7 @@ _Digits: QUAD $0x3939383937393639 // .ascii 8, '96979899' QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_a85a2e53: // _pow10_ceil_sig.g +_LB_844550f5: // _pow10_ceil_sig.g QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721 QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331 QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807 @@ -12744,7 +12744,7 @@ _P10_TAB: QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592 QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_e1a02bb8: // _pow10_ceil_sig_f32.g +_LB_8e30165b: // _pow10_ceil_sig_f32.g QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939 QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270 QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433 diff --git a/internal/native/avx2/native_amd64.s b/internal/native/avx2/native_amd64.s index 89149cd68..8954615f5 100644 --- a/internal/native/avx2/native_amd64.s +++ b/internal/native/avx2/native_amd64.s @@ -6897,7 +6897,7 @@ _fsm_exec: LONG $0x00002de9; BYTE $0x00 // jmp LBB19_7, $45(%rip) LBB19_2: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x04188f0f; WORD $0x0000 // jg LBB19_64, $1048(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x0789 // movl %eax, (%rdi) @@ -7045,7 +7045,7 @@ LBB19_29: LONG $0x00023de9; BYTE $0x00 // jmp LBB19_80, $573(%rip) LBB19_31: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x02438f0f; WORD $0x0000 // jg LBB19_64, $579(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x0789 // movl %eax, (%rdi) @@ -7097,7 +7097,7 @@ LBB19_39: LONG $0x01a0880f; WORD $0x0000 // js LBB19_80, 
$416(%rip) WORD $0x894c; BYTE $0xe7 // movq %r12, %rdi LONG $0x24046349 // movslq (%r12), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x019f8f0f; WORD $0x0000 // jg LBB19_64, $415(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x0f89 // movl %ecx, (%rdi) @@ -7137,7 +7137,7 @@ LBB19_44: LBB19_46: WORD $0x6348; BYTE $0x07 // movslq (%rdi), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x01178f0f; WORD $0x0000 // jg LBB19_64, $279(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x0f89 // movl %ecx, (%rdi) @@ -7197,7 +7197,7 @@ LBB19_55: LBB19_56: WORD $0x6348; BYTE $0x07 // movslq (%rdi), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x00408f0f; WORD $0x0000 // jg LBB19_64, $64(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x0f89 // movl %ecx, (%rdi) @@ -11065,7 +11065,7 @@ _Digits: QUAD $0x3939383937393639 // .ascii 8, '96979899' QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_f7997817: // _pow10_ceil_sig.g +_LB_9ad33a8d: // _pow10_ceil_sig.g QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721 QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331 QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807 @@ -13720,7 +13720,7 @@ _P10_TAB: QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592 QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_2989417c: // _pow10_ceil_sig_f32.g +_LB_792baa9e: // _pow10_ceil_sig_f32.g QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939 QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270 QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433 diff --git a/internal/native/sse/native_amd64.s b/internal/native/sse/native_amd64.s index 23875bbb2..ade824050 100644 --- a/internal/native/sse/native_amd64.s +++ b/internal/native/sse/native_amd64.s @@ -6313,7 +6313,7 @@ LBB19_29: LONG 
$0x0001fce9; BYTE $0x00 // jmp LBB19_75, $508(%rip) LBB19_30: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x02028f0f; WORD $0x0000 // jg LBB19_58, $514(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x8941; BYTE $0x07 // movl %eax, (%r15) @@ -6321,7 +6321,7 @@ LBB19_30: LONG $0xfffe04e9; BYTE $0xff // jmp LBB19_55, $-508(%rip) LBB19_32: - LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx + LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx LONG $0x01e28f0f; WORD $0x0000 // jg LBB19_58, $482(%rip) WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax WORD $0x8941; BYTE $0x07 // movl %eax, (%r15) @@ -6353,7 +6353,7 @@ LBB19_37: WORD $0x854d; BYTE $0xed // testq %r13, %r13 LONG $0x016d880f; WORD $0x0000 // js LBB19_75, $365(%rip) WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x01708f0f; WORD $0x0000 // jg LBB19_58, $368(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -6383,7 +6383,7 @@ LBB19_41: LBB19_43: WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x01088f0f; WORD $0x0000 // jg LBB19_58, $264(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -6441,7 +6441,7 @@ LBB19_52: LBB19_53: WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax - LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax + LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax LONG $0x002d8f0f; WORD $0x0000 // jg LBB19_58, $45(%rip) WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15) @@ -10136,7 +10136,7 @@ _Digits: QUAD $0x3939383937393639 // .ascii 8, '96979899' QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_50bb57a5: // _pow10_ceil_sig.g +_LB_a03fbdd2: // 
_pow10_ceil_sig.g QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721 QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331 QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807 @@ -12791,7 +12791,7 @@ _P10_TAB: QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592 QUAD $0x0000000000000000 // .p2align 4, 0x00 -_LB_15b156e2: // _pow10_ceil_sig_f32.g +_LB_a00a2ccc: // _pow10_ceil_sig_f32.g QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939 QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270 QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433 diff --git a/internal/native/types/types.go b/internal/native/types/types.go index e19f8a976..899ebb2da 100644 --- a/internal/native/types/types.go +++ b/internal/native/types/types.go @@ -25,6 +25,9 @@ type ValueType int type ParsingError uint type SearchingError uint +// NOTE: do NOT modify this file alone. +// These definitions are mirrored in native/types.h. + const ( V_EOF ValueType = 1 V_NULL ValueType = 2 @@ -55,7 +58,7 @@ const ( ) const ( - MAX_RECURSE = 65536 + MAX_RECURSE = 4096 ) const ( diff --git a/issue_test/pretouch_test.go b/issue_test/pretouch_test.go index c87e3619d..134794b71 100644 --- a/issue_test/pretouch_test.go +++ b/issue_test/pretouch_test.go @@ -302,7 +302,7 @@ func TestPretouchSynteaRoot(t *testing.T) { println("end decode:", e.UnixNano()) d2 := e.Sub(s).Nanoseconds() println("elapsed:", d2, "ns") - if d1 > d2 * 10 { + if d1 > d2 * 20 { t.Fatal("decoder pretouch not finish yet") } diff --git a/native/native.h b/native/native.h index 5585b3a5b..ec88960b4 100644 --- a/native/native.h +++ b/native/native.h @@ -22,38 +22,7 @@ #include #include -#define V_EOF 1 -#define V_NULL 2 -#define V_TRUE 3 -#define V_FALSE 4 -#define V_ARRAY 5 -#define V_OBJECT 6 -#define V_STRING 7 -#define V_DOUBLE 8 -#define V_INTEGER 9 -#define V_KEY_SEP 10 -#define V_ELEM_SEP 11 -#define V_ARRAY_END 12 -#define V_OBJECT_END 13 -#define V_ATOF_NEED_FALLBACK 14 - -#define F_DBLUNQ (1 << 0) -#define F_UNIREP (1 << 1) - -#define VS_NULL 
0x6c6c756e // 'null' in little endian -#define VS_TRUE 0x65757274 // 'true' in little endian -#define VS_ALSE 0x65736c61 // 'alse' in little endian ('false' without the 'f') - -#define ERR_EOF 1 -#define ERR_INVAL 2 -#define ERR_ESCAPE 3 -#define ERR_UNICODE 4 -#define ERR_OVERFLOW 5 -#define ERR_NUMBER_FMT 6 -#define ERR_RECURSE_MAX 7 -#define ERR_FLOAT_INF 8 - -#define MAX_RECURSE 65536 +#include "types.h" #define likely(v) (__builtin_expect((v), 1)) #define unlikely(v) (__builtin_expect((v), 0)) diff --git a/native/types.h b/native/types.h new file mode 100644 index 000000000..c36467d32 --- /dev/null +++ b/native/types.h @@ -0,0 +1,40 @@ + +#ifndef TYPES_H +#define TYPES_H + +// NOTE: do NOT modify this file alone. +// These definitions are copied from internal/native/types/types.go. + +#define V_EOF 1 +#define V_NULL 2 +#define V_TRUE 3 +#define V_FALSE 4 +#define V_ARRAY 5 +#define V_OBJECT 6 +#define V_STRING 7 +#define V_DOUBLE 8 +#define V_INTEGER 9 +#define V_KEY_SEP 10 +#define V_ELEM_SEP 11 +#define V_ARRAY_END 12 +#define V_OBJECT_END 13 + +#define F_DBLUNQ (1 << 0) +#define F_UNIREP (1 << 1) + +#define VS_NULL 0x6c6c756e // 'null' in little endian +#define VS_TRUE 0x65757274 // 'true' in little endian +#define VS_ALSE 0x65736c61 // 'alse' in little endian ('false' without the 'f') + +#define ERR_EOF 1 +#define ERR_INVAL 2 +#define ERR_ESCAPE 3 +#define ERR_UNICODE 4 +#define ERR_OVERFLOW 5 +#define ERR_NUMBER_FMT 6 +#define ERR_RECURSE_MAX 7 +#define ERR_FLOAT_INF 8 + +#define MAX_RECURSE 4096 + +#endif