Skip to content

Commit

Permalink
opt: reduce memory pool size (#302)
Browse files Browse the repository at this point in the history
* fix: reduce state stack size

* fix types

* opt: shrink encoder stack size

* opt: reduce decoder stack size

* add bench.py loops

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
  • Loading branch information
3 people committed Oct 28, 2022
1 parent 3e6f839 commit 6deed01
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 64 deletions.
2 changes: 1 addition & 1 deletion bench.py
Expand Up @@ -19,7 +19,7 @@
import subprocess
import argparse

repeat_time = 10
repeat_time = 100
gbench_prefix = "SONIC_NO_ASYNC_GC=1 go test -benchmem -run=none -count=%d "%(repeat_time)

def run(cmd):
Expand Down
14 changes: 10 additions & 4 deletions decode_test.go
Expand Up @@ -38,6 +38,7 @@ import (
`unicode/utf8`

`github.com/bytedance/sonic/decoder`
`github.com/bytedance/sonic/internal/native/types`
`github.com/davecgh/go-spew/spew`
)

Expand Down Expand Up @@ -2361,19 +2362,24 @@ func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) {
}

func TestUnmarshalMaxDepth(t *testing.T) {
const (
_MaxDepth = types.MAX_RECURSE
_OverMaxDepth = types.MAX_RECURSE + 1
_UnderMaxDepth = types.MAX_RECURSE - 2
)
testcases := []struct {
name string
data string
errMaxDepth bool
}{
{
name: "ArrayUnderMaxNestingDepth",
data: `{"a":` + strings.Repeat(`[`, 65534) + `0` + strings.Repeat(`]`, 65534) + `}`,
data: `{"a":` + strings.Repeat(`[`, _UnderMaxDepth) + `0` + strings.Repeat(`]`, _UnderMaxDepth) + `}`,
errMaxDepth: false,
},
{
name: "ArrayOverMaxNestingDepth",
data: `{"a":` + strings.Repeat(`[`, 65537) + `0` + strings.Repeat(`]`, 65537) + `}`,
data: `{"a":` + strings.Repeat(`[`, _OverMaxDepth) + `0` + strings.Repeat(`]`, _OverMaxDepth) + `}`,
errMaxDepth: true,
},
{
Expand All @@ -2383,12 +2389,12 @@ func TestUnmarshalMaxDepth(t *testing.T) {
},
{
name: "ObjectUnderMaxNestingDepth",
data: `{"a":` + strings.Repeat(`{"a":`, 65534) + `0` + strings.Repeat(`}`, 65534) + `}`,
data: `{"a":` + strings.Repeat(`{"a":`, _UnderMaxDepth) + `0` + strings.Repeat(`}`, _UnderMaxDepth) + `}`,
errMaxDepth: false,
},
{
name: "ObjectOverMaxNestingDepth",
data: `{"a":` + strings.Repeat(`{"a":`, 65537) + `0` + strings.Repeat(`}`, 65537) + `}`,
data: `{"a":` + strings.Repeat(`{"a":`, _OverMaxDepth) + `0` + strings.Repeat(`}`, _OverMaxDepth) + `}`,
errMaxDepth: true,
},
{
Expand Down
2 changes: 1 addition & 1 deletion decoder/pools.go
Expand Up @@ -28,7 +28,7 @@ import (

const (
_MinSlice = 16
_MaxStack = 65536 // 64k slots
_MaxStack = 4096 // 4k slots
_MaxStackBytes = _MaxStack * _PtrBytes
_MaxDigitNums = 800 // used in atof fallback algorithm
)
Expand Down
4 changes: 2 additions & 2 deletions encoder/encoder_test.go
Expand Up @@ -524,7 +524,7 @@ type f64Bench struct {
name string
float float64
}
func BenchmarkEncoder_Float64(b *testing.B) {
func BenchmarkEncode_Float64(b *testing.B) {
var bench = []f64Bench{
{"Zero", 0},
{"ShortDecimal", 1000},
Expand Down Expand Up @@ -564,7 +564,7 @@ type f32Bench struct {
name string
float float32
}
func BenchmarkEncoder_Float32(b *testing.B) {
func BenchmarkEncode_Float32(b *testing.B) {
var bench = []f32Bench{
{"Zero", 0},
{"ShortDecimal", 1000},
Expand Down
2 changes: 1 addition & 1 deletion encoder/pools.go
Expand Up @@ -27,7 +27,7 @@ import (
)

const (
_MaxStack = 65536 // 64k states
_MaxStack = 4096 // 4k states
_MaxBuffer = 1048576 // 1MB buffer size

_StackSize = unsafe.Sizeof(_Stack{})
Expand Down
14 changes: 7 additions & 7 deletions internal/native/avx/native_amd64.s
Expand Up @@ -6259,15 +6259,15 @@ LBB19_29:
LONG $0x0001fce9; BYTE $0x00 // jmp LBB19_75, $508(%rip)

LBB19_30:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x02028f0f; WORD $0x0000 // jg LBB19_58, $514(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x8941; BYTE $0x07 // movl %eax, (%r15)
QUAD $0x000000049744c741; BYTE $0x00 // movl $0, $4(%r15,%rdx,4)
LONG $0xfffe04e9; BYTE $0xff // jmp LBB19_55, $-508(%rip)

LBB19_32:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x01e28f0f; WORD $0x0000 // jg LBB19_58, $482(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x8941; BYTE $0x07 // movl %eax, (%r15)
Expand Down Expand Up @@ -6299,7 +6299,7 @@ LBB19_37:
WORD $0x854d; BYTE $0xed // testq %r13, %r13
LONG $0x016d880f; WORD $0x0000 // js LBB19_75, $365(%rip)
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x01708f0f; WORD $0x0000 // jg LBB19_58, $368(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -6329,7 +6329,7 @@ LBB19_41:

LBB19_43:
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x01088f0f; WORD $0x0000 // jg LBB19_58, $264(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -6387,7 +6387,7 @@ LBB19_52:

LBB19_53:
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x002d8f0f; WORD $0x0000 // jg LBB19_58, $45(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -10089,7 +10089,7 @@ _Digits:
QUAD $0x3939383937393639 // .ascii 8, '96979899'
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_a85a2e53: // _pow10_ceil_sig.g
_LB_844550f5: // _pow10_ceil_sig.g
QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721
QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331
QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807
Expand Down Expand Up @@ -12744,7 +12744,7 @@ _P10_TAB:
QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_e1a02bb8: // _pow10_ceil_sig_f32.g
_LB_8e30165b: // _pow10_ceil_sig_f32.g
QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939
QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270
QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433
Expand Down
14 changes: 7 additions & 7 deletions internal/native/avx2/native_amd64.s
Expand Up @@ -6897,7 +6897,7 @@ _fsm_exec:
LONG $0x00002de9; BYTE $0x00 // jmp LBB19_7, $45(%rip)

LBB19_2:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x04188f0f; WORD $0x0000 // jg LBB19_64, $1048(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x0789 // movl %eax, (%rdi)
Expand Down Expand Up @@ -7045,7 +7045,7 @@ LBB19_29:
LONG $0x00023de9; BYTE $0x00 // jmp LBB19_80, $573(%rip)

LBB19_31:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x02438f0f; WORD $0x0000 // jg LBB19_64, $579(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x0789 // movl %eax, (%rdi)
Expand Down Expand Up @@ -7097,7 +7097,7 @@ LBB19_39:
LONG $0x01a0880f; WORD $0x0000 // js LBB19_80, $416(%rip)
WORD $0x894c; BYTE $0xe7 // movq %r12, %rdi
LONG $0x24046349 // movslq (%r12), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x019f8f0f; WORD $0x0000 // jg LBB19_64, $415(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x0f89 // movl %ecx, (%rdi)
Expand Down Expand Up @@ -7137,7 +7137,7 @@ LBB19_44:

LBB19_46:
WORD $0x6348; BYTE $0x07 // movslq (%rdi), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x01178f0f; WORD $0x0000 // jg LBB19_64, $279(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x0f89 // movl %ecx, (%rdi)
Expand Down Expand Up @@ -7197,7 +7197,7 @@ LBB19_55:

LBB19_56:
WORD $0x6348; BYTE $0x07 // movslq (%rdi), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x00408f0f; WORD $0x0000 // jg LBB19_64, $64(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x0f89 // movl %ecx, (%rdi)
Expand Down Expand Up @@ -11065,7 +11065,7 @@ _Digits:
QUAD $0x3939383937393639 // .ascii 8, '96979899'
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_f7997817: // _pow10_ceil_sig.g
_LB_9ad33a8d: // _pow10_ceil_sig.g
QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721
QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331
QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807
Expand Down Expand Up @@ -13720,7 +13720,7 @@ _P10_TAB:
QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_2989417c: // _pow10_ceil_sig_f32.g
_LB_792baa9e: // _pow10_ceil_sig_f32.g
QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939
QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270
QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433
Expand Down
14 changes: 7 additions & 7 deletions internal/native/sse/native_amd64.s
Expand Up @@ -6313,15 +6313,15 @@ LBB19_29:
LONG $0x0001fce9; BYTE $0x00 // jmp LBB19_75, $508(%rip)

LBB19_30:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x02028f0f; WORD $0x0000 // jg LBB19_58, $514(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x8941; BYTE $0x07 // movl %eax, (%r15)
QUAD $0x000000049744c741; BYTE $0x00 // movl $0, $4(%r15,%rdx,4)
LONG $0xfffe04e9; BYTE $0xff // jmp LBB19_55, $-508(%rip)

LBB19_32:
LONG $0xfffffa81; WORD $0x0000 // cmpl $65535, %edx
LONG $0x0ffffa81; WORD $0x0000 // cmpl $4095, %edx
LONG $0x01e28f0f; WORD $0x0000 // jg LBB19_58, $482(%rip)
WORD $0x428d; BYTE $0x01 // leal $1(%rdx), %eax
WORD $0x8941; BYTE $0x07 // movl %eax, (%r15)
Expand Down Expand Up @@ -6353,7 +6353,7 @@ LBB19_37:
WORD $0x854d; BYTE $0xed // testq %r13, %r13
LONG $0x016d880f; WORD $0x0000 // js LBB19_75, $365(%rip)
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x01708f0f; WORD $0x0000 // jg LBB19_58, $368(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -6383,7 +6383,7 @@ LBB19_41:

LBB19_43:
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x01088f0f; WORD $0x0000 // jg LBB19_58, $264(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -6441,7 +6441,7 @@ LBB19_52:

LBB19_53:
WORD $0x6349; BYTE $0x07 // movslq (%r15), %rax
LONG $0xffff3d48; WORD $0x0000 // cmpq $65535, %rax
LONG $0x0fff3d48; WORD $0x0000 // cmpq $4095, %rax
LONG $0x002d8f0f; WORD $0x0000 // jg LBB19_58, $45(%rip)
WORD $0x488d; BYTE $0x01 // leal $1(%rax), %ecx
WORD $0x8941; BYTE $0x0f // movl %ecx, (%r15)
Expand Down Expand Up @@ -10136,7 +10136,7 @@ _Digits:
QUAD $0x3939383937393639 // .ascii 8, '96979899'
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_50bb57a5: // _pow10_ceil_sig.g
_LB_a03fbdd2: // _pow10_ceil_sig.g
QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721
QUAD $0x25e8e89c13bb0f7b // .quad 2731688931043774331
QUAD $0x9faacf3df73609b1 // .quad -6941508010590729807
Expand Down Expand Up @@ -12791,7 +12791,7 @@ _P10_TAB:
QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592
QUAD $0x0000000000000000 // .p2align 4, 0x00

_LB_15b156e2: // _pow10_ceil_sig_f32.g
_LB_a00a2ccc: // _pow10_ceil_sig_f32.g
QUAD $0x81ceb32c4b43fcf5 // .quad -9093133594791772939
QUAD $0xa2425ff75e14fc32 // .quad -6754730975062328270
QUAD $0xcad2f7f5359a3b3f // .quad -3831727700400522433
Expand Down
5 changes: 4 additions & 1 deletion internal/native/types/types.go
Expand Up @@ -25,6 +25,9 @@ type ValueType int
type ParsingError uint
type SearchingError uint

// NOTE: do NOT modify these definitions in isolation.
// They are mirrored in native/types.h; keep both files in sync.

const (
V_EOF ValueType = 1
V_NULL ValueType = 2
Expand Down Expand Up @@ -55,7 +58,7 @@ const (
)

const (
MAX_RECURSE = 65536
MAX_RECURSE = 4096
)

const (
Expand Down
2 changes: 1 addition & 1 deletion issue_test/pretouch_test.go
Expand Up @@ -302,7 +302,7 @@ func TestPretouchSynteaRoot(t *testing.T) {
println("end decode:", e.UnixNano())
d2 := e.Sub(s).Nanoseconds()
println("elapsed:", d2, "ns")
if d1 > d2 * 10 {
if d1 > d2 * 20 {
t.Fatal("decoder pretouch not finish yet")
}

Expand Down
33 changes: 1 addition & 32 deletions native/native.h
Expand Up @@ -22,38 +22,7 @@
#include <immintrin.h>
#include <stdbool.h>

#define V_EOF 1
#define V_NULL 2
#define V_TRUE 3
#define V_FALSE 4
#define V_ARRAY 5
#define V_OBJECT 6
#define V_STRING 7
#define V_DOUBLE 8
#define V_INTEGER 9
#define V_KEY_SEP 10
#define V_ELEM_SEP 11
#define V_ARRAY_END 12
#define V_OBJECT_END 13
#define V_ATOF_NEED_FALLBACK 14

#define F_DBLUNQ (1 << 0)
#define F_UNIREP (1 << 1)

#define VS_NULL 0x6c6c756e // 'null' in little endian
#define VS_TRUE 0x65757274 // 'true' in little endian
#define VS_ALSE 0x65736c61 // 'alse' in little endian ('false' without the 'f')

#define ERR_EOF 1
#define ERR_INVAL 2
#define ERR_ESCAPE 3
#define ERR_UNICODE 4
#define ERR_OVERFLOW 5
#define ERR_NUMBER_FMT 6
#define ERR_RECURSE_MAX 7
#define ERR_FLOAT_INF 8

#define MAX_RECURSE 65536
#include "types.h"

#define likely(v) (__builtin_expect((v), 1))
#define unlikely(v) (__builtin_expect((v), 0))
Expand Down
40 changes: 40 additions & 0 deletions native/types.h
@@ -0,0 +1,40 @@

#ifndef TYPES_H
#define TYPES_H

// NOTE: do NOT modify this file in isolation.
// These definitions are copied from internal/native/types/types.go;
// any change here must be mirrored there (and vice versa).

// Value type tags (V_*), matching the ValueType constants on the Go side.
#define V_EOF 1
#define V_NULL 2
#define V_TRUE 3
#define V_FALSE 4
#define V_ARRAY 5
#define V_OBJECT 6
#define V_STRING 7
#define V_DOUBLE 8
#define V_INTEGER 9
#define V_KEY_SEP 10
#define V_ELEM_SEP 11
#define V_ARRAY_END 12
#define V_OBJECT_END 13

// Flag bits (F_*); each occupies a distinct bit so they can be OR-ed together.
#define F_DBLUNQ (1 << 0)
#define F_UNIREP (1 << 1)

// 4-byte literal patterns (VS_*), stored as little-endian 32-bit integers.
#define VS_NULL 0x6c6c756e // 'null' in little endian
#define VS_TRUE 0x65757274 // 'true' in little endian
#define VS_ALSE 0x65736c61 // 'alse' in little endian ('false' without the 'f')

// Error codes (ERR_*).
#define ERR_EOF 1
#define ERR_INVAL 2
#define ERR_ESCAPE 3
#define ERR_UNICODE 4
#define ERR_OVERFLOW 5
#define ERR_NUMBER_FMT 6
#define ERR_RECURSE_MAX 7
#define ERR_FLOAT_INF 8

// Recursion limit; must equal MAX_RECURSE in internal/native/types/types.go.
#define MAX_RECURSE 4096

#endif

0 comments on commit 6deed01

Please sign in to comment.