From 837dfaa128f21008600ce6affcfd5913ca4eb88f Mon Sep 17 00:00:00 2001 From: John Floren Date: Tue, 17 Nov 2020 14:41:30 -0700 Subject: [PATCH 1/2] Implement fixes for EachKey when you have lots of keys, or big arrays. --- go.mod | 1 + parser.go | 28 +++++---- parser_error_test.go | 140 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 4f35768..7ede21f 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,4 @@ module github.com/buger/jsonparser go 1.13 + diff --git a/parser.go b/parser.go index 26d1cd8..2d73376 100644 --- a/parser.go +++ b/parser.go @@ -380,7 +380,8 @@ func sameTree(p1, p2 []string) bool { } func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int { - var pathFlags int64 + var x struct{} + pathFlags := make([]bool, len(paths)) var level, pathsMatched, i int ln := len(data) @@ -439,7 +440,7 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str pathsBuf[level-1] = bytesToString(&keyUnesc) for pi, p := range paths { - if len(p) != level || pathFlags&bitwiseFlags[pi+1] != 0 || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) { + if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) { continue } @@ -447,7 +448,7 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str i++ pathsMatched++ - pathFlags |= bitwiseFlags[pi+1] + pathFlags[pi] = true v, dt, _, e := Get(data[i:]) cb(pi, v, dt, e) @@ -485,8 +486,9 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str case '}': level-- case '[': - var arrIdxFlags int64 - var pIdxFlags int64 + var ok bool + arrIdxFlags := make(map[int]struct{}) + pIdxFlags := make([]bool, len(paths)) if level < 0 { cb(-1, nil, Unknown, MalformedJsonError) @@ -494,31 +496,31 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str } for pi, p := range paths { - if len(p) < level+1 || pathFlags&bitwiseFlags[pi+1] != 0 || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) { + if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) { continue } if len(p[level]) >= 2 { aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1]) - arrIdxFlags |= bitwiseFlags[aIdx+1] - pIdxFlags |= bitwiseFlags[pi+1] + arrIdxFlags[aIdx] = x + pIdxFlags[pi] = true } } - if arrIdxFlags > 0 { + if len(arrIdxFlags) > 0 { level++ var curIdx int arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) { - if arrIdxFlags&bitwiseFlags[curIdx+1] != 0 { + if _, ok = arrIdxFlags[curIdx]; ok { for pi, p := range paths { - if pIdxFlags&bitwiseFlags[pi+1] != 0 { + if pIdxFlags[pi] { aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1]) if curIdx == aIdx { of := searchKeys(value, p[level:]...) pathsMatched++ - pathFlags |= bitwiseFlags[pi+1] + pathFlags[pi] = true if of != -1 { v, dt, _, e := Get(value[of:]) @@ -930,7 +932,7 @@ func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int return -1, MalformedJsonError } - offset = nT+1 + offset = nT + 1 if len(keys) > 0 { if offset = searchKeys(data, keys...); offset == -1 { diff --git a/parser_error_test.go b/parser_error_test.go index 1cc23ce..9a384c4 100644 --- a/parser_error_test.go +++ b/parser_error_test.go @@ -2,6 +2,7 @@ package jsonparser import ( "fmt" + "strings" "testing" ) @@ -38,3 +39,142 @@ func TestPanickingErrors(t *testing.T) { t.Error("Expected error...") } } + +// check having a very deep key depth +func TestKeyDepth(t *testing.T) { + var sb strings.Builder + var keys []string + //build data + sb.WriteString("{") + for i := 0; i < 128; i++ { + fmt.Fprintf(&sb, `"key%d": %dx,`, i, i) + keys = append(keys, fmt.Sprintf("key%d", i)) + } + sb.WriteString("}") + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys) +} + +// check having a bunch of keys in a call to EachKey +func TestKeyCount(t *testing.T) { + var sb strings.Builder + var keys [][]string + //build data + sb.WriteString("{") + for i := 0; i < 128; i++ { + fmt.Fprintf(&sb, `"key%d":"%d"`, i, i) + if i < 127 { + sb.WriteString(",") + } + keys = append(keys, []string{fmt.Sprintf("key%d", i)}) + } + sb.WriteString("}") + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys...) +} + +// try pulling lots of keys out of a big array +func TestKeyDepthArray(t *testing.T) { + var sb strings.Builder + var keys []string + //build data + sb.WriteString("[") + for i := 0; i < 128; i++ { + fmt.Fprintf(&sb, `{"key": %d},`, i) + keys = append(keys, fmt.Sprintf("[%d].key", i)) + } + sb.WriteString("]") + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys) +} + +// check having a bunch of keys +func TestKeyCountArray(t *testing.T) { + var sb strings.Builder + var keys [][]string + //build data + sb.WriteString("[") + for i := 0; i < 128; i++ { + fmt.Fprintf(&sb, `{"key":"%d"}`, i) + if i < 127 { + sb.WriteString(",") + } + keys = append(keys, []string{fmt.Sprintf("[%d].key", i)}) + } + sb.WriteString("]") + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys...) +} + +// check having a bunch of keys in a super deep array +func TestEachKeyArray(t *testing.T) { + var sb strings.Builder + var keys [][]string + //build data + sb.WriteString(`[`) + for i := 0; i < 127; i++ { + fmt.Fprintf(&sb, `%d`, i) + if i < 127 { + sb.WriteString(",") + } + if i < 32 { + keys = append(keys, []string{fmt.Sprintf("[%d]", 128+i)}) + } + } + sb.WriteString(`]`) + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys...) +} + +func TestLargeArray(t *testing.T) { + var sb strings.Builder + //build data + sb.WriteString(`[`) + for i := 0; i < 127; i++ { + fmt.Fprintf(&sb, `%d`, i) + if i < 127 { + sb.WriteString(",") + } + } + sb.WriteString(`]`) + keys := [][]string{[]string{`[1]`}} + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys...) +} + +func TestArrayOutOfBounds(t *testing.T) { + var sb strings.Builder + //build data + sb.WriteString(`[`) + for i := 0; i < 61; i++ { + fmt.Fprintf(&sb, `%d`, i) + if i < 61 { + sb.WriteString(",") + } + } + sb.WriteString(`]`) + keys := [][]string{[]string{`[128]`}} + + data := []byte(sb.String()) + EachKey(data, func(offset int, value []byte, dt ValueType, err error) { + return + }, keys...) +} From aa3d476efa3454671a92892c8a8ede397a8b50c8 Mon Sep 17 00:00:00 2001 From: John Floren Date: Tue, 17 Nov 2020 14:54:55 -0700 Subject: [PATCH 2/2] remove extraneous bitwiseFlags variable --- parser.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/parser.go b/parser.go index 2d73376..b2f8759 100644 --- a/parser.go +++ b/parser.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "fmt" - "math" "strconv" ) @@ -356,14 +355,6 @@ func searchKeys(data []byte, keys ...string) int { return -1 } -var bitwiseFlags []int64 - -func init() { - for i := 0; i < 63; i++ { - bitwiseFlags = append(bitwiseFlags, int64(math.Pow(2, float64(i)))) - } -} - func sameTree(p1, p2 []string) bool { minLen := len(p1) if len(p2) < minLen {