Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize a couple of nested loops in the scanner. #537

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions decode_test.go
Expand Up @@ -722,6 +722,18 @@ var unmarshalTests = []struct {
"a: 5.5\n",
&struct{ A jsonNumberT }{"5.5"},
},
{
`
a:
b
b:
? a
: a`,
&M{"a": "b",
"b": M{
"a": "a",
}},
},
}

type M map[interface{}]interface{}
Expand Down
10 changes: 10 additions & 0 deletions limit_test.go
Expand Up @@ -37,6 +37,8 @@ var limitTests = []struct {
{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
{name: "1000kb of giant slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
{name: "1000kb of tokens nested at max-depth of non-simple-token maps", data: []byte("{a,a:\n" + strings.Repeat(" {a,a:", 10000-1) + `1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + strings.Repeat(`}`, 10000))},
}

func (s *S) TestLimits(c *C) {
Expand Down Expand Up @@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) {
benchmark(b, "1000kb of maps")
}

func BenchmarkDeepSlice(b *testing.B) {
benchmark(b, "1000kb of giant slice nested at max-depth")
}

func BenchmarkDeepFlow(b *testing.B) {
benchmark(b, "1000kb of tokens nested at max-depth of non-simple-token maps")
}

func benchmark(b *testing.B, name string) {
for _, t := range limitTests {
if t.name != name {
Expand Down
49 changes: 36 additions & 13 deletions scannerc.go
Expand Up @@ -637,14 +637,7 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
if !yaml_parser_stale_simple_keys(parser) {
return false
}

for i := range parser.simple_keys {
simple_key := &parser.simple_keys[i]
if simple_key.possible && simple_key.token_number == parser.tokens_parsed {
need_more_tokens = true
break
}
}
_, need_more_tokens = parser.simple_keys_possible_by_token_number[parser.tokens_parsed]
}

// We are finished.
Expand Down Expand Up @@ -841,14 +834,21 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
// cannot contain simple keys anymore.
func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
// Check for a potential simple key for each flow level.
for i := range parser.simple_keys {
for i := parser.simple_keys_min_possible_index; i < len(parser.simple_keys); i++ {
simple_key := &parser.simple_keys[i]

if !simple_key.possible {
// If this key was already marked as not possible, just move on.
// This might be the uninitialized key at the top of the stack, so
// keep min_possible at i instead of i + 1.
parser.simple_keys_min_possible_index = i
continue
}
// The specification requires that a simple key
//
// - is limited to a single line,
// - is shorter than 1024 characters.
if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) {
if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {

// Check if the potential simple key to be removed is required.
if simple_key.required {
Expand All @@ -857,8 +857,15 @@ func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
"could not find expected ':'")
}
simple_key.possible = false
delete(parser.simple_keys_possible_by_token_number, simple_key.token_number)
parser.simple_keys_min_possible_index = i + 1
} else {
// Once we find a key that's not stale, the rest will be on the
// same line and shorter, so don't need to be checked now.
break
}
}

return true
}

Expand All @@ -885,6 +892,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
if !yaml_parser_remove_simple_key(parser) {
return false
}
parser.simple_keys_possible_by_token_number[simple_key.token_number] = struct{}{}

parser.simple_keys[len(parser.simple_keys)-1] = simple_key
}
return true
Expand All @@ -900,9 +909,13 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
"while scanning a simple key", parser.simple_keys[i].mark,
"could not find expected ':'")
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
delete(parser.simple_keys_possible_by_token_number, parser.simple_keys[i].token_number)
}
if parser.simple_keys_min_possible_index > i {
parser.simple_keys_min_possible_index = i
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
return true
}

Expand All @@ -928,7 +941,14 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
if parser.flow_level > 0 {
parser.flow_level--
parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1]
last := len(parser.simple_keys) - 1
if parser.simple_keys[last].possible {
delete(parser.simple_keys_possible_by_token_number, parser.simple_keys[last].token_number)
}
parser.simple_keys = parser.simple_keys[:last]
if parser.simple_keys_min_possible_index > last {
parser.simple_keys_min_possible_index = last
}
}
return true
}
Expand Down Expand Up @@ -1005,6 +1025,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
// Initialize the simple key stack.
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})

parser.simple_keys_possible_by_token_number = make(map[int]struct{})

// A simple key is allowed at the beginning of the stream.
parser.simple_key_allowed = true

Expand Down Expand Up @@ -1304,6 +1326,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {

// Remove the simple key.
simple_key.possible = false
delete(parser.simple_keys_possible_by_token_number, simple_key.token_number)

// A simple key cannot follow another simple key.
parser.simple_key_allowed = false
Expand Down
6 changes: 4 additions & 2 deletions yamlh.go
Expand Up @@ -577,8 +577,10 @@ type yaml_parser_t struct {
indent int // The current indentation level.
indents []int // The indentation levels stack.

simple_key_allowed bool // May a simple key occur at the current position?
simple_keys []yaml_simple_key_t // The stack of simple keys.
simple_key_allowed bool // May a simple key occur at the current position?
simple_keys []yaml_simple_key_t // The stack of simple keys.
simple_keys_min_possible_index int // Where in the stack should we restart scanning for staleness?
simple_keys_possible_by_token_number map[int]struct{} // Does this token_number represent a possible simple_key?

// Parser stuff

Expand Down