Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize cases with long potential simple_keys #555

Merged
merged 3 commits into from Jan 21, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions limit_test.go
Expand Up @@ -39,6 +39,7 @@ var limitTests = []struct {
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
{name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))},
}

func (s *S) TestLimits(c *C) {
Expand Down Expand Up @@ -92,6 +93,10 @@ func BenchmarkDeepFlow(b *testing.B) {
benchmark(b, "1000kb slice nested in maps at max-depth")
}

// Benchmark1000KBMaxDepthNested hands the limit-test case named
// "1000kb of 10000-nested lines" to the shared benchmark helper.
func Benchmark1000KBMaxDepthNested(b *testing.B) {
	const caseName = "1000kb of 10000-nested lines"
	benchmark(b, caseName)
}

func benchmark(b *testing.B, name string) {
for _, t := range limitTests {
if t.name != name {
Expand Down
38 changes: 32 additions & 6 deletions scannerc.go
Expand Up @@ -633,15 +633,20 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
// Queue is empty.
need_more_tokens = true
} else {
// Check for any now stale keys in a required position.
if !yaml_parser_stale_simple_keys(parser) {
return false
}
// Check if any potential simple key may occupy the head position.
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
simple_key := &parser.simple_keys[i]
if !simple_key.possible {
break
}
cjcullen marked this conversation as resolved.
Show resolved Hide resolved
if simple_key.token_number < parser.tokens_parsed {
break
}
if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
return false
} else if valid && simple_key.token_number == parser.tokens_parsed {
if simple_key.token_number == parser.tokens_parsed {
need_more_tokens = true
break
}
Expand Down Expand Up @@ -859,6 +864,20 @@ func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key
return true, true
}

func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
// fmt.Println(parser.simple_keys_min_possible_index, ":", len(parser.simple_keys))
cjcullen marked this conversation as resolved.
Show resolved Hide resolved
for i := parser.simple_keys_min_possible_index; i < len(parser.simple_keys); i++ {
cjcullen marked this conversation as resolved.
Show resolved Hide resolved
simple_key := &parser.simple_keys[i]
if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
return false
} else if !valid {
parser.simple_keys_min_possible_index = i + 1
}

}
return true
}

// Check if a simple key may start at the current position and add it if
// needed.
func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
Expand Down Expand Up @@ -897,9 +916,12 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
"while scanning a simple key", parser.simple_keys[i].mark,
"could not find expected ':'")
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
if parser.simple_keys_min_possible_index > i {
parser.simple_keys_min_possible_index = i
}
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
return true
}

Expand Down Expand Up @@ -930,7 +952,11 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
// yaml_parser_decrease_flow_level decrements parser.flow_level when it is
// positive and pops the matching top entry from parser.simple_keys.
//
// The entry is popped exactly once per call, and
// simple_keys_min_possible_index is clamped so it never exceeds the length of
// the shortened stack. When flow_level is not positive the parser is left
// untouched. The function always returns true.
func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
	if parser.flow_level <= 0 {
		return true
	}
	parser.flow_level--

	// last is both the index of the entry being dropped and the length of the
	// stack afterwards.
	last := len(parser.simple_keys) - 1
	parser.simple_keys = parser.simple_keys[:last]
	if parser.simple_keys_min_possible_index > last {
		parser.simple_keys_min_possible_index = last
	}
	return true
}
Expand Down
5 changes: 3 additions & 2 deletions yamlh.go
Expand Up @@ -577,8 +577,9 @@ type yaml_parser_t struct {
indent int // The current indentation level.
indents []int // The indentation levels stack.

simple_key_allowed bool // May a simple key occur at the current position?
simple_keys []yaml_simple_key_t // The stack of simple keys.
simple_key_allowed bool // May a simple key occur at the current position?
simple_keys []yaml_simple_key_t // The stack of simple keys.
simple_keys_min_possible_index int

// Parser stuff

Expand Down