Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port stale simple_keys fix to v2 #543

Merged
merged 2 commits into from Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions decode_test.go
Expand Up @@ -722,6 +722,18 @@ var unmarshalTests = []struct {
"a: 5.5\n",
&struct{ A jsonNumberT }{"5.5"},
},
{
`
a:
b
b:
? a
: a`,
&M{"a": "b",
"b": M{
"a": "a",
}},
},
}

type M map[interface{}]interface{}
Expand Down
10 changes: 10 additions & 0 deletions limit_test.go
Expand Up @@ -37,6 +37,8 @@ var limitTests = []struct {
{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
}

func (s *S) TestLimits(c *C) {
Expand Down Expand Up @@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) {
benchmark(b, "1000kb of maps")
}

func BenchmarkDeepSlice(b *testing.B) {
benchmark(b, "1000kb slice nested at max-depth")
}

func BenchmarkDeepFlow(b *testing.B) {
benchmark(b, "1000kb slice nested in maps at max-depth")
}

func benchmark(b *testing.B, name string) {
for _, t := range limitTests {
if t.name != name {
Expand Down
69 changes: 35 additions & 34 deletions scannerc.go
Expand Up @@ -634,13 +634,12 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
need_more_tokens = true
} else {
// Check if any potential simple key may occupy the head position.
if !yaml_parser_stale_simple_keys(parser) {
return false
}

for i := range parser.simple_keys {
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
simple_key := &parser.simple_keys[i]
if simple_key.possible && simple_key.token_number == parser.tokens_parsed {
if simple_key.token_number < parser.tokens_parsed {
break
}
if yaml_simple_key_is_valid(parser, simple_key) && simple_key.token_number == parser.tokens_parsed {
need_more_tokens = true
break
}
Expand Down Expand Up @@ -678,11 +677,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
return false
}

// Remove obsolete potential simple keys.
if !yaml_parser_stale_simple_keys(parser) {
return false
}

// Check the indentation level against the current column.
if !yaml_parser_unroll_indent(parser, parser.mark.column) {
return false
Expand Down Expand Up @@ -837,27 +831,28 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
"found character that cannot start any token")
}

// Check the list of potential simple keys and remove the positions that
// cannot contain simple keys anymore.
func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
// Check for a potential simple key for each flow level.
for i := range parser.simple_keys {
simple_key := &parser.simple_keys[i]

// The specification requires that a simple key
//
// - is limited to a single line,
// - is shorter than 1024 characters.
if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) {

// Check if the potential simple key to be removed is required.
if simple_key.required {
return yaml_parser_set_scanner_error(parser,
"while scanning a simple key", simple_key.mark,
"could not find expected ':'")
}
simple_key.possible = false
func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) bool {
if !simple_key.possible {
return false
}

// The 1.2 specification says:
//
// "If the ? indicator is omitted, parsing needs to see past the
// implicit key to recognize it as such. To limit the amount of
// lookahead required, the “:” indicator must appear at most 1024
// Unicode characters beyond the start of the key. In addition, the key
// is restricted to a single line."
//
if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
// Check if the potential simple key to be removed is required.
if simple_key.required {
return yaml_parser_set_scanner_error(parser,
"while scanning a simple key", simple_key.mark,
"could not find expected ':'")
}
simple_key.possible = false
return false
}
return true
}
Expand All @@ -879,8 +874,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
possible: true,
required: required,
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
mark: parser.mark,
}
simple_key.mark = parser.mark

if !yaml_parser_remove_simple_key(parser) {
return false
Expand Down Expand Up @@ -912,7 +907,12 @@ const max_flow_level = 10000
// Increase the flow level and resize the simple key list if needed.
func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
// Reset the simple key on the next level.
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
possible: false,
required: false,
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
mark: parser.mark,
})

// Increase the flow level.
parser.flow_level++
Expand Down Expand Up @@ -1286,7 +1286,8 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
simple_key := &parser.simple_keys[len(parser.simple_keys)-1]

// Have we found a simple key?
if simple_key.possible {
if yaml_simple_key_is_valid(parser, simple_key) {

// Create the KEY token and insert it into the queue.
token := yaml_token_t{
typ: yaml_KEY_TOKEN,
Expand Down