diff --git a/decode_test.go b/decode_test.go index f3af685e..1c80cbf4 100644 --- a/decode_test.go +++ b/decode_test.go @@ -722,6 +722,18 @@ var unmarshalTests = []struct { "a: 5.5\n", &struct{ A jsonNumberT }{"5.5"}, }, + { + ` +a: + b +b: + ? a + : a`, + &M{"a": "b", + "b": M{ + "a": "a", + }}, + }, } type M map[interface{}]interface{} diff --git a/limit_test.go b/limit_test.go index ba1c0800..f160c873 100644 --- a/limit_test.go +++ b/limit_test.go @@ -37,6 +37,8 @@ var limitTests = []struct { {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, + {name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, + {name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))}, } func (s *S) TestLimits(c *C) { @@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) { benchmark(b, "1000kb of maps") } +func BenchmarkDeepSlice(b *testing.B) { + benchmark(b, "1000kb slice nested at max-depth") +} + +func BenchmarkDeepFlow(b *testing.B) { + benchmark(b, "1000kb slice nested in maps at max-depth") +} + func benchmark(b *testing.B, name string) { for _, t := range limitTests { if t.name != name { diff --git a/scannerc.go b/scannerc.go index 570b8ecd..b044a9cd 100644 --- a/scannerc.go +++ b/scannerc.go @@ -634,13 +634,12 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { need_more_tokens = true } else { // Check if any potential simple key may occupy the head position. - if !yaml_parser_stale_simple_keys(parser) { - return false - } - - for i := range parser.simple_keys { + for i := len(parser.simple_keys) - 1; i >= 0; i-- { simple_key := &parser.simple_keys[i] - if simple_key.possible && simple_key.token_number == parser.tokens_parsed { + if simple_key.token_number < parser.tokens_parsed { + break + } + if yaml_simple_key_is_valid(parser, simple_key) && simple_key.token_number == parser.tokens_parsed { need_more_tokens = true break } @@ -678,11 +677,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { return false } - // Remove obsolete potential simple keys. - if !yaml_parser_stale_simple_keys(parser) { - return false - } - // Check the indentation level against the current column. if !yaml_parser_unroll_indent(parser, parser.mark.column) { return false @@ -837,27 +831,28 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { "found character that cannot start any token") } -// Check the list of potential simple keys and remove the positions that -// cannot contain simple keys anymore. -func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { - // Check for a potential simple key for each flow level. - for i := range parser.simple_keys { - simple_key := &parser.simple_keys[i] - - // The specification requires that a simple key - // - // - is limited to a single line, - // - is shorter than 1024 characters. - if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { - - // Check if the potential simple key to be removed is required. - if simple_key.required { - return yaml_parser_set_scanner_error(parser, - "while scanning a simple key", simple_key.mark, - "could not find expected ':'") - } - simple_key.possible = false +func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) bool { + if !simple_key.possible { + return false + } + + // The 1.2 specification says: + // + // "If the ? indicator is omitted, parsing needs to see past the + // implicit key to recognize it as such. To limit the amount of + // lookahead required, the “:” indicator must appear at most 1024 + // Unicode characters beyond the start of the key. In addition, the key + // is restricted to a single line." + // + if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { + // Check if the potential simple key to be removed is required. + if simple_key.required { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key.mark, + "could not find expected ':'") } + simple_key.possible = false + return false } return true } @@ -879,8 +874,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { possible: true, required: required, token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, } - simple_key.mark = parser.mark if !yaml_parser_remove_simple_key(parser) { return false @@ -912,7 +907,12 @@ const max_flow_level = 10000 // Increase the flow level and resize the simple key list if needed. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { // Reset the simple key on the next level. - parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ + possible: false, + required: false, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + }) // Increase the flow level. parser.flow_level++ @@ -1286,7 +1286,8 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool { simple_key := &parser.simple_keys[len(parser.simple_keys)-1] // Have we found a simple key? - if simple_key.possible { + if yaml_simple_key_is_valid(parser, simple_key) { + // Create the KEY token and insert it into the queue. token := yaml_token_t{ typ: yaml_KEY_TOKEN,