diff --git a/decode_test.go b/decode_test.go index f3af685e..1c80cbf4 100644 --- a/decode_test.go +++ b/decode_test.go @@ -722,6 +722,18 @@ var unmarshalTests = []struct { "a: 5.5\n", &struct{ A jsonNumberT }{"5.5"}, }, + { + ` +a: + b +b: + ? a + : a`, + &M{"a": "b", + "b": M{ + "a": "a", + }}, + }, } type M map[interface{}]interface{} diff --git a/limit_test.go b/limit_test.go index ba1c0800..e16dedc2 100644 --- a/limit_test.go +++ b/limit_test.go @@ -37,6 +37,8 @@ var limitTests = []struct { {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, + {name: "1000kb of giant slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, + {name: "1000kb of tokens nested at max-depth of non-simple-token maps", data: []byte("{a,a:\n" + strings.Repeat(" {a,a:", 10000-1) + `1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + strings.Repeat(`}`, 10000))}, } func (s *S) TestLimits(c *C) { @@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) { benchmark(b, "1000kb of maps") } +func BenchmarkDeepSlice(b *testing.B) { + benchmark(b, "1000kb of giant slice nested at max-depth") +} + +func BenchmarkDeepFlow(b *testing.B) { + benchmark(b, "1000kb of tokens nested at max-depth of non-simple-token maps") +} + func benchmark(b *testing.B, name string) { for _, t := range limitTests { if t.name != name { diff --git a/scannerc.go b/scannerc.go index 570b8ecd..9998d202 100644 --- a/scannerc.go +++ b/scannerc.go @@ -637,14 +637,7 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { if !yaml_parser_stale_simple_keys(parser) { return false } - - for i := range parser.simple_keys { - simple_key := &parser.simple_keys[i] - if simple_key.possible && simple_key.token_number == parser.tokens_parsed { - need_more_tokens = true - break - } - } + _, need_more_tokens = parser.simple_keys_possible_by_token_number[parser.tokens_parsed] } // We are finished. @@ -841,14 +834,21 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { // cannot contain simple keys anymore. func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { // Check for a potential simple key for each flow level. - for i := range parser.simple_keys { + for i := parser.simple_keys_min_possible_index; i < len(parser.simple_keys); i++ { simple_key := &parser.simple_keys[i] + if !simple_key.possible { + // If this key was already marked as not possible, just move on. + // This might be the uninitialized key at the top of the stack, so + // keep min_possible at i instead of i + 1. + parser.simple_keys_min_possible_index = i + continue + } // The specification requires that a simple key // // - is limited to a single line, // - is shorter than 1024 characters. - if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { + if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { // Check if the potential simple key to be removed is required. if simple_key.required { @@ -857,8 +857,15 @@ func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { "could not find expected ':'") } simple_key.possible = false + delete(parser.simple_keys_possible_by_token_number, simple_key.token_number) + parser.simple_keys_min_possible_index = i + 1 + } else { + // Once we find a key that's not stale, the rest will be on the + // same line and shorter, so don't need to be checked now. + break } } + return true } @@ -885,6 +892,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { if !yaml_parser_remove_simple_key(parser) { return false } + parser.simple_keys_possible_by_token_number[simple_key.token_number] = struct{}{} + parser.simple_keys[len(parser.simple_keys)-1] = simple_key } return true @@ -900,9 +909,13 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { "while scanning a simple key", parser.simple_keys[i].mark, "could not find expected ':'") } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + delete(parser.simple_keys_possible_by_token_number, parser.simple_keys[i].token_number) + } + if parser.simple_keys_min_possible_index > i { + parser.simple_keys_min_possible_index = i } - // Remove the key from the stack. - parser.simple_keys[i].possible = false return true } @@ -928,7 +941,14 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { if parser.flow_level > 0 { parser.flow_level-- - parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] + last := len(parser.simple_keys) - 1 + if parser.simple_keys[last].possible { + delete(parser.simple_keys_possible_by_token_number, parser.simple_keys[last].token_number) + } + parser.simple_keys = parser.simple_keys[:last] + if parser.simple_keys_min_possible_index > last { + parser.simple_keys_min_possible_index = last + } } return true } @@ -1005,6 +1025,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { // Initialize the simple key stack. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + parser.simple_keys_possible_by_token_number = make(map[int]struct{}) + // A simple key is allowed at the beginning of the stream. parser.simple_key_allowed = true @@ -1304,6 +1326,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool { // Remove the simple key. simple_key.possible = false + delete(parser.simple_keys_possible_by_token_number, simple_key.token_number) // A simple key cannot follow another simple key. parser.simple_key_allowed = false diff --git a/yamlh.go b/yamlh.go index e25cee56..2a6b8385 100644 --- a/yamlh.go +++ b/yamlh.go @@ -577,8 +577,10 @@ type yaml_parser_t struct { indent int // The current indentation level. indents []int // The indentation levels stack. - simple_key_allowed bool // May a simple key occur at the current position? - simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_key_allowed bool // May a simple key occur at the current position? + simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_min_possible_index int // Where in the stack should we restart scanning for staleness? + simple_keys_possible_by_token_number map[int]struct{} // Does this token_number represent a possible simple_key? // Parser stuff