From 05262c8778b068d5a6be0044ad83d6c775e1f8c9 Mon Sep 17 00:00:00 2001 From: Gustavo Niemeyer Date: Thu, 10 Oct 2019 10:08:21 +0100 Subject: [PATCH] Improve heuristics preventing CPU/memory abuse (#515) This is a forward port of v2 commit f221b84 by Jordan Liggitt. --- decode.go | 31 +++++++++++++- limit_test.go | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ scannerc.go | 16 +++++++ 3 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 limit_test.go diff --git a/decode.go b/decode.go index 7c15d987..be63169b 100644 --- a/decode.go +++ b/decode.go @@ -439,12 +439,41 @@ func (d *decoder) fieldByIndex(n *Node, v reflect.Value, index []int) (field ref return v } +const ( + // 400,000 decode operations is ~500kb of dense object declarations, or + // ~5kb of dense object declarations with 10000% alias expansion + alias_ratio_range_low = 400000 + + // 4,000,000 decode operations is ~5MB of dense object declarations, or + // ~4.5MB of dense object declarations with 10% alias expansion + alias_ratio_range_high = 4000000 + + // alias_ratio_range is the range over which we scale allowed alias ratios + alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low) +) + +func allowedAliasRatio(decodeCount int) float64 { + switch { + case decodeCount <= alias_ratio_range_low: + // allow 99% to come from alias expansion for small-to-medium documents + return 0.99 + case decodeCount >= alias_ratio_range_high: + // allow 10% to come from alias expansion for very large documents + return 0.10 + default: + // scale smoothly from 99% down to 10% over the range. + // this maps to 396,000 - 400,000 allowed alias-driven decodes over the range. + // 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps). + return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range) + } +} + func (d *decoder) unmarshal(n *Node, out reflect.Value) (good bool) { d.decodeCount++ if d.aliasDepth > 0 { d.aliasCount++ } - if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > 0.99 { + if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) { failf("document contains excessive aliasing") } if out.Type() == nodeType { diff --git a/limit_test.go b/limit_test.go new file mode 100644 index 00000000..ba1c0800 --- /dev/null +++ b/limit_test.go @@ -0,0 +1,113 @@ +package yaml_test + +import ( + "strings" + "testing" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v2" +) + +var limitTests = []struct { + name string + data []byte + error string +}{ + { + name: "1000kb of maps with 100 aliases", + data: []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`), + error: "yaml: document contains excessive aliasing", + }, { + name: "1000kb of deeply nested slices", + data: []byte(strings.Repeat(`[`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested maps", + data: []byte("x: " + strings.Repeat(`{`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested indents", + data: []byte(strings.Repeat(`- `, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of 1000-indent lines", + data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)), + }, + {name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)}, + {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, + {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, + {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, +} + +func (s *S) TestLimits(c *C) { + if testing.Short() { + return + } + for _, tc := range limitTests { + var v interface{} + err := yaml.Unmarshal(tc.data, &v) + if len(tc.error) > 0 { + c.Assert(err, ErrorMatches, tc.error, Commentf("testcase: %s", tc.name)) + } else { + c.Assert(err, IsNil, Commentf("testcase: %s", tc.name)) + } + } +} + +func Benchmark1000KB100Aliases(b *testing.B) { + benchmark(b, "1000kb of maps with 100 aliases") +} +func Benchmark1000KBDeeplyNestedSlices(b *testing.B) { + benchmark(b, "1000kb of deeply nested slices") +} +func Benchmark1000KBDeeplyNestedMaps(b *testing.B) { + benchmark(b, "1000kb of deeply nested maps") +} +func Benchmark1000KBDeeplyNestedIndents(b *testing.B) { + benchmark(b, "1000kb of deeply nested indents") +} +func Benchmark1000KB1000IndentLines(b *testing.B) { + benchmark(b, "1000kb of 1000-indent lines") +} +func Benchmark1KBMaps(b *testing.B) { + benchmark(b, "1kb of maps") +} +func Benchmark10KBMaps(b *testing.B) { + benchmark(b, "10kb of maps") +} +func Benchmark100KBMaps(b *testing.B) { + benchmark(b, "100kb of maps") +} +func Benchmark1000KBMaps(b *testing.B) { + benchmark(b, "1000kb of maps") +} + +func benchmark(b *testing.B, name string) { + for _, t := range limitTests { + if t.name != name { + continue + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + var v interface{} + err := yaml.Unmarshal(t.data, &v) + if len(t.error) > 0 { + if err == nil { + b.Errorf("expected error, got none") + } else if err.Error() != t.error { + b.Errorf("expected error '%s', got '%s'", t.error, err.Error()) + } + } else { + if err != nil { + b.Errorf("unexpected error: %v", err) + } + } + } + + return + } + + b.Errorf("testcase %q not found", name) +} diff --git a/scannerc.go b/scannerc.go index 4a9c6564..e33f4959 100644 --- a/scannerc.go +++ b/scannerc.go @@ -961,6 +961,9 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { return true } +// max_flow_level limits the flow_level +const max_flow_level = 10000 + // Increase the flow level and resize the simple key list if needed. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { // Reset the simple key on the next level. @@ -968,6 +971,11 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { // Increase the flow level. parser.flow_level++ + if parser.flow_level > max_flow_level { + return yaml_parser_set_scanner_error(parser, + "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_flow_level)) + } return true } @@ -980,6 +988,9 @@ func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { return true } +// max_indents limits the indents stack size +const max_indents = 10000 + // Push the current indentation level to the stack and set the new level // the current column is greater than the indentation level. In this case, // append or insert the specified token into the token queue. @@ -994,6 +1005,11 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml // indentation level. parser.indents = append(parser.indents, parser.indent) parser.indent = column + if len(parser.indents) > max_indents { + return yaml_parser_set_scanner_error(parser, + "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_indents)) + } // Create a token and insert it into the queue. token := yaml_token_t{