Skip to content

Commit

Permalink
Improve heuristics preventing CPU/memory abuse (#515)
Browse files Browse the repository at this point in the history
This is a forward port of v2 commit f221b84 by Jordan Liggitt.
  • Loading branch information
niemeyer committed Oct 10, 2019
1 parent c8b7dad commit 05262c8
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 1 deletion.
31 changes: 30 additions & 1 deletion decode.go
Expand Up @@ -439,12 +439,41 @@ func (d *decoder) fieldByIndex(n *Node, v reflect.Value, index []int) (field ref
return v
}

const (
	// aliasRatioRangeLow: 400,000 decode operations is ~500kb of dense
	// object declarations, or ~5kb of dense object declarations with
	// 10000% alias expansion.
	aliasRatioRangeLow = 400000

	// aliasRatioRangeHigh: 4,000,000 decode operations is ~5MB of dense
	// object declarations, or ~4.5MB of dense object declarations with
	// 10% alias expansion.
	aliasRatioRangeHigh = 4000000

	// aliasRatioRange is the span over which the allowed alias ratio is
	// scaled down from its small-document to its large-document value.
	aliasRatioRange = float64(aliasRatioRangeHigh - aliasRatioRangeLow)
)

// allowedAliasRatio returns the maximum fraction of decode operations
// that may come from alias expansion for a document that has performed
// decodeCount operations so far. Small documents tolerate heavy aliasing;
// very large documents are held to a low ratio so that alias-driven
// expansion (e.g. "billion laughs" style inputs) cannot amplify CPU and
// memory costs unboundedly.
func allowedAliasRatio(decodeCount int) float64 {
	switch {
	case decodeCount <= aliasRatioRangeLow:
		// Allow 99% to come from alias expansion for small-to-medium documents.
		return 0.99
	case decodeCount >= aliasRatioRangeHigh:
		// Allow 10% to come from alias expansion for very large documents.
		return 0.10
	default:
		// Scale smoothly from 99% down to 10% over the range.
		// This maps to 396,000 - 400,000 allowed alias-driven decodes over the range.
		// 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps).
		return 0.99 - 0.89*(float64(decodeCount-aliasRatioRangeLow)/aliasRatioRange)
	}
}

func (d *decoder) unmarshal(n *Node, out reflect.Value) (good bool) {
d.decodeCount++
if d.aliasDepth > 0 {
d.aliasCount++
}
if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > 0.99 {
if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) {
failf("document contains excessive aliasing")
}
if out.Type() == nodeType {
Expand Down
113 changes: 113 additions & 0 deletions limit_test.go
@@ -0,0 +1,113 @@
package yaml_test

import (
"strings"
"testing"

. "gopkg.in/check.v1"
"gopkg.in/yaml.v2"
)

// limitTests exercises the decoder's abuse-prevention limits: inputs that
// expand excessively through aliases or nest too deeply must fail with the
// given error, while large-but-benign documents (empty error) must decode.
var limitTests = []struct {
	name  string // descriptive case name; also used by the benchmarks to select a case
	data  []byte // YAML input fed to Unmarshal
	error string // expected error text; empty means the input must decode cleanly
}{
	{
		// 100 aliases of a ~1MB anchor: trips the excessive-aliasing heuristic.
		name:  "1000kb of maps with 100 aliases",
		data:  []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`),
		error: "yaml: document contains excessive aliasing",
	}, {
		// Unclosed flow sequences: trips the max flow-nesting depth in the scanner.
		name:  "1000kb of deeply nested slices",
		data:  []byte(strings.Repeat(`[`, 1000*1024)),
		error: "yaml: exceeded max depth of 10000",
	}, {
		// Unclosed flow mappings: same flow-depth limit.
		name:  "1000kb of deeply nested maps",
		data:  []byte("x: " + strings.Repeat(`{`, 1000*1024)),
		error: "yaml: exceeded max depth of 10000",
	}, {
		// Ever-deeper block-sequence indentation: trips the indent-stack limit.
		name:  "1000kb of deeply nested indents",
		data:  []byte(strings.Repeat(`- `, 1000*1024)),
		error: "yaml: exceeded max depth of 10000",
	}, {
		// Many lines of moderate (1000-level) nesting: stays under the limits,
		// so it must succeed.
		name: "1000kb of 1000-indent lines",
		data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)),
	},
	// Flat alias-free documents of increasing size: all must decode cleanly.
	{name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)},
	{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
	{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
	{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
}

// TestLimits feeds every limitTests entry through yaml.Unmarshal and checks
// that the decoder either reports the expected limit error or, for cases
// with no expected error, decodes without failing. The inputs are around a
// megabyte each, so the whole test is skipped in -short mode.
func (s *S) TestLimits(c *C) {
	if testing.Short() {
		return
	}
	for _, test := range limitTests {
		var out interface{}
		err := yaml.Unmarshal(test.data, &out)
		comment := Commentf("testcase: %s", test.name)
		if test.error == "" {
			c.Assert(err, IsNil, comment)
			continue
		}
		c.Assert(err, ErrorMatches, test.error, comment)
	}
}

// Each benchmark below runs a single named entry from limitTests, so the
// cost of limit enforcement can be measured per input shape and size.
func Benchmark1000KB100Aliases(b *testing.B) {
	benchmark(b, "1000kb of maps with 100 aliases")
}
func Benchmark1000KBDeeplyNestedSlices(b *testing.B) {
	benchmark(b, "1000kb of deeply nested slices")
}
func Benchmark1000KBDeeplyNestedMaps(b *testing.B) {
	benchmark(b, "1000kb of deeply nested maps")
}
func Benchmark1000KBDeeplyNestedIndents(b *testing.B) {
	benchmark(b, "1000kb of deeply nested indents")
}
func Benchmark1000KB1000IndentLines(b *testing.B) {
	benchmark(b, "1000kb of 1000-indent lines")
}
func Benchmark1KBMaps(b *testing.B) {
	benchmark(b, "1kb of maps")
}
func Benchmark10KBMaps(b *testing.B) {
	benchmark(b, "10kb of maps")
}
func Benchmark100KBMaps(b *testing.B) {
	benchmark(b, "100kb of maps")
}
func Benchmark1000KBMaps(b *testing.B) {
	benchmark(b, "1000kb of maps")
}

// benchmark locates the limitTests entry with the given name and unmarshals
// its data b.N times, verifying on every iteration that Unmarshal produces
// exactly the expected error (or no error). A name that matches no entry is
// reported as a benchmark failure.
func benchmark(b *testing.B, name string) {
	idx := -1
	for i := range limitTests {
		if limitTests[i].name == name {
			idx = i
			break
		}
	}
	if idx == -1 {
		b.Errorf("testcase %q not found", name)
		return
	}
	tc := &limitTests[idx]

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		var v interface{}
		err := yaml.Unmarshal(tc.data, &v)
		switch {
		case len(tc.error) > 0 && err == nil:
			b.Errorf("expected error, got none")
		case len(tc.error) > 0 && err.Error() != tc.error:
			b.Errorf("expected error '%s', got '%s'", tc.error, err.Error())
		case len(tc.error) == 0 && err != nil:
			b.Errorf("unexpected error: %v", err)
		}
	}
}
16 changes: 16 additions & 0 deletions scannerc.go
Expand Up @@ -961,13 +961,21 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
return true
}

// max_flow_level limits the flow_level
const max_flow_level = 10000

// Increase the flow level and resize the simple key list if needed.
func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
	// Reset the simple key on the next level.
	parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})

	// Increase the flow level, refusing documents whose flow nesting
	// exceeds the abuse-prevention limit.
	parser.flow_level++
	if parser.flow_level <= max_flow_level {
		return true
	}
	mark := parser.simple_keys[len(parser.simple_keys)-1].mark
	return yaml_parser_set_scanner_error(parser,
		"while increasing flow level", mark,
		fmt.Sprintf("exceeded max depth of %d", max_flow_level))
}

Expand All @@ -980,6 +988,9 @@ func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
return true
}

// max_indents limits the indents stack size, bounding how deeply block
// constructs may nest (the counterpart of max_flow_level for flow context).
const max_indents = 10000

// Push the current indentation level to the stack and set the new level if
// the current column is greater than the indentation level. In this case,
// append or insert the specified token into the token queue.
Expand All @@ -994,6 +1005,11 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml
// indentation level.
parser.indents = append(parser.indents, parser.indent)
parser.indent = column
if len(parser.indents) > max_indents {
return yaml_parser_set_scanner_error(parser,
"while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
fmt.Sprintf("exceeded max depth of %d", max_indents))
}

// Create a token and insert it into the queue.
token := yaml_token_t{
Expand Down

0 comments on commit 05262c8

Please sign in to comment.