Skip to content

Commit

Permalink
topdown/object: Rework object.union_n to use in-place merge algorithm.
Browse files Browse the repository at this point in the history
This commit fixes a performance regression for the object.union_n builtin,
discovered in issue open-policy-agent#4985.

The original logic for the builtin did pairwise mergeWithOverwrite calls
between the input Objects, resulting in many wasted intermediate result
Objects that were almost immediately discarded. The updated builtin uses
a new algorithm to do a single pass across the input Objects, respecting
the "last assignment wins, with merges" semantics of the original builtin
implementation.

In the included benchmarks, this provides a 2x speed and 2-3x memory
efficiency improvement over using a pure-Rego comprehension to do the
same job, and a 6x or greater improvement over the original implementation
on all metrics as input Object arrays grow larger.

Fixes open-policy-agent#4985

Signed-off-by: Philip Conrad <philipaconrad@gmail.com>
  • Loading branch information
philipaconrad committed Sep 13, 2022
1 parent 478812c commit f5cf66e
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 10 deletions.
55 changes: 45 additions & 10 deletions topdown/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,30 @@ func builtinObjectUnionN(_ BuiltinContext, operands []*ast.Term, iter func(*ast.
return err
}

r := ast.NewObject()
err = arr.Iter(func(t *ast.Term) error {
var o ast.Object
o, err = builtins.ObjectOperand(t.Value, 1)
// Because we need merge-with-overwrite behavior, we can iterate
// back-to-front, and get a mostly correct set of key assignments that
// give us the "last assignment wins, with merges" behavior we want.
// However, if a non-object overwrites an object value anywhere in the
// chain of assignments for a key, we have to "freeze" that key to
// prevent accidentally picking up nested objects that could merge with
// it from earlier in the input array.
// Example:
// Input: [{"a": {"b": 2}}, {"a": 4}, {"a": {"c": 3}}]
// Want Output: {"a": {"c": 3}}
result := ast.NewObject()
frozenKeys := map[*ast.Term]struct{}{}
for i := arr.Len() - 1; i >= 0; i-- {
o, ok := arr.Elem(i).Value.(ast.Object)
if !ok {
return builtins.NewOperandElementErr(1, arr, arr.Elem(i).Value, "object")
}
mergewithOverwriteInPlace(result, o, frozenKeys)
if err != nil {
return err
}
r = mergeWithOverwrite(r, o)
return nil
})
if err != nil {
return err
}

return iter(ast.NewTerm(r))
return iter(ast.NewTerm(result))
}

func builtinObjectRemove(_ BuiltinContext, operands []*ast.Term, iter func(*ast.Term) error) error {
Expand Down Expand Up @@ -173,6 +182,32 @@ func mergeWithOverwrite(objA, objB ast.Object) ast.Object {
return merged
}

// Modifies obj with any new keys from other, and recursively
// merges any keys where the values are both objects.
func mergewithOverwriteInPlace(obj, other ast.Object, frozenKeys map[*ast.Term]struct{}) {
other.Foreach(func(k, v *ast.Term) {
v2 := obj.Get(k)
// The key didn't exist in other, keep the original value.
if v2 == nil {
obj.Insert(k, v)
return
}
// The key exists in both. Merge or reject change.
updateValueObj, ok2 := v.Value.(ast.Object)
originalValueObj, ok1 := v2.Value.(ast.Object)
// Both are objects? Merge recursively.
if ok1 && ok2 {
// Check to make sure that this key isn't frozen before merging.
if _, ok := frozenKeys[v2]; !ok {
mergewithOverwriteInPlace(originalValueObj, updateValueObj, frozenKeys)
}
} else {
// Else, original value wins. Freeze the key.
frozenKeys[v2] = struct{}{}
}
})
}

func init() {
RegisterBuiltinFunc(ast.ObjectUnion.Name, builtinObjectUnion)
RegisterBuiltinFunc(ast.ObjectUnionN.Name, builtinObjectUnionN)
Expand Down
122 changes: 122 additions & 0 deletions topdown/object_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright 2022 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"context"
"fmt"
"testing"

"github.com/open-policy-agent/opa/ast"
"github.com/open-policy-agent/opa/storage"
inmem "github.com/open-policy-agent/opa/storage/inmem/test"
)

func genNxMObjectBenchmarkData(n, m int) ast.Value {
objList := make([]*ast.Term, n)
for i := 0; i < n; i++ {
v := ast.NewObject()
for j := 0; j < m; j++ {
v.Insert(ast.StringTerm(fmt.Sprintf("%d,%d", i, j)), ast.BooleanTerm(true))
}
objList[i] = ast.NewTerm(v)
}
return ast.NewArray(objList...)
}

func BenchmarkObjectUnionN(b *testing.B) {
ctx := context.Background()

sizes := []int{10, 100, 250}

for _, n := range sizes {
for _, m := range sizes {
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) {
store := inmem.NewFromObject(map[string]interface{}{"objs": genNxMObjectBenchmarkData(n, m)})
module := `package test
combined := object.union_n(data.objs)`

query := ast.MustParseBody("data.test.combined")
compiler := ast.MustCompileModules(map[string]string{
"test.rego": module,
})

b.ResetTimer()

for i := 0; i < b.N; i++ {

err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {

q := NewQuery(query).
WithCompiler(compiler).
WithStore(store).
WithTransaction(txn)

_, err := q.Run(ctx)
if err != nil {
return err
}

return nil
})

if err != nil {
b.Fatal(err)
}
}
})
}
}
}

func BenchmarkObjectUnionNSlow(b *testing.B) {
// This benchmarks the suggested means to implement union
// without using the builtin, to give us an idea of whether or not
// the builtin is actually making things any faster.
ctx := context.Background()

sizes := []int{10, 100, 250}

for _, n := range sizes {
for _, m := range sizes {
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) {
store := inmem.NewFromObject(map[string]interface{}{"objs": genNxMObjectBenchmarkData(n, m)})
module := `package test
combined := {k: true | s := data.objs[_]; s[k]}`

query := ast.MustParseBody("data.test.combined")
compiler := ast.MustCompileModules(map[string]string{
"test.rego": module,
})

b.ResetTimer()

for i := 0; i < b.N; i++ {

err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {

q := NewQuery(query).
WithCompiler(compiler).
WithStore(store).
WithTransaction(txn)

_, err := q.Run(ctx)
if err != nil {
return err
}

return nil
})

if err != nil {
b.Fatal(err)
}
}
})
}
}
}
81 changes: 81 additions & 0 deletions topdown/object_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2022 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"testing"

"github.com/open-policy-agent/opa/ast"
)

func TestObjectUnionNBuiltin(t *testing.T) {
tests := []struct {
note string
query string
input string
expected string
}{
// NOTE(philipc): These tests assume that erroneous types are
// checked elsewhere, and focus only on functional correctness.
{
note: "Empty",
input: `[]`,
expected: `{}`,
},
{
note: "Singletons",
input: `[{1: true}, {2: true}, {3: true}]`,
expected: `{1: true, 2: true, 3: true}`,
},
{
note: "One object",
input: `[{1: true, 2: true, 3: true}]`,
expected: `{1: true, 2: true, 3: true}`,
},
{
note: "One object + empty",
input: `[{1: true, 2: true, 3: true}, {}]`,
expected: `{1: true, 2: true, 3: true}`,
},
{
note: "Multiple objects, with scalar duplicates",
input: `[{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2}, {"C": 3}, {"D": 4, "E": 5}]`,
expected: `{"A": 1, "B": 2, "C": 3, "D": 4, "E": 5}`,
},
{
note: "2x objects, with simple merge on key",
input: `[{"A": 1, "B": {"D": 4}, "C": 3}, {"B": 200}]`,
expected: `{"A": 1, "B": 200, "C": 3,}`,
},
{
note: "2x objects, with complex merge on nested object",
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": {"X": 49, "Z": 50}}}]`,
expected: `{"A": 1, "B": {"N1": {"X": 49, "Z": 50}}, "C": 3}`,
},
{
note: "Multiple objects, with scalar, then object, overwrite on nested key",
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": 23}}, {"B": {"N1": {"Z": 50}}}]`,
expected: `{"A": 1, "B": {"N1": {"Z": 50}}, "C": 3}`,
},
{
note: "Multiple objects, with complex overwrite on nested key",
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": 23}}, {"B": {"N1": {"Z": 50}}}, {"B": {"N1": {"Z": 35}}}]`,
expected: `{"A": 1, "B": {"N1": {"Z": 35}}, "C": 3}`,
},
}

for _, tc := range tests {
inputs := ast.MustParseTerm(tc.input)
result, err := getResult(builtinObjectUnionN, inputs)
if err != nil {
t.Fatal(err)
}

expected := ast.MustParseTerm(tc.expected)
if !result.Equal(expected) {
t.Fatalf("Expected %v but got %v", expected, result)
}
}
}

0 comments on commit f5cf66e

Please sign in to comment.