-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
topdown/object: Rework object.union_n to use in-place merge algorithm. (#5127)

This commit fixes a performance regression for the object.union_n builtin, discovered in issue #4985. The original logic for the builtin did pairwise mergeWithOverwrite calls between the input Objects, resulting in many wasted intermediate result Objects that were almost immediately discarded.

The updated builtin uses a new algorithm to do a single pass across the input Objects, respecting the "last assignment wins, with merges" semantics of the original builtin implementation.

In the included benchmarks, this provides a 2x speed and 2-3x memory efficiency improvement over using a pure-Rego comprehension to do the same job, and a 6x or greater improvement over the original implementation on all metrics as input Object arrays grow larger.

Fixes #4985

Signed-off-by: Philip Conrad <philipaconrad@gmail.com>
- Loading branch information
1 parent
33ec9fb
commit 5fe4a86
Showing
3 changed files
with
248 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
// Copyright 2022 The OPA Authors. All rights reserved. | ||
// Use of this source code is governed by an Apache2 | ||
// license that can be found in the LICENSE file. | ||
|
||
package topdown | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/open-policy-agent/opa/ast" | ||
"github.com/open-policy-agent/opa/storage" | ||
inmem "github.com/open-policy-agent/opa/storage/inmem/test" | ||
) | ||
|
||
func genNxMObjectBenchmarkData(n, m int) ast.Value { | ||
objList := make([]*ast.Term, n) | ||
for i := 0; i < n; i++ { | ||
v := ast.NewObject() | ||
for j := 0; j < m; j++ { | ||
v.Insert(ast.StringTerm(fmt.Sprintf("%d,%d", i, j)), ast.BooleanTerm(true)) | ||
} | ||
objList[i] = ast.NewTerm(v) | ||
} | ||
return ast.NewArray(objList...) | ||
} | ||
|
||
func BenchmarkObjectUnionN(b *testing.B) { | ||
ctx := context.Background() | ||
|
||
sizes := []int{10, 100, 250} | ||
|
||
for _, n := range sizes { | ||
for _, m := range sizes { | ||
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) { | ||
store := inmem.NewFromObject(map[string]interface{}{"objs": genNxMObjectBenchmarkData(n, m)}) | ||
module := `package test | ||
combined := object.union_n(data.objs)` | ||
|
||
query := ast.MustParseBody("data.test.combined") | ||
compiler := ast.MustCompileModules(map[string]string{ | ||
"test.rego": module, | ||
}) | ||
|
||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
|
||
err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error { | ||
|
||
q := NewQuery(query). | ||
WithCompiler(compiler). | ||
WithStore(store). | ||
WithTransaction(txn) | ||
|
||
_, err := q.Run(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
}) | ||
|
||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
} | ||
|
||
func BenchmarkObjectUnionNSlow(b *testing.B) { | ||
// This benchmarks the suggested means to implement union | ||
// without using the builtin, to give us an idea of whether or not | ||
// the builtin is actually making things any faster. | ||
ctx := context.Background() | ||
|
||
sizes := []int{10, 100, 250} | ||
|
||
for _, n := range sizes { | ||
for _, m := range sizes { | ||
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) { | ||
store := inmem.NewFromObject(map[string]interface{}{"objs": genNxMObjectBenchmarkData(n, m)}) | ||
module := `package test | ||
combined := {k: true | s := data.objs[_]; s[k]}` | ||
|
||
query := ast.MustParseBody("data.test.combined") | ||
compiler := ast.MustCompileModules(map[string]string{ | ||
"test.rego": module, | ||
}) | ||
|
||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
|
||
err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error { | ||
|
||
q := NewQuery(query). | ||
WithCompiler(compiler). | ||
WithStore(store). | ||
WithTransaction(txn) | ||
|
||
_, err := q.Run(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
}) | ||
|
||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright 2022 The OPA Authors. All rights reserved. | ||
// Use of this source code is governed by an Apache2 | ||
// license that can be found in the LICENSE file. | ||
|
||
package topdown | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/open-policy-agent/opa/ast" | ||
) | ||
|
||
func TestObjectUnionNBuiltin(t *testing.T) { | ||
tests := []struct { | ||
note string | ||
query string | ||
input string | ||
expected string | ||
}{ | ||
// NOTE(philipc): These tests assume that erroneous types are | ||
// checked elsewhere, and focus only on functional correctness. | ||
{ | ||
note: "Empty", | ||
input: `[]`, | ||
expected: `{}`, | ||
}, | ||
{ | ||
note: "Singletons", | ||
input: `[{1: true}, {2: true}, {3: true}]`, | ||
expected: `{1: true, 2: true, 3: true}`, | ||
}, | ||
{ | ||
note: "One object", | ||
input: `[{1: true, 2: true, 3: true}]`, | ||
expected: `{1: true, 2: true, 3: true}`, | ||
}, | ||
{ | ||
note: "One object + empty", | ||
input: `[{1: true, 2: true, 3: true}, {}]`, | ||
expected: `{1: true, 2: true, 3: true}`, | ||
}, | ||
{ | ||
note: "Multiple objects, with scalar duplicates", | ||
input: `[{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2}, {"C": 3}, {"D": 4, "E": 5}]`, | ||
expected: `{"A": 1, "B": 2, "C": 3, "D": 4, "E": 5}`, | ||
}, | ||
{ | ||
note: "2x objects, with simple merge on key", | ||
input: `[{"A": 1, "B": {"D": 4}, "C": 3}, {"B": 200}]`, | ||
expected: `{"A": 1, "B": 200, "C": 3,}`, | ||
}, | ||
{ | ||
note: "2x objects, with complex merge on nested object", | ||
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": {"X": 49, "Z": 50}}}]`, | ||
expected: `{"A": 1, "B": {"N1": {"X": 49, "Z": 50}}, "C": 3}`, | ||
}, | ||
{ | ||
note: "Multiple objects, with scalar, then object, overwrite on nested key", | ||
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": 23}}, {"B": {"N1": {"Z": 50}}}]`, | ||
expected: `{"A": 1, "B": {"N1": {"Z": 50}}, "C": 3}`, | ||
}, | ||
{ | ||
note: "Multiple objects, with complex overwrite on nested key", | ||
input: `[{"A": 1, "B": {"N1": {"X": true, "Z": false}}, "C": 3}, {"B": {"N1": 23}}, {"B": {"N1": {"Z": 50}}}, {"B": {"N1": {"Z": 35}}}]`, | ||
expected: `{"A": 1, "B": {"N1": {"Z": 35}}, "C": 3}`, | ||
}, | ||
} | ||
|
||
for _, tc := range tests { | ||
inputs := ast.MustParseTerm(tc.input) | ||
result, err := getResult(builtinObjectUnionN, inputs) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
expected := ast.MustParseTerm(tc.expected) | ||
if !result.Equal(expected) { | ||
t.Fatalf("Expected %v but got %v", expected, result) | ||
} | ||
} | ||
} |