Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
builtins: Speed up set unions (#4980)
This commit fixes a performance regression for the Set `union` builtin, discovered in issue #4979. The original logic for the builtin did pairwise `Set.Union` calls between the input sets, resulting in many wasted temporary Sets that were almost immediately discarded, and a bunch of duplicated work. The improved builtin inlines the logic from `Set.Union`, so that only one pass is made across the incoming sets' members. Fixes #4979 Signed-off-by: Philip Conrad <philipaconrad@gmail.com>
- Loading branch information
1 parent
b03c188
commit 764aa88
Showing
3 changed files
with
215 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
// Copyright 2022 The OPA Authors. All rights reserved. | ||
// Use of this source code is governed by an Apache2 | ||
// license that can be found in the LICENSE file. | ||
|
||
package topdown | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/open-policy-agent/opa/ast" | ||
"github.com/open-policy-agent/opa/storage" | ||
"github.com/open-policy-agent/opa/storage/inmem" | ||
) | ||
|
||
func BenchmarkSetUnion(b *testing.B) { | ||
ctx := context.Background() | ||
|
||
sizes := []int{10, 100, 250} | ||
|
||
for _, n := range sizes { | ||
for _, m := range sizes { | ||
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) { | ||
store := inmem.NewFromObject(map[string]interface{}{"nsets": n, "nsize": m}) | ||
|
||
// Code is lifted from here: | ||
// https://github.com/open-policy-agent/opa/issues/4979#issue-1332019382 | ||
|
||
module := `package test | ||
nums := numbers.range(0, data.nsets) | ||
sizes := numbers.range(0, data.nsize) | ||
sets[n] = x { | ||
nums[n] | ||
x := {sprintf("%d,%d", [n, i]) | sizes[i]} | ||
} | ||
combined := union({s | s := sets[_]})` | ||
|
||
query := ast.MustParseBody("data.test.combined") | ||
compiler := ast.MustCompileModules(map[string]string{ | ||
"test.rego": module, | ||
}) | ||
|
||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
|
||
err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error { | ||
|
||
q := NewQuery(query). | ||
WithCompiler(compiler). | ||
WithStore(store). | ||
WithTransaction(txn) | ||
|
||
_, err := q.Run(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
}) | ||
|
||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
} | ||
|
||
func BenchmarkSetUnionSlow(b *testing.B) { | ||
// This benchmarks the suggested means to implement union | ||
// without using the builtin, to give us an idea of whether or not | ||
// the builtin is actually making things any faster. | ||
ctx := context.Background() | ||
|
||
sizes := []int{10, 100, 250} | ||
|
||
for _, n := range sizes { | ||
for _, m := range sizes { | ||
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) { | ||
store := inmem.NewFromObject(map[string]interface{}{"nsets": n, "nsize": m}) | ||
|
||
// Code is lifted from here: | ||
// https://github.com/open-policy-agent/opa/issues/4979#issue-1332019382 | ||
|
||
module := `package test | ||
nums := numbers.range(0, data.nsets) | ||
sizes := numbers.range(0, data.nsize) | ||
sets[n] = x { | ||
nums[n] | ||
x := {sprintf("%d,%d", [n, i]) | sizes[i]} | ||
} | ||
combined := {t | s := sets[_]; s[t]}` | ||
|
||
query := ast.MustParseBody("data.test.combined") | ||
compiler := ast.MustCompileModules(map[string]string{ | ||
"test.rego": module, | ||
}) | ||
|
||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
|
||
err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error { | ||
|
||
q := NewQuery(query). | ||
WithCompiler(compiler). | ||
WithStore(store). | ||
WithTransaction(txn) | ||
|
||
_, err := q.Run(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
}) | ||
|
||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
// Copyright 2022 The OPA Authors. All rights reserved. | ||
// Use of this source code is governed by an Apache2 | ||
// license that can be found in the LICENSE file. | ||
|
||
package topdown | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/open-policy-agent/opa/ast" | ||
) | ||
|
||
func TestSetUnionBuiltin(t *testing.T) { | ||
tests := []struct { | ||
note string | ||
query string | ||
input string | ||
expected string | ||
}{ | ||
// NOTE(philipc): These tests assume that erroneous types are | ||
// checked elsewhere, and focus only on functional correctness. | ||
{ | ||
note: "Empty", | ||
input: `{set()}`, | ||
expected: `set()`, | ||
}, | ||
{ | ||
note: "Singletons", | ||
input: `{{1}, {2}, {3}, {4}, {5}}`, | ||
expected: `{1, 2, 3, 4, 5}`, | ||
}, | ||
{ | ||
note: "One set", | ||
input: `{{1, 2, 3, 4, 5}}`, | ||
expected: `{1, 2, 3, 4, 5}`, | ||
}, | ||
{ | ||
note: "One set + empty", | ||
input: `{{1, 2, 3, 4, 5}, set()}`, | ||
expected: `{1, 2, 3, 4, 5}`, | ||
}, | ||
{ | ||
note: "Multiple sets, with duplicates", | ||
input: `{{1, 2, 3}, {1, 2}, {3}, {4, 5}}`, | ||
expected: `{1, 2, 3, 4, 5}`, | ||
}, | ||
} | ||
|
||
for _, tc := range tests { | ||
inputs := ast.MustParseTerm(tc.input) | ||
result, err := getResult(functionalWrapper1("union", builtinSetUnion), inputs) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
expected := ast.MustParseTerm(tc.expected) | ||
if !result.Equal(expected) { | ||
t.Fatalf("Expected %v but got %v", expected, result) | ||
} | ||
} | ||
} | ||
|
||
// Used to get older-style (ast.Term, error) tuples out of newer functions. | ||
func getResult(fn BuiltinFunc, operands ...*ast.Term) (*ast.Term, error) { | ||
var result *ast.Term | ||
extractionFn := func(r *ast.Term) error { | ||
result = r | ||
return nil | ||
} | ||
err := fn(BuiltinContext{}, operands, extractionFn) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return result, nil | ||
} |