Skip to content

Commit

Permalink
builtins: Speed up set unions (#4980)
Browse files Browse the repository at this point in the history
This commit fixes a performance regression for the Set `union` builtin, discovered in issue #4979.

The original logic for the builtin did pairwise `Set.Union` calls between the input sets, resulting in many wasted temporary Sets that were almost immediately discarded, and a bunch of duplicated work. The improved builtin inlines the logic from `Set.Union`, so that only one pass is made across the incoming sets' members.

Fixes #4979

Signed-off-by: Philip Conrad <philipaconrad@gmail.com>
  • Loading branch information
philipaconrad committed Aug 11, 2022
1 parent b03c188 commit 764aa88
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 4 deletions.
12 changes: 8 additions & 4 deletions topdown/sets.go
Expand Up @@ -58,22 +58,26 @@ func builtinSetIntersection(a ast.Value) (ast.Value, error) {

// builtinSetUnion returns the union of the given input sets
func builtinSetUnion(a ast.Value) (ast.Value, error) {
// The set union logic here is duplicated and manually inlined on
// purpose. By lifting this logic up a level, and not doing pairwise
// set unions, we avoid a number of heap allocations. This improves
// performance dramatically over the naive approach.
result := ast.NewSet()

inputSet, err := builtins.SetOperand(a, 1)
if err != nil {
return nil, err
}

result := ast.NewSet()

err = inputSet.Iter(func(x *ast.Term) error {
n, err := builtins.SetOperand(x.Value, 1)
item, err := builtins.SetOperand(x.Value, 1)
if err != nil {
return err
}
result = result.Union(n)
item.Foreach(result.Add)
return nil
})

return result, err
}

Expand Down
132 changes: 132 additions & 0 deletions topdown/sets_bench_test.go
@@ -0,0 +1,132 @@
// Copyright 2022 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"context"
"fmt"
"testing"

"github.com/open-policy-agent/opa/ast"
"github.com/open-policy-agent/opa/storage"
"github.com/open-policy-agent/opa/storage/inmem"
)

func BenchmarkSetUnion(b *testing.B) {
ctx := context.Background()

sizes := []int{10, 100, 250}

for _, n := range sizes {
for _, m := range sizes {
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) {
store := inmem.NewFromObject(map[string]interface{}{"nsets": n, "nsize": m})

// Code is lifted from here:
// https://github.com/open-policy-agent/opa/issues/4979#issue-1332019382

module := `package test
nums := numbers.range(0, data.nsets)
sizes := numbers.range(0, data.nsize)
sets[n] = x {
nums[n]
x := {sprintf("%d,%d", [n, i]) | sizes[i]}
}
combined := union({s | s := sets[_]})`

query := ast.MustParseBody("data.test.combined")
compiler := ast.MustCompileModules(map[string]string{
"test.rego": module,
})

b.ResetTimer()

for i := 0; i < b.N; i++ {

err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {

q := NewQuery(query).
WithCompiler(compiler).
WithStore(store).
WithTransaction(txn)

_, err := q.Run(ctx)
if err != nil {
return err
}

return nil
})

if err != nil {
b.Fatal(err)
}
}
})
}
}
}

func BenchmarkSetUnionSlow(b *testing.B) {
// This benchmarks the suggested means to implement union
// without using the builtin, to give us an idea of whether or not
// the builtin is actually making things any faster.
ctx := context.Background()

sizes := []int{10, 100, 250}

for _, n := range sizes {
for _, m := range sizes {
b.Run(fmt.Sprintf("%dx%d", n, m), func(b *testing.B) {
store := inmem.NewFromObject(map[string]interface{}{"nsets": n, "nsize": m})

// Code is lifted from here:
// https://github.com/open-policy-agent/opa/issues/4979#issue-1332019382

module := `package test
nums := numbers.range(0, data.nsets)
sizes := numbers.range(0, data.nsize)
sets[n] = x {
nums[n]
x := {sprintf("%d,%d", [n, i]) | sizes[i]}
}
combined := {t | s := sets[_]; s[t]}`

query := ast.MustParseBody("data.test.combined")
compiler := ast.MustCompileModules(map[string]string{
"test.rego": module,
})

b.ResetTimer()

for i := 0; i < b.N; i++ {

err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {

q := NewQuery(query).
WithCompiler(compiler).
WithStore(store).
WithTransaction(txn)

_, err := q.Run(ctx)
if err != nil {
return err
}

return nil
})

if err != nil {
b.Fatal(err)
}
}
})
}
}
}
75 changes: 75 additions & 0 deletions topdown/sets_test.go
@@ -0,0 +1,75 @@
// Copyright 2022 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package topdown

import (
"testing"

"github.com/open-policy-agent/opa/ast"
)

func TestSetUnionBuiltin(t *testing.T) {
tests := []struct {
note string
query string
input string
expected string
}{
// NOTE(philipc): These tests assume that erroneous types are
// checked elsewhere, and focus only on functional correctness.
{
note: "Empty",
input: `{set()}`,
expected: `set()`,
},
{
note: "Singletons",
input: `{{1}, {2}, {3}, {4}, {5}}`,
expected: `{1, 2, 3, 4, 5}`,
},
{
note: "One set",
input: `{{1, 2, 3, 4, 5}}`,
expected: `{1, 2, 3, 4, 5}`,
},
{
note: "One set + empty",
input: `{{1, 2, 3, 4, 5}, set()}`,
expected: `{1, 2, 3, 4, 5}`,
},
{
note: "Multiple sets, with duplicates",
input: `{{1, 2, 3}, {1, 2}, {3}, {4, 5}}`,
expected: `{1, 2, 3, 4, 5}`,
},
}

for _, tc := range tests {
inputs := ast.MustParseTerm(tc.input)
result, err := getResult(functionalWrapper1("union", builtinSetUnion), inputs)
if err != nil {
t.Fatal(err)
}

expected := ast.MustParseTerm(tc.expected)
if !result.Equal(expected) {
t.Fatalf("Expected %v but got %v", expected, result)
}
}
}

// Used to get older-style (ast.Term, error) tuples out of newer functions.
func getResult(fn BuiltinFunc, operands ...*ast.Term) (*ast.Term, error) {
var result *ast.Term
extractionFn := func(r *ast.Term) error {
result = r
return nil
}
err := fn(BuiltinContext{}, operands, extractionFn)
if err != nil {
return nil, err
}
return result, nil
}

0 comments on commit 764aa88

Please sign in to comment.