Skip to content

Commit

Permalink
ast: hash containers on insert/update (open-policy-agent#4367)
Browse files Browse the repository at this point in the history
* ast: hash containers at insertion/creation time

This allows us to use the Hashes for comparisons, too, since they're
cheaply available all the time.

* rego_test: add test for concurrent eval of PreparedEvalResult

Fixes open-policy-agent#4345.

Signed-off-by: Stephan Renatus <stephan.renatus@gmail.com>
  • Loading branch information
srenatus committed Feb 21, 2022
1 parent b005ba3 commit 1dfa6bd
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 31 deletions.
68 changes: 38 additions & 30 deletions ast/term.go
Expand Up @@ -1090,28 +1090,38 @@ type QueryIterator func(map[Var]Value, Value) error

// ArrayTerm creates a new Term with an Array value.
func ArrayTerm(a ...*Term) *Term {
return &Term{Value: &Array{elems: a, hash: 0, ground: termSliceIsGround(a)}}
return NewTerm(NewArray(a...))
}

// NewArray creates an Array with the terms provided. The array will
// use the provided term slice.
func NewArray(a ...*Term) *Array {
return &Array{elems: a, hash: 0, ground: termSliceIsGround(a)}
hs := make([]int, len(a))
for i, e := range a {
hs[i] = e.Value.Hash()
}
arr := &Array{elems: a, hashs: hs, ground: termSliceIsGround(a)}
arr.rehash()
return arr
}

// Array represents an array as defined by the language. Arrays are similar to the
// same types as defined by JSON with the exception that they can contain Vars
// and References.
type Array struct {
elems []*Term
hashs []int // element hashes
hash int
ground bool
}

// Copy returns a deep copy of arr.
func (arr *Array) Copy() *Array {
cpy := make([]int, len(arr.elems))
copy(cpy, arr.hashs)
return &Array{
elems: termSliceCopy(arr.elems),
hashs: cpy,
hash: arr.hash,
ground: arr.IsGround()}
}
Expand Down Expand Up @@ -1173,16 +1183,12 @@ func (arr *Array) Sorted() *Array {
}
sort.Sort(termSlice(cpy))
a := NewArray(cpy...)
a.hash = arr.hash
a.hashs = arr.hashs
return a
}

// Hash returns the hash code for the Value.
func (arr *Array) Hash() int {
if arr.hash == 0 {
arr.hash = termSliceHash(arr.elems)
}

return arr.hash
}

Expand Down Expand Up @@ -1222,11 +1228,19 @@ func (arr *Array) Elem(i int) *Term {
return arr.elems[i]
}

// rehash updates the cached hash of arr.
func (arr *Array) rehash() {
arr.hash = 0
for _, h := range arr.hashs {
arr.hash += h
}
}

// set sets the element i of arr.
func (arr *Array) set(i int, v *Term) {
arr.ground = arr.ground && v.IsGround()
arr.elems[i] = v
arr.hash = 0
arr.hashs[i] = v.Value.Hash()
}

// Slice returns a slice of arr starting from i index to j. -1
Expand All @@ -1235,15 +1249,21 @@ func (arr *Array) set(i int, v *Term) {
// the other.
func (arr *Array) Slice(i, j int) *Array {
var elems []*Term
var hashs []int
if j == -1 {
elems = arr.elems[i:]
hashs = arr.hashs[i:]
} else {
elems = arr.elems[i:j]
hashs = arr.hashs[i:j]
}
// If arr is ground, the slice is, too.
// If it's not, the slice could still be.
gr := arr.ground || termSliceIsGround(elems)
return &Array{elems: elems, ground: gr}

s := &Array{elems: elems, hashs: hashs, ground: gr}
s.rehash()
return s
}

// Iter calls f on each element in arr. If f returns an error,
Expand Down Expand Up @@ -1280,7 +1300,8 @@ func (arr *Array) Foreach(f func(*Term)) {
func (arr *Array) Append(v *Term) *Array {
cpy := *arr
cpy.elems = append(arr.elems, v)
cpy.hash = 0
cpy.hashs = append(arr.hashs, v.Value.Hash())
cpy.hash = arr.hash + v.Value.Hash()
cpy.ground = arr.ground && v.IsGround()
return &cpy
}
Expand Down Expand Up @@ -1359,11 +1380,6 @@ func (s *set) IsGround() bool {

// Hash returns a hash code for s.
func (s *set) Hash() int {
if s.hash == 0 {
s.Foreach(func(x *Term) {
s.hash += x.Hash()
})
}
return s.hash
}

Expand Down Expand Up @@ -1556,6 +1572,7 @@ func (s *set) Slice() []*Term {

func (s *set) insert(x *Term) {
hash := x.Hash()
insertHash := hash
// This `equal` utility is duplicated and manually inlined a number of
// time in this file. Inlining it avoids heap allocations, so it makes
// a big performance difference: some operations like lookup become twice
Expand Down Expand Up @@ -1633,16 +1650,16 @@ func (s *set) insert(x *Term) {
equal = func(y Value) bool { return Compare(x, y) == 0 }
}

for curr, ok := s.elems[hash]; ok; {
for curr, ok := s.elems[insertHash]; ok; {
if equal(curr.Value) {
return
}

hash++
curr, ok = s.elems[hash]
insertHash++
curr, ok = s.elems[insertHash]
}

s.elems[hash] = x
s.elems[insertHash] = x
i := sort.Search(len(s.keys), func(i int) bool { return Compare(x, s.keys[i]) < 0 })
if i < len(s.keys) {
// insert at position `i`:
Expand All @@ -1653,7 +1670,7 @@ func (s *set) insert(x *Term) {
s.keys = append(s.keys, x)
}

s.hash = 0
s.hash += hash
s.ground = s.ground && x.IsGround()
}

Expand Down Expand Up @@ -1888,14 +1905,6 @@ func (obj *object) Get(k *Term) *Term {

// Hash returns the hash code for the Value.
func (obj *object) Hash() int {
if obj.hash == 0 {
for h, curr := range obj.elems {
for ; curr != nil; curr = curr.next {
obj.hash += h
obj.hash += curr.value.Hash()
}
}
}
return obj.hash
}

Expand Down Expand Up @@ -2280,7 +2289,6 @@ func (obj *object) insert(k, v *Term) {
}

curr.value = v
obj.hash = 0
return
}
}
Expand All @@ -2299,7 +2307,7 @@ func (obj *object) insert(k, v *Term) {
} else {
obj.keys = append(obj.keys, elem)
}
obj.hash = 0
obj.hash += hash + v.Hash()

if k.IsGround() {
obj.ground++
Expand Down
73 changes: 72 additions & 1 deletion ast/term_test.go
Expand Up @@ -418,7 +418,7 @@ func TestFind(t *testing.T) {
}
}

func TestHash(t *testing.T) {
func TestHashObject(t *testing.T) {

doc := `{"a": [[true, {"b": [null]}, {"c": "d"}]], "e": {100: a[i].b}, "k": ["foo" | true], "o": {"foo": "bar" | true}, "sc": {"foo" | true}, "s": {1, 2, {3, 4}}, "big": 1e+1000}`

Expand All @@ -431,6 +431,77 @@ func TestHash(t *testing.T) {
if obj1.Hash() != obj2.Hash() {
t.Errorf("Expected hash codes to be equal")
}

// Calculate hash like we did before moving the caching to create/update:
obj := obj1.(*object)
exp := 0
for h, curr := range obj.elems {
for ; curr != nil; curr = curr.next {
exp += h
exp += curr.value.Hash()
}
}

if act := obj1.Hash(); exp != act {
t.Errorf("expected %v, got %v", exp, act)
}
}

func TestHashArray(t *testing.T) {

doc := `[{"a": [[true, {"b": [null]}, {"c": "d"}]]}, 100, true, [a[i].b], {100: a[i].b}, ["foo" | true], {"foo": "bar" | true}, {"foo" | true}, {1, 2, {3, 4}}, 1e+1000]`

stmt1 := MustParseStatement(doc)
stmt2 := MustParseStatement(doc)

arr1 := stmt1.(Body)[0].Terms.(*Term).Value.(*Array)
arr2 := stmt2.(Body)[0].Terms.(*Term).Value.(*Array)

if arr1.Hash() != arr2.Hash() {
t.Errorf("Expected hash codes to be equal")
}

// Calculate hash like we did before moving the caching to create/update:
exp := termSliceHash(arr1.elems)

if act := arr1.Hash(); exp != act {
t.Errorf("expected %v, got %v", exp, act)
}

for j := 0; j < arr1.Len(); j++ {
for i := 0; i <= j; i++ {
slice := arr1.Slice(i, j)
exp := termSliceHash(slice.elems)
if act := slice.Hash(); exp != act {
t.Errorf("arr1[%d:%d]: expected %v, got %v", i, j, exp, act)
}
}
}
}

func TestHashSet(t *testing.T) {

doc := `{{"a": [[true, {"b": [null]}, {"c": "d"}]]}, 100, 100, 100, true, [a[i].b], {100: a[i].b}, ["foo" | true], {"foo": "bar" | true}, {"foo" | true}, {1, 2, {3, 4}}, 1e+1000}`

stmt1 := MustParseStatement(doc)
stmt2 := MustParseStatement(doc)

set1 := stmt1.(Body)[0].Terms.(*Term).Value.(Set)
set2 := stmt2.(Body)[0].Terms.(*Term).Value.(Set)

if set1.Hash() != set2.Hash() {
t.Errorf("Expected hash codes to be equal")
}

// Calculate hash like we did before moving the caching to create/update:
exp := 0
set1.Foreach(func(x *Term) {
exp += x.Hash()
})

if act := set1.Hash(); exp != act {
t.Errorf("expected %v, got %v", exp, act)
}
}

func TestTermIsGround(t *testing.T) {
Expand Down
56 changes: 56 additions & 0 deletions rego/rego_test.go
Expand Up @@ -18,6 +18,7 @@ import (
"reflect"
"strconv"
"strings"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -658,6 +659,61 @@ func TestPartialRewriteEquals(t *testing.T) {
}
}

// NOTE(sr): https://github.com/open-policy-agent/opa/issues/4345
func TestPrepareAndEvalRaceConditions(t *testing.T) {
tests := []struct {
note string
module string
exp string
}{
{
note: "object",
module: `package test
p[{"x":"y"}]`,
exp: `[[[{"x":"y"}]]]`,
},
{
note: "set",
module: `package test
p[{"x"}]`,
exp: `[[[["x"]]]]`,
},
{
note: "array",
module: `package test
p[["x"]]`,
exp: `[[[["x"]]]]`,
},
}

for _, tc := range tests {
t.Run(tc.note, func(t *testing.T) {
r := New(
Query("data.test.p"),
Module("", tc.module),
Package("foo"),
)

pq, err := r.PrepareForEval(context.Background())
if err != nil {
t.Fatalf("Unexpected error: %s", err.Error())
}

// run this 1000 times concurrently
var wg sync.WaitGroup
wg.Add(1000)
for i := 0; i < 1000; i++ {
go func(t *testing.T) {
t.Helper()
assertPreparedEvalQueryEval(t, pq, []EvalOption{}, tc.exp)
wg.Done()
}(t)
}
wg.Wait()
})
}
}

func TestPrepareAndEvalNewInput(t *testing.T) {
module := `
package test
Expand Down

0 comments on commit 1dfa6bd

Please sign in to comment.