Skip to content

Commit

Permalink
e2e: test namespace consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
ecordell committed Dec 30, 2021
1 parent a353874 commit 67abfca
Showing 1 changed file with 231 additions and 60 deletions.
291 changes: 231 additions & 60 deletions e2e/newenemy/newenemy_test.go
Expand Up @@ -42,6 +42,17 @@ definition {{.}}/resource {
{{ end }}
`

const schemaAllowAllText = `
{{ range .Prefixes }}
definition {{.}}/user {}
definition {{.}}/resource {
relation direct: {{.}}/user
relation excluded: {{.}}/user
permission allowed = direct
}
{{ end }}
`

const (
objIDRegex = "[a-zA-Z0-9_][a-zA-Z0-9/_-]{0,127}"
namespacePrefixRegex = "[a-z][a-z0-9_]{2,62}[a-z0-9]"
Expand All @@ -50,8 +61,10 @@ const (
var (
maxIterations = flag.Int("max-iterations", 1000, "iteration cap for statistic-based tests (0 for no limit)")

schemaTpl = template.Must(template.New("schema").Parse(schemaText))
testCtx context.Context
schemaTpl = template.Must(template.New("schema").Parse(schemaText))
schemaAllowTpl = template.Must(template.New("schema_allow").Parse(schemaAllowAllText))

testCtx context.Context
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -126,39 +139,142 @@ func TestNoNewEnemy(t *testing.T) {
fmt.Sprintf(setSmallRanges, dbName),
))

t.Log("fill with schemas to span multiple ranges")
// 4000 is larger than we need to span all three nodes, but a higher number
// seems to make the test converge faster
schemaData := generateSchemaData(4000, 500)
require.NoError(fillSchema(t, schemaData, vulnerableSpiceDb[1].Client().V1Alpha1().Schema()))
// This subtest ensures protection from a "data-based" newenemy problem:
//
// 1. Use this schema:
// definition user {}
// definition resource {
// relation direct: user
// relation excluded: user
// permission allowed = direct - excluded
// }
// 2. Write resource:1#excluded@user:A
// 3. Write resource:2#direct:@user:A
// 4. If the timestamp from (3) is before the timestamp for (2), then:
// check(resource:1#allowed@user:A)
// may succeed, when it should fail. The timestamps can be reversed
// if the tx overlap protections are disabled, because cockroach only
// ensures overlapping transactions are linearizable.
t.Run("protected from data newenemy", func(t *testing.T) {
t.Log("fill with schemas to span multiple ranges")
// 4000 is larger than we need to span all three nodes, but a higher number
// seems to make the test converge faster
schemaData := generateSchemaData(4000, 500)
fillSchema(t, schemaTpl, schemaData, vulnerableSpiceDb[1].Client().V1Alpha1().Schema())

t.Log("determining a prefix with a leader on the slow node")
slowNodeId, err := crdb[1].NodeID(testCtx)
require.NoError(err)
slowPrefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)

t.Logf("using prefix %s for slow node %d", slowPrefix, slowNodeId)

t.Log("determining a prefix with a leader on the slow node")
slowNodeId, err := crdb[1].NodeID(testCtx)
require.NoError(err)
slowPrefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)
t.Log("filling with data to span multiple ranges")
fill(t, vulnerableSpiceDb[0].Client().V0().ACL(), slowPrefix, 4000, 1000)

t.Logf("using prefix %s for slow node %d", slowPrefix, slowNodeId)
t.Log("modifying time")
require.NoError(crdb.TimeDelay(ctx, e2e.MustFile(ctx, t, "timeattack-"+t.Name()+".log"), 1, -200*time.Millisecond))

t.Log("filling with data to span multiple ranges")
fill(t, vulnerableSpiceDb[0].Client().V0().ACL(), slowPrefix, 4000, 1000)
vulnerableFn := func(t *testing.T) int {
checkCtx, checkCancel := context.WithTimeout(ctx, 30*time.Minute)
protected, attempts := checkDataNoNewEnemy(checkCtx, t, schemaData, slowNodeId, crdb, vulnerableSpiceDb, -1)
require.NotNil(protected, "unable to determine if spicedb displays newenemy when mitigations are disabled within the time limit")
require.False(*protected)
checkCancel()
t.Logf("determined spicedb vulnerable in %d attempts", attempts)
return attempts
}
protectedFn := func(t *testing.T, count int) {
t.Logf("check spicedb is protected after %d attempts", count)
protected, _ := checkDataNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, count)
require.NotNil(protected, "unable to determine if spicedb is protected within the time limit")
require.True(*protected, "protection is enabled, but newenemy detected")
}
statTest(t, 5, vulnerableFn, protectedFn)
})

t.Log("modifying time")
require.NoError(crdb.TimeDelay(ctx, e2e.MustFile(ctx, t, "timeattack.log"), 1, -200*time.Millisecond))
// This subtest ensures protection from a "schema and data" newenemy, i.e.
// the new enemy conditions require linearizable changes to schema and data
// 1. Start with this schema:
// definition user {}
// definition resource {
// relation direct: user
// relation excluded: user
// permission allowed = direct
// }
// 2. Write resource:2#direct:@user:A
// 3. Update to this schema:
// definition user {}
// definition resource {
// relation direct: user
// relation excluded: user
// permission allowed = direct - excluded
// }
// 4. Write resource:1#excluded@user:A
// 5. If the revision from (4) is before the timestamp for (3), then:
// check(fully consistent, resource:1#allowed@user:A)
// may succeed, when it should fail. The timestamps can be reversed
// if the tx overlap protections are disabled, because cockroach only
// ensures overlapping transactions are linearizable.
// In this case, we don't get an explicit revision back from the
// WriteSchema call, but the Schema write and the resource write are
// fully consistent.
t.Run("protected from schema newenemy", func(t *testing.T) {
t.Log("fill with schemas to span multiple ranges")
// 4000 is larger than we need to span all three nodes, but a higher number
// seems to make the test converge faster
schemaData := generateSchemaData(4000, 500)
fillSchema(t, schemaAllowTpl, schemaData, vulnerableSpiceDb[1].Client().V1Alpha1().Schema())

t.Log("determining a prefix with a leader on the slow node")
slowNodeId, err := crdb[1].NodeID(testCtx)
require.NoError(err)
slowPrefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)

const sampleSize = 5
samples := make([]int, sampleSize)
t.Logf("using prefix %s for slow node %d", slowPrefix, slowNodeId)

t.Log("filling with data to span multiple ranges")
fill(t, vulnerableSpiceDb[0].Client().V0().ACL(), slowPrefix, 4000, 1000)

t.Log("modifying time")
require.NoError(crdb.TimeDelay(ctx, e2e.MustFile(ctx, t, "timeattack-"+t.Name()+".log"), 1, -200*time.Millisecond))

vulnerableFn := func(t *testing.T) int {
checkCtx, checkCancel := context.WithTimeout(ctx, 30*time.Minute)
protected, attempts := checkSchemaNoNewEnemy(checkCtx, t, schemaData, slowNodeId, crdb, vulnerableSpiceDb, -1)
require.NotNil(protected, "unable to determine if spicedb displays newenemy when mitigations are disabled within the time limit")
require.False(*protected)
checkCancel()
t.Logf("determined spicedb vulnerable in %d attempts", attempts)
return attempts
}
protectedFn := func(t *testing.T, count int) {
t.Logf("check spicedb is protected after %d attempts", count)
protected, _ := checkSchemaNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, count)
require.NotNil(protected, "unable to determine if spicedb is protected within the time limit")
require.True(*protected, "protection is enabled, but newenemy detected")
}
statTest(t, 5, vulnerableFn, protectedFn)
})
}

// statTest takes a testFn that is expected to fail the test, returning a sample
// and a protectedTestFn that is expected to succeed even after a given number
// of runs.
func statTest(t *testing.T, sampleSize int, testFn func(t *testing.T) int, protectedTestFn func(t *testing.T, count int)) {
samples := make([]int, sampleSize)
for i := 0; i < sampleSize; i++ {
t.Log(i, "check vulnerability with mitigations disabled")
checkCtx, checkCancel := context.WithTimeout(ctx, 30*time.Minute)
protected, attempts := checkNoNewEnemy(checkCtx, t, schemaData, slowNodeId, crdb, vulnerableSpiceDb, -1)
require.NotNil(protected, "unable to determine if spicedb displays newenemy when mitigations are disabled within the time limit")
require.False(*protected)
checkCancel()
t.Logf("%d - determined spicedb vulnerable in %d attempts", i, attempts)
samples[i] = attempts
t.Logf("collecting sample %d", i)
samples[i] = testFn(t)
}
protectedTestFn(t, iterationsForHighConfidence(samples))
}

// iterationsForHighConfidence returns how many iterations we need to get
// > 3sigma from the mean of the samples.
// Caps at maxIterations to control test runtime. Set maxIterations to 0 for no
// cap.
func iterationsForHighConfidence(samples []int) (iterations int) {
// from https://cs.opensource.google/go/x/perf/+/40a54f11:internal/stats/sample.go;l=196
// calculates mean and stddev at the same time
mean, M2 := 0.0, 0.0
Expand All @@ -167,25 +283,21 @@ func TestNoNewEnemy(t *testing.T) {
mean += delta / float64(n+1)
M2 += delta * (float64(x) - mean)
}
variance := M2 / float64(sampleSize-1)
variance := M2 / float64(len(samples)-1)
stddev := math.Sqrt(variance)

samplestddev := stddev / math.Sqrt(float64(sampleSize))
samplestddev := stddev / math.Sqrt(float64(len(samples)))
// how many iterations do we need to get > 3sigma from the mean?
// cap maxIterations to control test runtime.
iterations := int(math.Ceil(3*stddev*samplestddev + mean))
iterations = int(math.Ceil(3*stddev*samplestddev + mean))
if *maxIterations != 0 && *maxIterations < iterations {
iterations = *maxIterations
}

t.Logf("check spicedb is protected after %d attempts", iterations)
protected, _ := checkNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, iterations)
require.NotNil(protected, "unable to determine if spicedb is protected within the time limit")
require.True(*protected, "protection is enabled, but newenemy detected")
return
}

// checkNoNewEnemy returns true if the service is protected, false if it is vulnerable, and nil if we couldn't determine
func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData, slowNodeId int, crdb cockroach.Cluster, spicedb spice.Cluster, candidateCount int) (*bool, int) {
// checkDataNoNewEnemy returns true if the service is protected, false if it is vulnerable, and nil if we couldn't determine
func checkDataNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData, slowNodeId int, crdb cockroach.Cluster, spicedb spice.Cluster, candidateCount int) (*bool, int) {
var attempts int

prefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)
Expand Down Expand Up @@ -231,9 +343,6 @@ func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData,
t.Log(err)
continue
}
if canHas.IsMember {
t.Log("service is subject to the new enemy problem")
}

r1leader, r2leader := getLeaderNode(testCtx, crdb[1].Conn(), excludes[0].Tuple), getLeaderNode(testCtx, crdb[1].Conn(), directs[0].Tuple)
ns1Leader := getLeaderNodeForNamespace(testCtx, crdb[1].Conn(), excludes[0].Tuple.ObjectAndRelation.Namespace)
Expand All @@ -242,6 +351,12 @@ func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData,
z2, _ := zookie.DecodeRevision(r2.GetRevision())
t.Log(z1, z2, z1.Sub(z2).String(), r1leader, r2leader, ns1Leader, ns2Leader)

if canHas.IsMember {
t.Log("service is subject to the new enemy problem")
protected := false
return &protected, attempts
}

if ns1Leader != slowNodeId || ns2Leader != slowNodeId {
t.Log("namespace leader changed, pick new prefix and fill")
prefix = prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)
Expand All @@ -251,13 +366,7 @@ func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData,
continue
}

if canHas.IsMember {
t.Log("service is subject to the new enemy problem")
protected := false
return &protected, attempts
}

if !canHas.IsMember && z1.Sub(z2).IsPositive() {
if z1.Sub(z2).IsPositive() {
t.Log("error in test, object id has been re-used.")
continue
}
Expand All @@ -269,7 +378,7 @@ func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData,
return &protected, attempts
}

if attempts > 1000 {
if attempts > *maxIterations {
t.Log("trying with a new prefix")
attempts = 0
prefix = prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)
Expand All @@ -282,11 +391,78 @@ func checkNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData,
case <-ctx.Done():
return nil, attempts
default:
// allow clocks to desync
time.Sleep(100 * time.Millisecond)
continue
}
}
}

// allow clocks to desync
time.Sleep(100 * time.Millisecond)
// checkSchemaNoNewEnemy returns true if the service is protected, false if it is vulnerable, and nil if we couldn't determine
func checkSchemaNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData, slowNodeId int, crdb cockroach.Cluster, spicedb spice.Cluster, candidateCount int) (*bool, int) {
var attempts int

prefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId)
objIdGenerator := generator.NewUniqueGenerator(objIDRegex)

var b strings.Builder
for {
attempts++

directs, excludes := generateTuples(prefix, 1, objIdGenerator)

// write the "allow" schema
b.Reset()
require.NoError(t, schemaAllowTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}}))
_, err := spicedb[0].Client().V1Alpha1().Schema().WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{
Schema: b.String(),
})
require.NoError(t, err)

// write the "direct" tuple
_, err = spicedb[0].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{
Updates: []*v0.RelationTupleUpdate{directs[0]},
})
require.NoError(t, err)

// write the "exclude" schema
b.Reset()
require.NoError(t, schemaTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}}))
_, err = spicedb[1].Client().V1Alpha1().Schema().WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{
Schema: b.String(),
})
require.NoError(t, err)

// write the "excludes" tuple
_, err = spicedb[0].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{
Updates: []*v0.RelationTupleUpdate{excludes[0]},
})
require.NoError(t, err)

canHas, err := spicedb[1].Client().V0().ACL().Check(context.Background(), &v0.CheckRequest{
TestUserset: &v0.ObjectAndRelation{
Namespace: directs[0].Tuple.ObjectAndRelation.Namespace,
ObjectId: directs[0].Tuple.ObjectAndRelation.ObjectId,
Relation: "allowed",
},
User: directs[0].Tuple.GetUser(),
})
require.NoError(t, err)

if canHas.IsMember {
t.Log("service is subject to the new enemy problem")
protected := false
return &protected, attempts
}

select {
case <-ctx.Done():
return nil, attempts
default:
// allow clocks to desync
time.Sleep(100 * time.Millisecond)
continue
}
}
}

Expand Down Expand Up @@ -329,7 +505,7 @@ func BenchmarkConflictingTupleWrites(b *testing.B) {

b.ResetTimer()

checkNoNewEnemy(ctx, b, generateSchemaData(1, 1), 1, crdb, spicedb, b.N)
checkDataNoNewEnemy(ctx, b, generateSchemaData(1, 1), 1, crdb, spicedb, b.N)
}

func fill(t testing.TB, client v0.ACLServiceClient, prefix string, fillerCount, batchSize int) {
Expand All @@ -349,24 +525,19 @@ func fill(t testing.TB, client v0.ACLServiceClient, prefix string, fillerCount,
}
}

func fillSchema(t testing.TB, data []SchemaData, schemaClient v1alpha1.SchemaServiceClient) error {
// fillSchema generates the schema text for given SchemaData and applies it
func fillSchema(t testing.TB, template *template.Template, data []SchemaData, schemaClient v1alpha1.SchemaServiceClient) {
var b strings.Builder
batchSize := len(data[0].Prefixes)
for i, d := range data {
t.Logf("filling %d to %d", i*batchSize, (i+1)*batchSize)
b.Reset()
err := schemaTpl.Execute(&b, d)
if err != nil {
return err
}
_, err = schemaClient.WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{
require.NoError(t, template.Execute(&b, d))
_, err := schemaClient.WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{
Schema: b.String(),
})
if err != nil {
return err
}
require.NoError(t, err)
}
return nil
}

// prefixForNode finds a prefix with namespace leaders on the node id
Expand Down

0 comments on commit 67abfca

Please sign in to comment.