From eece1897453796951d0daa9f7b249bdeb2e647fd Mon Sep 17 00:00:00 2001 From: Evan Cordell Date: Tue, 28 Dec 2021 12:59:34 -0500 Subject: [PATCH] crdb: touch overlap key on namespace write this will ensure that namespace write transactions conflict with tuple write transactions, and prevent the newenemy problem for interactions such as: - write namespace (always fully consistent) - write a tuple - do a check with the revision from the tuple write --- e2e/newenemy/newenemy_test.go | 122 +++++++++++++++++---------- internal/datastore/crdb/namespace.go | 9 +- 2 files changed, 85 insertions(+), 46 deletions(-) diff --git a/e2e/newenemy/newenemy_test.go b/e2e/newenemy/newenemy_test.go index 2ea025efec..7971ccb9ce 100644 --- a/e2e/newenemy/newenemy_test.go +++ b/e2e/newenemy/newenemy_test.go @@ -15,7 +15,9 @@ import ( "time" v0 "github.com/authzed/authzed-go/proto/authzed/api/v0" + v1 "github.com/authzed/authzed-go/proto/authzed/api/v1" "github.com/authzed/authzed-go/proto/authzed/api/v1alpha1" + "github.com/authzed/spicedb/pkg/zedtoken" "github.com/authzed/spicedb/pkg/zookie" "github.com/jackc/pgtype" "github.com/jackc/pgx/v4" @@ -166,8 +168,8 @@ func TestNoNewEnemy(t *testing.T) { protectedFn := func(t *testing.T, count int) { t.Logf("check spicedb is protected after %d attempts", count+1) protected, attempts := checkSchemaNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, count+1) - require.Equal(t, attempts, count) require.True(t, protected, "protection is enabled, but newenemy detected") + require.Equal(t, attempts, count) } statTest(t, 5, vulnerableFn, protectedFn) }) @@ -181,12 +183,12 @@ func TestNoNewEnemy(t *testing.T) { protected, attempts = checkDataNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, vulnerableSpiceDb, 100) } require.False(t, protected) - t.Logf("determined spicedb vulnerable in %d attempts", attempts+1) + t.Logf("determined spicedb vulnerable in %d attempts", attempts) return attempts } protectedFn := func(t *testing.T, count int) { - t.Logf("check spicedb is protected after %d attempts", count+1) - protected, attempts := checkDataNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, count+1) + t.Logf("check spicedb is protected after %d attempts", count) + protected, attempts := checkDataNoNewEnemy(ctx, t, schemaData, slowNodeId, crdb, protectedSpiceDb, count) require.Equal(t, attempts, count) require.True(t, protected, "protection is enabled, but newenemy detected") t.Logf("spicedb is protected after %d attempts", count) @@ -254,10 +256,10 @@ func iterationsForHighConfidence(samples []int) (iterations int) { // ensures overlapping transactions are linearizable. func checkDataNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData, slowNodeId int, crdb cockroach.Cluster, spicedb spice.Cluster, maxAttempts int) (bool, int) { prefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId) - t.Log("filling with data to span multiple ranges for prefix ", prefix) + t.Log("filling with data to span multiple ranges for prefix", prefix) fill(t, spicedb[0].Client().V0().ACL(), prefix, 4000, 1000) - for attempts := 0; attempts < maxAttempts; attempts++ { + for attempts := 1; attempts < maxAttempts+1; attempts++ { direct, exclude := generateTuple(prefix, objIdGenerator) // write to node 1 @@ -334,18 +336,18 @@ func checkDataNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaD // relation excluded: user // permission allowed = direct // } -// 2. Write resource:2#direct:@user:A -// 3. Update to this schema: +// 2. Write resource:1#direct:@user:A +// 3. Write resource:1#excluded@user:A +// 4. Update to this schema: // definition user {} // definition resource { // relation direct: user // relation excluded: user // permission allowed = direct - excluded // } -// 4. Write resource:1#excluded@user:A // 5. If the revision from (4) is before the timestamp for (3), then: -// check(fully consistent, resource:1#allowed@user:A) -// may succeed, when it should fail. The timestamps can be reversed +// check(revision from 3, resource:1#allowed@user:A) +// may fail, when it should succeed. The timestamps can be reversed // if the tx overlap protections are disabled, because cockroach only // ensures overlapping transactions are linearizable. // In this case, we don't get an explicit revision back from the @@ -353,55 +355,71 @@ func checkDataNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaD // fully consistent. func checkSchemaNoNewEnemy(ctx context.Context, t testing.TB, schemaData []SchemaData, slowNodeId int, crdb cockroach.Cluster, spicedb spice.Cluster, maxAttempts int) (bool, int) { prefix := prefixForNode(ctx, crdb[1].Conn(), schemaData, slowNodeId) - var b strings.Builder - for attempts := 0; attempts < maxAttempts; attempts++ { + require.NoError(t, schemaAllowTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}})) + allowSchema := b.String() + b.Reset() + require.NoError(t, schemaTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}})) + excludeSchema := b.String() + + for attempts := 1; attempts < maxAttempts+1; attempts++ { direct, exclude := generateTuple(prefix, objIdGenerator) // write the "allow" schema - b.Reset() - require.NoError(t, schemaAllowTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}})) - _, err := spicedb[0].Client().V1Alpha1().Schema().WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{ - Schema: b.String(), - }) - if err != nil { - t.Log(err.Error()) - continue - } + require.NoError(t, getErr(spicedb[0].Client().V1Alpha1().Schema().WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{ + Schema: allowSchema, + }))) // write the "direct" tuple - _, err = spicedb[0].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{ + require.NoError(t, getErr(spicedb[0].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{ Updates: []*v0.RelationTupleUpdate{direct}, - }) - require.NoError(t, err) + }))) - time.Sleep(100 * time.Millisecond) // write the "excludes" tuple - r2, err := spicedb[0].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{ + // writing to 1 primes the namespace cache on node 1 with the "allow" namespace + r2, err := spicedb[1].Client().V0().ACL().Write(testCtx, &v0.WriteRequest{ Updates: []*v0.RelationTupleUpdate{exclude}, }) require.NoError(t, err) - // write the "exclude" schema - b.Reset() - require.NoError(t, schemaTpl.Execute(&b, SchemaData{Prefixes: []string{prefix}})) - _, err = spicedb[1].Client().V1Alpha1().Schema().WriteSchema(context.Background(), &v1alpha1.WriteSchemaRequest{ - Schema: b.String(), - }) - require.NoError(t, err) + // write the "exclude" schema. If this write hits the slow crdb node, it + // can get a revision in between the direct and exclude tuple writes + // which will cause check to fail, when it should succeed + require.NoError(t, getErr(spicedb[1].Client().V1Alpha1().Schema().WriteSchema(testCtx, &v1alpha1.WriteSchemaRequest{ + Schema: excludeSchema, + }))) - canHas, err := spicedb[1].Client().V0().ACL().Check(context.Background(), &v0.CheckRequest{ - TestUserset: &v0.ObjectAndRelation{ - Namespace: direct.Tuple.ObjectAndRelation.Namespace, - ObjectId: direct.Tuple.ObjectAndRelation.ObjectId, - Relation: "allowed", - }, - User: direct.Tuple.GetUser(), - AtRevision: r2.GetRevision(), - }) + rev, err := zookie.DecodeRevision(r2.GetRevision()) require.NoError(t, err) - if canHas.IsMember { + var canHas *v1.CheckPermissionResponse + checkAccess := func() bool { + var err error + + canHas, err = spicedb[0].Client().V1().Permissions().CheckPermission(testCtx, &v1.CheckPermissionRequest{ + Consistency: &v1.Consistency{ + Requirement: &v1.Consistency_AtExactSnapshot{AtExactSnapshot: zedtoken.NewFromRevision(rev)}, + }, + Resource: &v1.ObjectReference{ + ObjectType: direct.Tuple.ObjectAndRelation.Namespace, + ObjectId: direct.Tuple.ObjectAndRelation.ObjectId, + }, + Permission: "allowed", + Subject: &v1.SubjectReference{ + Object: &v1.ObjectReference{ + ObjectType: direct.Tuple.User.GetUserset().Namespace, + ObjectId: direct.Tuple.User.GetUserset().ObjectId, + }, + }, + }) + if err != nil { + t.Log(err) + } + return err == nil + } + require.Eventually(t, checkAccess, 10*time.Second, 10*time.Millisecond) + + if canHas.Permissionship == v1.CheckPermissionResponse_PERMISSIONSHIP_NO_PERMISSION { t.Log("service is subject to the new enemy problem") return false, attempts } @@ -413,7 +431,7 @@ func checkSchemaNoNewEnemy(ctx context.Context, t testing.TB, schemaData []Schem continue } } - return true, maxAttempts + return true, maxAttempts + 1 } func BenchmarkBatchWrites(b *testing.B) { @@ -621,3 +639,17 @@ func leaderFromRangeRow(rows pgx.Rows) int { } return leaseHolder } + +func getErr(vals ...interface{}) error { + if len(vals) == 0 { + return nil + } + err := vals[len(vals)-1] + if err == nil { + return nil + } + if err, ok := err.(error); ok { + return err + } + return nil +} diff --git a/internal/datastore/crdb/namespace.go b/internal/datastore/crdb/namespace.go index 07a543295c..5edab10af2 100644 --- a/internal/datastore/crdb/namespace.go +++ b/internal/datastore/crdb/namespace.go @@ -44,6 +44,8 @@ var ( func (cds *crdbDatastore) WriteNamespace(ctx context.Context, newConfig *v0.NamespaceDefinition) (datastore.Revision, error) { var hlcNow decimal.Decimal if err := cds.execute(ctx, cds.conn, pgx.TxOptions{}, func(tx pgx.Tx) error { + keySet := newKeySet() + cds.AddOverlapKey(keySet, newConfig.Name) serialized, err := proto.Marshal(newConfig) if err != nil { return err @@ -54,7 +56,12 @@ func (cds *crdbDatastore) WriteNamespace(ctx context.Context, newConfig *v0.Name if err != nil { return err } - return cds.conn.QueryRow( + for k := range keySet { + if _, err := tx.Exec(ctx, queryTouchTransaction, k); err != nil { + return err + } + } + return tx.QueryRow( datastore.SeparateContextWithTracing(ctx), writeSQL, writeArgs..., ).Scan(&hlcNow) }); err != nil {