From 8d54217284c4fcc649da7cf11b6a1c33efaf1925 Mon Sep 17 00:00:00 2001 From: mgritter Date: Tue, 16 Mar 2021 18:21:41 -0700 Subject: [PATCH 01/10] Acquire a per-lock lease to make renew and revoke atomic wrt each other. This means we don't have to hold pendingLock during I/O. --- vault/expiration.go | 134 +++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 39 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index a555fc41ce118..2077b66bc6674 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -97,6 +97,9 @@ type ExpirationManager struct { leaseCount int pendingLock sync.RWMutex + // A sync.Lock for every active leaseID + lockPerLease sync.Map + // The uniquePolicies map holds policy sets, so they can // be deduplicated. It is periodically emptied to prevent // unbounded growth. @@ -355,6 +358,8 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log leaseCount: 0, tidyLock: new(int32), + lockPerLease: sync.Map{}, + uniquePolicies: make(map[string][]string), emptyUniquePolicies: time.NewTicker(7 * 24 * time.Hour), @@ -853,6 +858,10 @@ func (m *ExpirationManager) Revoke(ctx context.Context, leaseID string) error { func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) error { defer metrics.MeasureSince([]string{"expire", "lazy-revoke"}, time.Now()) + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Load the entry le, err := m.loadEntry(ctx, leaseID) if err != nil { @@ -865,16 +874,10 @@ func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) erro } le.ExpireTime = time.Now() - { - m.pendingLock.Lock() - if err := m.persistEntry(ctx, le); err != nil { - m.pendingLock.Unlock() - return err - } - - m.updatePendingInternal(le) - m.pendingLock.Unlock() + if err := m.persistEntry(ctx, le); err != nil { + return err } + m.updatePending(le) return nil } @@ -884,6 +887,11 @@ func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) erro func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, force, skipToken bool) error { defer metrics.MeasureSince([]string{"expire", "revoke-common"}, time.Now()) + // Acquire lease for this lock + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Load the entry le, err := m.loadEntry(ctx, leaseID) if err != nil { @@ -913,6 +921,8 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo return err } + m.deleteLockForLease(leaseID) + // Delete the secondary index, but only if it's a leased secret (not auth) if le.Secret != nil { if err := m.removeIndexByToken(ctx, le); err != nil { @@ -982,6 +992,12 @@ func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.Token // Revoke all the keys for _, leaseID := range existing { + // Lock the child lease... save up all the unlocks for the end + // of the function. + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Load the entry le, err := m.loadEntry(ctx, leaseID) if err != nil { @@ -994,16 +1010,10 @@ func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.Token if le != nil { le.ExpireTime = time.Now() - { - m.pendingLock.Lock() - if err := m.persistEntry(ctx, le); err != nil { - m.pendingLock.Unlock() - return err - } - - m.updatePendingInternal(le) - m.pendingLock.Unlock() + if err := m.persistEntry(ctx, le); err != nil { + return err } + m.updatePending(le) } } @@ -1070,6 +1080,7 @@ func (m *ExpirationManager) revokePrefixCommon(ctx context.Context, prefix strin // Revoke all the keys for idx, suffix := range existing { leaseID := prefix + suffix + // No need to acquire per-lease lock here, one of these two will do it. switch { case sync: if err := m.revokeCommon(ctx, leaseID, force, false); err != nil { @@ -1090,6 +1101,11 @@ func (m *ExpirationManager) revokePrefixCommon(ctx context.Context, prefix strin func (m *ExpirationManager) Renew(ctx context.Context, leaseID string, increment time.Duration) (*logical.Response, error) { defer metrics.MeasureSince([]string{"expire", "renew"}, time.Now()) + // Acquire lock for this lease + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Load the entry le, err := m.loadEntry(ctx, leaseID) if err != nil { @@ -1182,18 +1198,13 @@ func (m *ExpirationManager) Renew(ctx context.Context, leaseID string, increment } } - { - m.pendingLock.Lock() - if err := m.persistEntry(ctx, le); err != nil { - m.pendingLock.Unlock() - return nil, err - } - - // Update the expiration time - m.updatePendingInternal(le) - m.pendingLock.Unlock() + if err := m.persistEntry(ctx, le); err != nil { + return nil, err } + // Update the expiration time + m.updatePending(le) + // Return the response return resp, nil } @@ -1232,6 +1243,11 @@ func (m *ExpirationManager) RenewToken(ctx context.Context, req *logical.Request leaseID = fmt.Sprintf("%s.%s", leaseID, ns.ID) } + // Acquire lock for this lease + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Load the entry le, err := m.loadEntry(ctx, leaseID) if err != nil { @@ -1297,17 +1313,10 @@ func (m *ExpirationManager) RenewToken(ctx context.Context, req *logical.Request le.ExpireTime = resp.Auth.ExpirationTime() le.LastRenewalTime = time.Now() - { - m.pendingLock.Lock() - if err := m.persistEntry(ctx, le); err != nil { - m.pendingLock.Unlock() - return nil, err - } - - // Update the expiration time - m.updatePendingInternal(le) - m.pendingLock.Unlock() + if err := m.persistEntry(ctx, le); err != nil { + return nil, err } + m.updatePending(le) retResp.Auth = resp.Auth return retResp, nil @@ -1385,6 +1394,8 @@ func (m *ExpirationManager) Register(ctx context.Context, req *logical.Request, if err := m.removeIndexByToken(ctx, le); err != nil { retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err)) } + + m.deleteLockForLease(leaseID) } }() @@ -1403,6 +1414,14 @@ func (m *ExpirationManager) Register(ctx context.Context, req *logical.Request, } } + // Acquire the lock here so persistEntry and updatePending are atomic, + // although it is *very unlikely* that anybody could grab the lease ID + // before this function returns. (They could find it in an index, or + // find it in a list.) + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Encode the entry if err := m.persistEntry(ctx, le); err != nil { return "", err @@ -1496,6 +1515,10 @@ func (m *ExpirationManager) RegisterAuth(ctx context.Context, te *logical.TokenE Version: 1, } + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + // Encode the entry if err := m.persistEntry(ctx, &le); err != nil { return err @@ -1634,6 +1657,30 @@ func (m *ExpirationManager) uniquePoliciesGc() { } } +// Placing a lock in pendingMap means that we need to work very hard on reload +// to only create one lock. Instead, we'll create locks on-demand in an atomic fashion. +// +// Acquiring a lock from a leaseEntry is a bad idea because it could change +// between loading and acquirung the lock. So we only provide an ID-based map, and the +// locking discipline should be: +// 1. Lock lease +// 2. Load, or attempt to load, leaseEntry +// 3. Modify leaseEntry and pendingMap (atomic wrt operations on this lease) +// 4. Unlock lease +// +// The lock must be removed from the map when the lease is deleted, or is +// found to not exist in storage. loadEntry does this whenever it returns +// nil, but we should also do it in revokeCommon(). +func (m *ExpirationManager) lockForLeaseID(id string) *sync.Mutex { + mutex := &sync.Mutex{} + lock, _ := m.lockPerLease.LoadOrStore(id, mutex) + return lock.(*sync.Mutex) +} + +func (m *ExpirationManager) deleteLockForLease(id string) { + m.lockPerLease.Delete(id) +} + // updatePending is used to update a pending invocation for a lease func (m *ExpirationManager) updatePending(le *leaseEntry) { m.pendingLock.Lock() @@ -1815,7 +1862,16 @@ func (m *ExpirationManager) loadEntry(ctx context.Context, leaseID string) (*lea } else { ctx = namespace.ContextWithNamespace(ctx, namespace.RootNamespace) } - return m.loadEntryInternal(ctx, leaseID, restoreMode, true) + + // + // If a lease entry is nil, proactively delete the lease lock, in case we + // created one erroneously. + leaseEntry, err := m.loadEntryInternal(ctx, leaseID, restoreMode, true) + if err != nil && leaseEntry == nil { + m.deleteLockForLease(leaseID) + } + return leaseEntry, err + } // loadEntryInternal is used when you need to load an entry but also need to From 1704a8d1c32378cc2c60f022aaecbd9b6ffe93e4 Mon Sep 17 00:00:00 2001 From: mgritter Date: Wed, 17 Mar 2021 12:40:21 -0700 Subject: [PATCH 02/10] Attempted fix for deadlock in token revocation. --- vault/expiration.go | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 2077b66bc6674..30e60eb9a8c6f 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -857,7 +857,11 @@ func (m *ExpirationManager) Revoke(ctx context.Context, leaseID string) error { // it triggers a return of a 202. func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) error { defer metrics.MeasureSince([]string{"expire", "lazy-revoke"}, time.Now()) + return m.lazyRevokeInternal(ctx, leaseID) +} +// Mark a lease as expiring immediately +func (m *ExpirationManager) lazyRevokeInternal(ctx context.Context, leaseID string) error { leaseLock := m.lockForLeaseID(leaseID) leaseLock.Lock() defer leaseLock.Unlock() @@ -887,10 +891,15 @@ func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) erro func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, force, skipToken bool) error { defer metrics.MeasureSince([]string{"expire", "revoke-common"}, time.Now()) - // Acquire lease for this lock - leaseLock := m.lockForLeaseID(leaseID) - leaseLock.Lock() - defer leaseLock.Unlock() + if !skipToken { + // Acquire lease for this lock + // If skipToken is true, then we're being called via RevokeByToken, so + // probably (always?) the lock is already held, and if we re-acquire it + // we get deadlock. + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + } // Load the entry le, err := m.loadEntry(ctx, leaseID) @@ -972,7 +981,8 @@ func (m *ExpirationManager) RevokePrefix(ctx context.Context, prefix string, syn // RevokeByToken is used to revoke all the secrets issued with a given token. // This is done by using the secondary index. It also removes the lease entry // for the token itself. As a result it should *ONLY* ever be called from the -// token store's revokeSalted function. +// token store's revokeInternal function. +// (NB: it's called by token tidy as well.) func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.TokenEntry) error { defer metrics.MeasureSince([]string{"expire", "revoke-by-token"}, time.Now()) tokenNS, err := NamespaceByID(ctx, te.NamespaceID, m.core) @@ -990,31 +1000,12 @@ func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.Token return errwrap.Wrapf("failed to scan for leases: {{err}}", err) } - // Revoke all the keys + // Revoke all the keys by marking them expired for _, leaseID := range existing { - // Lock the child lease... save up all the unlocks for the end - // of the function. - leaseLock := m.lockForLeaseID(leaseID) - leaseLock.Lock() - defer leaseLock.Unlock() - - // Load the entry - le, err := m.loadEntry(ctx, leaseID) + err := m.lazyRevokeInternal(ctx, leaseID) if err != nil { return err } - - // If there's a lease, set expiration to now, persist, and call - // updatePending to hand off revocation to the expiration manager's pending - // timer map - if le != nil { - le.ExpireTime = time.Now() - - if err := m.persistEntry(ctx, le); err != nil { - return err - } - m.updatePending(le) - } } // te.Path should never be empty, but we check just in case From 044d252a186abf193332584252988c51b3f0f512 Mon Sep 17 00:00:00 2001 From: mgritter Date: Wed, 17 Mar 2021 12:50:50 -0700 Subject: [PATCH 03/10] Comment fix. --- vault/expiration.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 30e60eb9a8c6f..bd1d626bc2a5b 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -893,9 +893,10 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo if !skipToken { // Acquire lease for this lock - // If skipToken is true, then we're being called via RevokeByToken, so - // probably (always?) the lock is already held, and if we re-acquire it - // we get deadlock. + // If skipToken is true, then we're either being (1) called via RevokeByToken, so + // probably the lock is already held, and if we re-acquire we get deadlock, or + // (2) called by tidy, in which case the lock is not held. + // Is it worth separating those cases out, or is (2) OK to proceed unlocked? leaseLock := m.lockForLeaseID(leaseID) leaseLock.Lock() defer leaseLock.Unlock() From 138f108bc85d726e3f20a54d585a330a75cd5294 Mon Sep 17 00:00:00 2001 From: mgritter Date: Fri, 19 Mar 2021 11:48:20 -0700 Subject: [PATCH 04/10] Fix error checking in loadEntry. --- vault/expiration.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index bd1d626bc2a5b..6656367258219 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -1855,11 +1855,11 @@ func (m *ExpirationManager) loadEntry(ctx context.Context, leaseID string) (*lea ctx = namespace.ContextWithNamespace(ctx, namespace.RootNamespace) } - // // If a lease entry is nil, proactively delete the lease lock, in case we // created one erroneously. + // If there was an error, we don't know whether the lease entry exists or not. leaseEntry, err := m.loadEntryInternal(ctx, leaseID, restoreMode, true) - if err != nil && leaseEntry == nil { + if err == nil && leaseEntry == nil { m.deleteLockForLease(leaseID) } return leaseEntry, err From 364f6eb5dd6279c6bbc9d0f0ee9741f8a113e4b0 Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Mon, 7 Jun 2021 15:19:49 -0700 Subject: [PATCH 05/10] Add benchmark --- vault/expiration_test.go | 49 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 2c94d249d4204..fe9f246827bcd 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -612,6 +612,7 @@ func benchmarkExpirationBackend(b *testing.B, physicalBackend physical.Backend, Path: "prod/aws/" + pathUUID, ClientToken: "root", } + req.SetTokenEntry(&logical.TokenEntry{ID: "root", NamespaceID: "root"}) resp := &logical.Response{ Secret: &logical.Secret{ LeaseOptions: logical.LeaseOptions{ @@ -623,7 +624,7 @@ func benchmarkExpirationBackend(b *testing.B, physicalBackend physical.Backend, "secret_key": "abcd", }, } - _, err = exp.Register(context.Background(), req, resp) + _, err = exp.Register(namespace.RootContext(nil), req, resp) if err != nil { b.Fatalf("err: %v", err) } @@ -646,6 +647,52 @@ func benchmarkExpirationBackend(b *testing.B, physicalBackend physical.Backend, b.StopTimer() } +func BenchmarkExpiration_Create_Leases(b *testing.B) { + logger := logging.NewVaultLogger(log.Trace) + inm, err := inmem.NewInmem(nil, logger) + if err != nil { + b.Fatal(err) + } + + c, _, _ := TestCoreUnsealedBackend(b, inm) + exp := c.expiration + noop := &NoopBackend{} + view := NewBarrierView(c.barrier, "logical/") + meUUID, err := uuid.GenerateUUID() + if err != nil { + b.Fatal(err) + } + err = exp.router.Mount(noop, "prod/aws/", &MountEntry{Path: "prod/aws/", Type: "noop", UUID: meUUID, Accessor: "noop-accessor", namespace: namespace.RootNamespace}, view) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + req := &logical.Request{ + Operation: logical.ReadOperation, + Path: fmt.Sprintf("prod/aws/%d", i), + ClientToken: "root", + } + req.SetTokenEntry(&logical.TokenEntry{ID: "root", NamespaceID: "root"}) + resp := &logical.Response{ + Secret: &logical.Secret{ + LeaseOptions: logical.LeaseOptions{ + TTL: 400 * time.Second, + }, + }, + Data: map[string]interface{}{ + "access_key": "xyz", + "secret_key": "abcd", + }, + } + _, err = exp.Register(namespace.RootContext(nil), req, resp) + if err != nil { + b.Fatalf("err: %v", err) + } + } +} + func TestExpiration_Restore(t *testing.T) { c, _, _ := TestCoreUnsealed(t) exp := c.expiration From 56cd7139b5216a5f51496b04889a97cd79704a8a Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Mon, 7 Jun 2021 18:34:28 -0700 Subject: [PATCH 06/10] Add a few additional locking locations --- vault/expiration.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vault/expiration.go b/vault/expiration.go index 7ec5505fe6a28..5a4af5d5d4b97 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -983,6 +983,7 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo return err } + // Lease has been removed, also remove the in-memory lock. m.deleteLockForLease(leaseID) // Delete the secondary index, but only if it's a leased secret (not auth) @@ -2180,6 +2181,15 @@ func (m *ExpirationManager) CreateOrFetchRevocationLeaseByToken(ctx context.Cont // If there's no associated leaseEntry for the token, we create one if le == nil { + + // Acquire the lock here so persistEntry and updatePending are atomic, + // although it is *very unlikely* that anybody could grab the lease ID + // before this function returns. (They could find it in an index, or + // find it in a list.) + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() + defer leaseLock.Unlock() + auth := &logical.Auth{ ClientToken: te.ID, LeaseOptions: logical.LeaseOptions{ @@ -2206,6 +2216,7 @@ func (m *ExpirationManager) CreateOrFetchRevocationLeaseByToken(ctx context.Cont // Encode the entry if err := m.persistEntry(ctx, le); err != nil { + m.deleteLockForLease(leaseID) return "", err } } From 886b41fa1be6d706dc12bb85296f1ca4f4ff777e Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Tue, 8 Jun 2021 09:56:27 -0700 Subject: [PATCH 07/10] Improve benchmark slightly --- vault/expiration_test.go | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index fe9f246827bcd..add6696791e70 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -666,26 +666,26 @@ func BenchmarkExpiration_Create_Leases(b *testing.B) { if err != nil { b.Fatal(err) } + req := &logical.Request{ + Operation: logical.ReadOperation, + ClientToken: "root", + } + req.SetTokenEntry(&logical.TokenEntry{ID: "root", NamespaceID: "root"}) + resp := &logical.Response{ + Secret: &logical.Secret{ + LeaseOptions: logical.LeaseOptions{ + TTL: 400 * time.Second, + }, + }, + Data: map[string]interface{}{ + "access_key": "xyz", + "secret_key": "abcd", + }, + } b.ResetTimer() for i := 0; i < b.N; i++ { - req := &logical.Request{ - Operation: logical.ReadOperation, - Path: fmt.Sprintf("prod/aws/%d", i), - ClientToken: "root", - } - req.SetTokenEntry(&logical.TokenEntry{ID: "root", NamespaceID: "root"}) - resp := &logical.Response{ - Secret: &logical.Secret{ - LeaseOptions: logical.LeaseOptions{ - TTL: 400 * time.Second, - }, - }, - Data: map[string]interface{}{ - "access_key": "xyz", - "secret_key": "abcd", - }, - } + req.Path = fmt.Sprintf("prod/aws/%d", i) _, err = exp.Register(namespace.RootContext(nil), req, resp) if err != nil { b.Fatalf("err: %v", err) From 2105dba3169979418d3f0c97598ff01adcea6025 Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Tue, 8 Jun 2021 10:23:54 -0700 Subject: [PATCH 08/10] Update vault/expiration.go Co-authored-by: swayne275 --- vault/expiration.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index 5a4af5d5d4b97..73f92827aa447 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -944,7 +944,7 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo defer metrics.MeasureSince([]string{"expire", "revoke-common"}, time.Now()) if !skipToken { - // Acquire lease for this lock + // Acquire lock for this lease // If skipToken is true, then we're either being (1) called via RevokeByToken, so // probably the lock is already held, and if we re-acquire we get deadlock, or // (2) called by tidy, in which case the lock is not held. From 3fbe11a3b7423e2b70be6be67e121e49f0aa6026 Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Tue, 8 Jun 2021 10:25:42 -0700 Subject: [PATCH 09/10] Update vault/expiration.go Co-authored-by: swayne275 --- vault/expiration.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index 73f92827aa447..e987eadc89cd3 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -1735,7 +1735,7 @@ func (m *ExpirationManager) uniquePoliciesGc() { // to only create one lock. Instead, we'll create locks on-demand in an atomic fashion. // // Acquiring a lock from a leaseEntry is a bad idea because it could change -// between loading and acquirung the lock. So we only provide an ID-based map, and the +// between loading and acquiring the lock. So we only provide an ID-based map, and the // locking discipline should be: // 1. Lock lease // 2. Load, or attempt to load, leaseEntry From d983c06cfd532b1bfa4f4190c9223d68a8ee2415 Mon Sep 17 00:00:00 2001 From: Brian Kassouf Date: Tue, 8 Jun 2021 10:34:30 -0700 Subject: [PATCH 10/10] Add a lease lock into tidy --- vault/expiration.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 5a4af5d5d4b97..4a41783b66a45 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -646,7 +646,11 @@ func (m *ExpirationManager) Tidy(ctx context.Context) error { if revokeLease { // Force the revocation and skip going through the token store // again + + leaseLock := m.lockForLeaseID(leaseID) + leaseLock.Lock() err = m.revokeCommon(ctx, leaseID, true, true) + leaseLock.Unlock() if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %w", leaseID, err)) return @@ -947,8 +951,7 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo // Acquire lease for this lock // If skipToken is true, then we're either being (1) called via RevokeByToken, so // probably the lock is already held, and if we re-acquire we get deadlock, or - // (2) called by tidy, in which case the lock is not held. - // Is it worth separating those cases out, or is (2) OK to proceed unlocked? + // (2) called by tidy, in which case the lock is held by the tidy thread. leaseLock := m.lockForLeaseID(leaseID) leaseLock.Lock() defer leaseLock.Unlock()