Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Acquire a per-lock lease to make renew and revoke atomic wrt each other. #11122

Merged
merged 13 commits into from Jun 10, 2021
134 changes: 95 additions & 39 deletions vault/expiration.go
Expand Up @@ -97,6 +97,9 @@ type ExpirationManager struct {
leaseCount int
pendingLock sync.RWMutex

// A sync.Lock for every active leaseID
lockPerLease sync.Map

// The uniquePolicies map holds policy sets, so they can
// be deduplicated. It is periodically emptied to prevent
// unbounded growth.
Expand Down Expand Up @@ -355,6 +358,8 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
leaseCount: 0,
tidyLock: new(int32),

lockPerLease: sync.Map{},

uniquePolicies: make(map[string][]string),
emptyUniquePolicies: time.NewTicker(7 * 24 * time.Hour),

Expand Down Expand Up @@ -853,6 +858,10 @@ func (m *ExpirationManager) Revoke(ctx context.Context, leaseID string) error {
func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) error {
defer metrics.MeasureSince([]string{"expire", "lazy-revoke"}, time.Now())

leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Load the entry
le, err := m.loadEntry(ctx, leaseID)
if err != nil {
Expand All @@ -865,16 +874,10 @@ func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) erro
}

le.ExpireTime = time.Now()
{
m.pendingLock.Lock()
if err := m.persistEntry(ctx, le); err != nil {
m.pendingLock.Unlock()
return err
}

m.updatePendingInternal(le)
m.pendingLock.Unlock()
if err := m.persistEntry(ctx, le); err != nil {
return err
}
m.updatePending(le)

return nil
}
Expand All @@ -884,6 +887,11 @@ func (m *ExpirationManager) LazyRevoke(ctx context.Context, leaseID string) erro
func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, force, skipToken bool) error {
defer metrics.MeasureSince([]string{"expire", "revoke-common"}, time.Now())

// Acquire lease for this lock
leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Load the entry
le, err := m.loadEntry(ctx, leaseID)
if err != nil {
Expand Down Expand Up @@ -913,6 +921,8 @@ func (m *ExpirationManager) revokeCommon(ctx context.Context, leaseID string, fo
return err
}

m.deleteLockForLease(leaseID)

// Delete the secondary index, but only if it's a leased secret (not auth)
if le.Secret != nil {
if err := m.removeIndexByToken(ctx, le); err != nil {
Expand Down Expand Up @@ -982,6 +992,12 @@ func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.Token

// Revoke all the keys
for _, leaseID := range existing {
// Lock the child lease... save up all the unlocks for the end
// of the function.
leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Load the entry
le, err := m.loadEntry(ctx, leaseID)
if err != nil {
Expand All @@ -994,16 +1010,10 @@ func (m *ExpirationManager) RevokeByToken(ctx context.Context, te *logical.Token
if le != nil {
le.ExpireTime = time.Now()

{
m.pendingLock.Lock()
if err := m.persistEntry(ctx, le); err != nil {
m.pendingLock.Unlock()
return err
}

m.updatePendingInternal(le)
m.pendingLock.Unlock()
if err := m.persistEntry(ctx, le); err != nil {
return err
}
m.updatePending(le)
}
}

Expand Down Expand Up @@ -1070,6 +1080,7 @@ func (m *ExpirationManager) revokePrefixCommon(ctx context.Context, prefix strin
// Revoke all the keys
for idx, suffix := range existing {
leaseID := prefix + suffix
// No need to acquire per-lease lock here, one of these two will do it.
switch {
case sync:
if err := m.revokeCommon(ctx, leaseID, force, false); err != nil {
Expand All @@ -1090,6 +1101,11 @@ func (m *ExpirationManager) revokePrefixCommon(ctx context.Context, prefix strin
func (m *ExpirationManager) Renew(ctx context.Context, leaseID string, increment time.Duration) (*logical.Response, error) {
defer metrics.MeasureSince([]string{"expire", "renew"}, time.Now())

// Acquire lock for this lease
leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Load the entry
le, err := m.loadEntry(ctx, leaseID)
briankassouf marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
Expand Down Expand Up @@ -1182,18 +1198,13 @@ func (m *ExpirationManager) Renew(ctx context.Context, leaseID string, increment
}
}

{
m.pendingLock.Lock()
if err := m.persistEntry(ctx, le); err != nil {
m.pendingLock.Unlock()
return nil, err
}

// Update the expiration time
m.updatePendingInternal(le)
m.pendingLock.Unlock()
if err := m.persistEntry(ctx, le); err != nil {
return nil, err
}

// Update the expiration time
m.updatePending(le)

// Return the response
return resp, nil
}
Expand Down Expand Up @@ -1232,6 +1243,11 @@ func (m *ExpirationManager) RenewToken(ctx context.Context, req *logical.Request
leaseID = fmt.Sprintf("%s.%s", leaseID, ns.ID)
}

// Acquire lock for this lease
leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
mgritter marked this conversation as resolved.
Show resolved Hide resolved
defer leaseLock.Unlock()

// Load the entry
le, err := m.loadEntry(ctx, leaseID)
briankassouf marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
Expand Down Expand Up @@ -1297,17 +1313,10 @@ func (m *ExpirationManager) RenewToken(ctx context.Context, req *logical.Request
le.ExpireTime = resp.Auth.ExpirationTime()
le.LastRenewalTime = time.Now()

{
m.pendingLock.Lock()
if err := m.persistEntry(ctx, le); err != nil {
m.pendingLock.Unlock()
return nil, err
}

// Update the expiration time
m.updatePendingInternal(le)
m.pendingLock.Unlock()
if err := m.persistEntry(ctx, le); err != nil {
return nil, err
}
m.updatePending(le)

retResp.Auth = resp.Auth
return retResp, nil
Expand Down Expand Up @@ -1385,6 +1394,8 @@ func (m *ExpirationManager) Register(ctx context.Context, req *logical.Request,
if err := m.removeIndexByToken(ctx, le); err != nil {
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err))
}

m.deleteLockForLease(leaseID)
}
}()

Expand All @@ -1403,6 +1414,14 @@ func (m *ExpirationManager) Register(ctx context.Context, req *logical.Request,
}
}

// Acquire the lock here so persistEntry and updatePending are atomic,
// although it is *very unlikely* that anybody could grab the lease ID
// before this function returns. (They could find it in an index, or
// find it in a list.)
leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Encode the entry
if err := m.persistEntry(ctx, le); err != nil {
return "", err
Expand Down Expand Up @@ -1496,6 +1515,10 @@ func (m *ExpirationManager) RegisterAuth(ctx context.Context, te *logical.TokenE
Version: 1,
}

leaseLock := m.lockForLeaseID(leaseID)
leaseLock.Lock()
defer leaseLock.Unlock()

// Encode the entry
if err := m.persistEntry(ctx, &le); err != nil {
return err
Expand Down Expand Up @@ -1634,6 +1657,30 @@ func (m *ExpirationManager) uniquePoliciesGc() {
}
}

// Placing a lock in pendingMap means that we need to work very hard on reload
// to only create one lock. Instead, we'll create locks on-demand in an atomic fashion.
//
// Acquiring a lock from a leaseEntry is a bad idea because it could change
// between loading and acquirung the lock. So we only provide an ID-based map, and the
briankassouf marked this conversation as resolved.
Show resolved Hide resolved
// locking discipline should be:
// 1. Lock lease
// 2. Load, or attempt to load, leaseEntry
// 3. Modify leaseEntry and pendingMap (atomic wrt operations on this lease)
// 4. Unlock lease
//
// The lock must be removed from the map when the lease is deleted, or is
// found to not exist in storage. loadEntry does this whenever it returns
// nil, but we should also do it in revokeCommon().
func (m *ExpirationManager) lockForLeaseID(id string) *sync.Mutex {
mutex := &sync.Mutex{}
lock, _ := m.lockPerLease.LoadOrStore(id, mutex)
return lock.(*sync.Mutex)
}

func (m *ExpirationManager) deleteLockForLease(id string) {
m.lockPerLease.Delete(id)
}

// updatePending is used to update a pending invocation for a lease
func (m *ExpirationManager) updatePending(le *leaseEntry) {
m.pendingLock.Lock()
Expand Down Expand Up @@ -1815,7 +1862,16 @@ func (m *ExpirationManager) loadEntry(ctx context.Context, leaseID string) (*lea
} else {
ctx = namespace.ContextWithNamespace(ctx, namespace.RootNamespace)
}
return m.loadEntryInternal(ctx, leaseID, restoreMode, true)

//
mgritter marked this conversation as resolved.
Show resolved Hide resolved
// If a lease entry is nil, proactively delete the lease lock, in case we
// created one erroneously.
leaseEntry, err := m.loadEntryInternal(ctx, leaseID, restoreMode, true)
if err != nil && leaseEntry == nil {
m.deleteLockForLease(leaseID)
swayne275 marked this conversation as resolved.
Show resolved Hide resolved
}
return leaseEntry, err

}

// loadEntryInternal is used when you need to load an entry but also need to
Expand Down