diff --git a/vault/core.go b/vault/core.go index fa54058430397..1340c0f93c0e3 100644 --- a/vault/core.go +++ b/vault/core.go @@ -64,6 +64,10 @@ const ( // leaderPrefixCleanDelay is how long to wait between deletions // of orphaned leader keys, to prevent slamming the backend. leaderPrefixCleanDelay = 200 * time.Millisecond + + // manualStepDownSleepPeriod is how long to sleep after a user-initiated + // step down of the active node, to prevent instantly regrabbing the lock + manualStepDownSleepPeriod = 10 * time.Second ) var ( @@ -206,9 +210,10 @@ type Core struct { stateLock sync.RWMutex sealed bool - standby bool - standbyDoneCh chan struct{} - standbyStopCh chan struct{} + standby bool + standbyDoneCh chan struct{} + standbyStopCh chan struct{} + manualStepDownCh chan struct{} // unlockParts has the keys provided to Unseal until // the threshold number of parts is available. @@ -1149,7 +1154,8 @@ func (c *Core) Unseal(key []byte) (bool, error) { // Go to standby mode, wait until we are active to unseal c.standbyDoneCh = make(chan struct{}) c.standbyStopCh = make(chan struct{}) - go c.runStandby(c.standbyDoneCh, c.standbyStopCh) + c.manualStepDownCh = make(chan struct{}) + go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh) } // Success! @@ -1157,54 +1163,25 @@ func (c *Core) Unseal(key []byte) (bool, error) { return true, nil } -// Seal is used to seal the vault -func (c *Core) Seal(token string) error { - return c.stepDownAndSeal(token, true) -} - -// StepDown is used to step down from leadership -func (c *Core) StepDown(token string) error { - return c.stepDownAndSeal(token, false) -} - -// stepDownAndSeal is used to step down from leadership and, optionally, -// re-seal the Vault. If sealed, this requires the Vault to be unsealed again -// to perform any further operations. -func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { - if seal { - defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) - } else { - defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) - } +// Seal is used to re-seal the Vault. This requires the Vault to +// be unsealed again to perform any further operations. +func (c *Core) Seal(token string) (retErr error) { + defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } - if !seal && (c.ha == nil || c.standby) { - return nil - } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, + Path: "sys/seal", ClientToken: token, } - if seal { - req.Path = "sys/seal" - } else { - req.Path = "sys/step-down" - } - acl, te, err := c.fetchACLandTokenEntry(req) - // Attempt to use the token (decrement num_uses) - if te != nil { - if err := c.tokenStore.UseToken(te); err != nil { - c.logger.Printf("[ERR] core: failed to use token: %v", err) - retErr = ErrInternalError - } - } + acl, te, err := c.fetchACLandTokenEntry(req) if err != nil { // Since there is no token store in standby nodes, sealing cannot // be done. Ideally, the request has to be forwarded to leader node @@ -1212,11 +1189,20 @@ func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { // just returning with an error and recommending a vault restart, which // essentially does the same thing. if c.standby { - c.logger.Printf("[ERR] core: vault cannot step down or be sealed when in standby mode; please restart instead") - return errors.New("vault cannot step down or be sealed when in standby mode; please restart instead") + c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead") + return errors.New("vault cannot seal when in standby mode; please restart instead") } return err } + // Attempt to use the token (decrement num_uses) + // If we can't, we still continue attempting the seal, so long as the token + // has appropriate permissions + if te != nil { + if err := c.tokenStore.UseToken(te); err != nil { + c.logger.Printf("[ERR] core: failed to use token: %v", err) + retErr = ErrInternalError + } + } // Verify that this operation is allowed allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) @@ -1229,21 +1215,65 @@ func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { return logical.ErrPermissionDenied } - // Seal the Vault - if seal { - err = c.sealInternal() - if err == nil && retErr == ErrInternalError { - c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") - } else { - retErr = err - } + //Seal the Vault + err = c.sealInternal() + if err == nil && retErr == ErrInternalError { + c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") } else { - c.stepDownInternal() + retErr = err } return } +// StepDown is used to step down from leadership +func (c *Core) StepDown(token string) error { + defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) + + c.stateLock.Lock() + defer c.stateLock.Unlock() + if c.sealed { + return nil + } + if c.ha == nil || c.standby { + return nil + } + + // Validate the token is a root token + req := &logical.Request{ + Operation: logical.UpdateOperation, + Path: "sys/step-down", + ClientToken: token, + } + + acl, te, err := c.fetchACLandTokenEntry(req) + if err != nil { + return err + } + // Attempt to use the token (decrement num_uses) + if te != nil { + if err := c.tokenStore.UseToken(te); err != nil { + c.logger.Printf("[ERR] core: failed to use token: %v", err) + return err + } + } + + // Verify that this operation is allowed + allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) + if !allowed { + return logical.ErrPermissionDenied + } + + // We always require root privileges for this operation + if !rootPrivs { + return logical.ErrPermissionDenied + } + + c.manualStepDownCh <- struct{}{} + + return nil +} + // sealInternal is an internal method used to seal the vault. It does not do // any authorization checking. The stateLock must be held prior to calling. func (c *Core) sealInternal() error { @@ -1274,16 +1304,6 @@ func (c *Core) sealInternal() error { return nil } -// stepDownInternal is an internal method used to step down from active duty. -// It does not do any authorization checking. -func (c *Core) stepDownInternal() { - // Merely trigger the loop to re-run. This value will cause the - // loop to run through giving up leadership, but without triggering - // the return at the end of the next loop run, since it's not - // closed - c.standbyStopCh <- struct{}{} -} - // postUnseal is invoked after the barrier is unsealed, but before // allowing any user operations. This allows us to setup any state that // requires the Vault to be unsealed such as mount tables, logical backends, @@ -1390,8 +1410,9 @@ func (c *Core) preSeal() error { // runStandby is a long running routine that is used when an HA backend // is enabled. It waits until we are leader and switches this Vault to // active. -func (c *Core) runStandby(doneCh, stopCh chan struct{}) { +func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) { defer close(doneCh) + defer close(manualStepDownCh) c.logger.Printf("[INFO] core: entering standby mode") // Monitor for key rotation @@ -1455,11 +1476,15 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { } // Monitor a loss of leadership + var manualStepDown bool select { case <-leaderLostCh: c.logger.Printf("[WARN] core: leadership lost, stopping active operation") case <-stopCh: c.logger.Printf("[WARN] core: stopping active operation") + case <-manualStepDownCh: + c.logger.Printf("[WARN] core: stepping down from active operation to standby") + manualStepDown = true } // Clear ourself as leader @@ -1483,7 +1508,9 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { // If we've merely stepped down, we could instantly grab the lock // again. Give the other nodes a chance. - time.Sleep(time.Second) + if manualStepDown { + time.Sleep(manualStepDownSleepPeriod) + } } } diff --git a/vault/core_test.go b/vault/core_test.go index e597d767e2811..c66cf1fa82e57 100644 --- a/vault/core_test.go +++ b/vault/core_test.go @@ -1322,8 +1322,9 @@ func TestCore_StepDown(t *testing.T) { t.Fatal("error stepping down core 1") } - // Give time to switch leaders - time.Sleep(2 * time.Second) + // Give time to switch leaders -- core 1 will still be waiting on its + // cooling off period so give it a full 10 seconds to recover + time.Sleep(10 * time.Second) // Core2 should be in standby standby, err = core2.Standby()