From 6b0c6923850c442c5fdb3855a89b4ff17d287ce9 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 26 Feb 2016 19:43:55 -0500 Subject: [PATCH 1/5] Provide 'sys/step-down' and 'vault step-down' This endpoint causes the node it's hit to step down from active duty. It's a noop if the node isn't active or not running in HA mode. The node will wait one second before attempting to reacquire the lock, to give other nodes a chance to grab it. Fixes #1093 --- api/sys_stepdown.go | 10 + cli/commands.go | 6 + command/step-down.go | 54 ++++++ http/handler.go | 1 + http/sys_seal.go | 23 +++ http/sys_seal_test.go | 10 + vault/core.go | 69 +++++-- vault/core_test.go | 176 ++++++++++++++++++ website/source/docs/http/sys-seal.html.md | 4 +- .../source/docs/http/sys-step-down.html.md | 33 ++++ website/source/layouts/http.erb | 3 + 11 files changed, 374 insertions(+), 15 deletions(-) create mode 100644 api/sys_stepdown.go create mode 100644 command/step-down.go create mode 100644 website/source/docs/http/sys-step-down.html.md diff --git a/api/sys_stepdown.go b/api/sys_stepdown.go new file mode 100644 index 0000000000000..421e5f19fb960 --- /dev/null +++ b/api/sys_stepdown.go @@ -0,0 +1,10 @@ +package api + +func (c *Sys) StepDown() error { + r := c.c.NewRequest("PUT", "/v1/sys/step-down") + resp, err := c.c.RawRequest(r) + if err == nil { + defer resp.Body.Close() + } + return err +} diff --git a/cli/commands.go b/cli/commands.go index 05f5c7479567e..1f5b89f911faf 100644 --- a/cli/commands.go +++ b/cli/commands.go @@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { }, nil }, + "step-down": func() (cli.Command, error) { + return &command.StepDownCommand{ + Meta: meta, + }, nil + }, + "mount": func() (cli.Command, error) { return &command.MountCommand{ Meta: meta, diff --git a/command/step-down.go b/command/step-down.go new file mode 100644 index 0000000000000..1f2448e560bc5 --- /dev/null +++ b/command/step-down.go @@ -0,0 +1,54 @@ +package 
command + +import ( + "fmt" + "strings" +) + +// StepDownCommand is a Command that seals the vault. +type StepDownCommand struct { + Meta +} + +func (c *StepDownCommand) Run(args []string) int { + flags := c.Meta.FlagSet("step-down", FlagSetDefault) + flags.Usage = func() { c.Ui.Error(c.Help()) } + if err := flags.Parse(args); err != nil { + return 1 + } + + client, err := c.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf( + "Error initializing client: %s", err)) + return 2 + } + + if err := client.Sys().StepDown(); err != nil { + c.Ui.Error(fmt.Sprintf("Error stepping down: %s", err)) + return 1 + } + + return 0 +} + +func (c *StepDownCommand) Synopsis() string { + return "Force the Vault node to give up active duty" +} + +func (c *StepDownCommand) Help() string { + helpText := ` +Usage: vault step-down [options] + + Force the Vault node to step down from active duty. + + This causes the indicated node to give up active status. Note that while the + affected node will have a short delay before attempting to grab the lock + again, if no other node grabs the lock beforehand, it is possible for the + same node to re-grab the lock and become active again. 
+ +General Options: + + ` + generalOptionsUsage() + return strings.TrimSpace(helpText) +} diff --git a/http/handler.go b/http/handler.go index bd2f2dafc7cf2..5508a9539a403 100644 --- a/http/handler.go +++ b/http/handler.go @@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler { mux.Handle("/v1/sys/init", handleSysInit(core)) mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core)) mux.Handle("/v1/sys/seal", handleSysSeal(core)) + mux.Handle("/v1/sys/step-down", handleSysStepDown(core)) mux.Handle("/v1/sys/unseal", handleSysUnseal(core)) mux.Handle("/v1/sys/mounts", proxySysRequest(core)) mux.Handle("/v1/sys/mounts/", proxySysRequest(core)) diff --git a/http/sys_seal.go b/http/sys_seal.go index d5ac76624f490..a11a2078b779f 100644 --- a/http/sys_seal.go +++ b/http/sys_seal.go @@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler { }) } +func handleSysStepDown(core *vault.Core) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case "PUT": + case "POST": + default: + respondError(w, http.StatusMethodNotAllowed, nil) + return + } + + // Get the auth for the request so we can access the token directly + req := requestAuth(r, &logical.Request{}) + + // Seal with the token above + if err := core.StepDown(req.ClientToken); err != nil { + respondError(w, http.StatusInternalServerError, err) + return + } + + respondOk(w, nil) + }) +} + func handleSysUnseal(core *vault.Core) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.Method { diff --git a/http/sys_seal_test.go b/http/sys_seal_test.go index 4b3008276040c..e1cca89a6ff65 100644 --- a/http/sys_seal_test.go +++ b/http/sys_seal_test.go @@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) { httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil) testResponseStatus(t, httpResp, 204) } + +func TestSysStepDown(t *testing.T) { + core, _, token := vault.TestCoreUnsealed(t) + ln, addr := 
TestServer(t, core) + defer ln.Close() + TestServerAuth(t, addr, token) + + resp := testHttpPut(t, token, addr+"/v1/sys/step-down", nil) + testResponseStatus(t, resp, 204) +} diff --git a/vault/core.go b/vault/core.go index 75df726016f2c..fa54058430397 100644 --- a/vault/core.go +++ b/vault/core.go @@ -1157,22 +1157,45 @@ func (c *Core) Unseal(key []byte) (bool, error) { return true, nil } -// Seal is used to re-seal the Vault. This requires the Vault to -// be unsealed again to perform any further operations. -func (c *Core) Seal(token string) (retErr error) { - defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) +// Seal is used to seal the vault +func (c *Core) Seal(token string) error { + return c.stepDownAndSeal(token, true) +} + +// StepDown is used to step down from leadership +func (c *Core) StepDown(token string) error { + return c.stepDownAndSeal(token, false) +} + +// stepDownAndSeal is used to step down from leadership and, optionally, +// re-seal the Vault. If sealed, this requires the Vault to be unsealed again +// to perform any further operations. +func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { + if seal { + defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) + } else { + defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) + } + c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } + if !seal && (c.ha == nil || c.standby) { + return nil + } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, - Path: "sys/seal", ClientToken: token, } + if seal { + req.Path = "sys/seal" + } else { + req.Path = "sys/step-down" + } acl, te, err := c.fetchACLandTokenEntry(req) // Attempt to use the token (decrement num_uses) @@ -1189,8 +1212,8 @@ func (c *Core) Seal(token string) (retErr error) { // just returning with an error and recommending a vault restart, which // essentially does the same thing. 
if c.standby { - c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead") - return errors.New("vault cannot be sealed when in standby mode; please restart instead") + c.logger.Printf("[ERR] core: vault cannot step down or be sealed when in standby mode; please restart instead") + return errors.New("vault cannot step down or be sealed when in standby mode; please restart instead") } return err } @@ -1207,19 +1230,22 @@ func (c *Core) Seal(token string) (retErr error) { } // Seal the Vault - err = c.sealInternal() - if err == nil && retErr == ErrInternalError { - c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") + if seal { + err = c.sealInternal() + if err == nil && retErr == ErrInternalError { + c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") + } else { + retErr = err + } } else { - retErr = err + c.stepDownInternal() } return } -// sealInternal is an internal method used to seal the vault. -// It does not do any authorization checking. The stateLock must -// be held prior to calling. +// sealInternal is an internal method used to seal the vault. It does not do +// any authorization checking. The stateLock must be held prior to calling. func (c *Core) sealInternal() error { // Enable that we are sealed to prevent furthur transactions c.sealed = true @@ -1244,9 +1270,20 @@ func (c *Core) sealInternal() error { return err } c.logger.Printf("[INFO] core: vault is sealed") + return nil } +// stepDownInternal is an internal method used to step down from active duty. +// It does not do any authorization checking. +func (c *Core) stepDownInternal() { + // Merely trigger the loop to re-run. 
This value will cause the + // loop to run through giving up leadership, but without triggering + // the return at the end of the next loop run, since it's not + // closed + c.standbyStopCh <- struct{}{} +} + // postUnseal is invoked after the barrier is unsealed, but before // allowing any user operations. This allows us to setup any state that // requires the Vault to be unsealed such as mount tables, logical backends, @@ -1443,6 +1480,10 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { if preSealErr != nil { c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err) } + + // If we've merely stepped down, we could instantly grab the lock + // again. Give the other nodes a chance. + time.Sleep(time.Second) } } diff --git a/vault/core_test.go b/vault/core_test.go index 1f9f80bd0b908..e597d767e2811 100644 --- a/vault/core_test.go +++ b/vault/core_test.go @@ -1183,6 +1183,182 @@ func TestCore_Standby_Seal(t *testing.T) { } } +func TestCore_StepDown(t *testing.T) { + // Create the first core and initialize it + inm := physical.NewInmem() + inmha := physical.NewInmemHA() + advertiseOriginal := "http://127.0.0.1:8200" + core, err := NewCore(&CoreConfig{ + Physical: inm, + HAPhysical: inmha, + AdvertiseAddr: advertiseOriginal, + DisableMlock: true, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + key, root := TestCoreInit(t, core) + if _, err := core.Unseal(TestKeyCopy(key)); err != nil { + t.Fatalf("unseal err: %s", err) + } + + // Verify unsealed + sealed, err := core.Sealed() + if err != nil { + t.Fatalf("err checking seal status: %s", err) + } + if sealed { + t.Fatal("should not be sealed") + } + + // Wait for core to become active + testWaitActive(t, core) + + // Ensure that the original clean function has stopped running + time.Sleep(2 * time.Second) + + // Check the leader is local + isLeader, advertise, err := core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise 
!= advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Create the second core and initialize it + advertiseOriginal2 := "http://127.0.0.1:8500" + core2, err := NewCore(&CoreConfig{ + Physical: inm, + HAPhysical: inmha, + AdvertiseAddr: advertiseOriginal2, + DisableMlock: true, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + if _, err := core2.Unseal(TestKeyCopy(key)); err != nil { + t.Fatalf("unseal err: %s", err) + } + + // Verify unsealed + sealed, err = core2.Sealed() + if err != nil { + t.Fatalf("err checking seal status: %s", err) + } + if sealed { + t.Fatal("should not be sealed") + } + + // Core2 should be in standby + standby, err := core2.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is not local + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Step down core + err = core.StepDown(root) + if err != nil { + t.Fatal("error stepping down core 1") + } + + // Give time to switch leaders + time.Sleep(2 * time.Second) + + // Core1 should be in standby + standby, err = core.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is core2 + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise != advertiseOriginal2 { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Check the leader is not local + isLeader, advertise, err = core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal2 { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Step down core2 + err = core2.StepDown(root) + 
if err != nil { + t.Fatal("error stepping down core 1") + } + + // Give time to switch leaders + time.Sleep(2 * time.Second) + + // Core2 should be in standby + standby, err = core2.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is core1 + isLeader, advertise, err = core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Check the leader is not local + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } +} + func TestCore_CleanLeaderPrefix(t *testing.T) { // Create the first core and initialize it inm := physical.NewInmem() diff --git a/website/source/docs/http/sys-seal.html.md b/website/source/docs/http/sys-seal.html.md index 55d5a81a9ccce..d82b9af38671b 100644 --- a/website/source/docs/http/sys-seal.html.md +++ b/website/source/docs/http/sys-seal.html.md @@ -11,7 +11,9 @@ description: |-
Description
- Seals the Vault. In HA mode, only an active node can be sealed. Standby nodes should be restarted to get the same effect. + Seals the Vault. In HA mode, only an active node can be sealed. Standby + nodes should be restarted to get the same effect. Requires a token with + `root` policy or `sudo` capability on the path.
Method
diff --git a/website/source/docs/http/sys-step-down.html.md b/website/source/docs/http/sys-step-down.html.md new file mode 100644 index 0000000000000..94f5aa4c29887 --- /dev/null +++ b/website/source/docs/http/sys-step-down.html.md @@ -0,0 +1,33 @@ +--- +layout: "http" +page_title: "HTTP API: /sys/step-down" +sidebar_current: "docs-http-ha-step-down" +description: |- + The '/sys/step-down' endpoint causes the node to give up active status. +--- + +# /sys/step-down + +
+
Description
+
+ Forces the node to give up active status. If the node does not have active + status, this endpoint does nothing. Note that the node will sleep for a + second before attempting to grab the active lock again, but if no standby + nodes grab the active lock in the interim, the same node may become the + active node again. Requires a token with `root` policy or `sudo` capability + on the path. +
+ +
Method
+
PUT
+ +
Parameters
+
+ None +
+ +
Returns
+
A `204` response code. +
+
diff --git a/website/source/layouts/http.erb b/website/source/layouts/http.erb index ac69d5651ed9f..ee75b37f9cbc8 100644 --- a/website/source/layouts/http.erb +++ b/website/source/layouts/http.erb @@ -107,6 +107,9 @@ > /sys/leader + > + /sys/step-down + From ef4466d6d3b1ea81b4d0ce4c9aa6a06ccf34166a Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Sun, 28 Feb 2016 21:35:32 -0500 Subject: [PATCH 2/5] Address review feedback --- vault/core.go | 151 ++++++++++++++++++++++++++------------------- vault/core_test.go | 5 +- 2 files changed, 92 insertions(+), 64 deletions(-) diff --git a/vault/core.go b/vault/core.go index fa54058430397..1340c0f93c0e3 100644 --- a/vault/core.go +++ b/vault/core.go @@ -64,6 +64,10 @@ const ( // leaderPrefixCleanDelay is how long to wait between deletions // of orphaned leader keys, to prevent slamming the backend. leaderPrefixCleanDelay = 200 * time.Millisecond + + // manualStepDownSleepPeriod is how long to sleep after a user-initiated + // step down of the active node, to prevent instantly regrabbing the lock + manualStepDownSleepPeriod = 10 * time.Second ) var ( @@ -206,9 +210,10 @@ type Core struct { stateLock sync.RWMutex sealed bool - standby bool - standbyDoneCh chan struct{} - standbyStopCh chan struct{} + standby bool + standbyDoneCh chan struct{} + standbyStopCh chan struct{} + manualStepDownCh chan struct{} // unlockParts has the keys provided to Unseal until // the threshold number of parts is available. @@ -1149,7 +1154,8 @@ func (c *Core) Unseal(key []byte) (bool, error) { // Go to standby mode, wait until we are active to unseal c.standbyDoneCh = make(chan struct{}) c.standbyStopCh = make(chan struct{}) - go c.runStandby(c.standbyDoneCh, c.standbyStopCh) + c.manualStepDownCh = make(chan struct{}) + go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh) } // Success! 
@@ -1157,54 +1163,25 @@ func (c *Core) Unseal(key []byte) (bool, error) { return true, nil } -// Seal is used to seal the vault -func (c *Core) Seal(token string) error { - return c.stepDownAndSeal(token, true) -} - -// StepDown is used to step down from leadership -func (c *Core) StepDown(token string) error { - return c.stepDownAndSeal(token, false) -} - -// stepDownAndSeal is used to step down from leadership and, optionally, -// re-seal the Vault. If sealed, this requires the Vault to be unsealed again -// to perform any further operations. -func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { - if seal { - defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) - } else { - defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) - } +// Seal is used to re-seal the Vault. This requires the Vault to +// be unsealed again to perform any further operations. +func (c *Core) Seal(token string) (retErr error) { + defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } - if !seal && (c.ha == nil || c.standby) { - return nil - } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, + Path: "sys/seal", ClientToken: token, } - if seal { - req.Path = "sys/seal" - } else { - req.Path = "sys/step-down" - } - acl, te, err := c.fetchACLandTokenEntry(req) - // Attempt to use the token (decrement num_uses) - if te != nil { - if err := c.tokenStore.UseToken(te); err != nil { - c.logger.Printf("[ERR] core: failed to use token: %v", err) - retErr = ErrInternalError - } - } + acl, te, err := c.fetchACLandTokenEntry(req) if err != nil { // Since there is no token store in standby nodes, sealing cannot // be done. 
Ideally, the request has to be forwarded to leader node @@ -1212,11 +1189,20 @@ func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { // just returning with an error and recommending a vault restart, which // essentially does the same thing. if c.standby { - c.logger.Printf("[ERR] core: vault cannot step down or be sealed when in standby mode; please restart instead") - return errors.New("vault cannot step down or be sealed when in standby mode; please restart instead") + c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead") + return errors.New("vault cannot seal when in standby mode; please restart instead") } return err } + // Attempt to use the token (decrement num_uses) + // If we can't, we still continue attempting the seal, so long as the token + // has appropriate permissions + if te != nil { + if err := c.tokenStore.UseToken(te); err != nil { + c.logger.Printf("[ERR] core: failed to use token: %v", err) + retErr = ErrInternalError + } + } // Verify that this operation is allowed allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) @@ -1229,21 +1215,65 @@ func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { return logical.ErrPermissionDenied } - // Seal the Vault - if seal { - err = c.sealInternal() - if err == nil && retErr == ErrInternalError { - c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") - } else { - retErr = err - } + //Seal the Vault + err = c.sealInternal() + if err == nil && retErr == ErrInternalError { + c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") } else { - c.stepDownInternal() + retErr = err } return } +// StepDown is used to step down from leadership +func (c *Core) StepDown(token string) error { + defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) + + c.stateLock.Lock() + defer c.stateLock.Unlock() + if c.sealed 
{ + return nil + } + if c.ha == nil || c.standby { + return nil + } + + // Validate the token is a root token + req := &logical.Request{ + Operation: logical.UpdateOperation, + Path: "sys/step-down", + ClientToken: token, + } + + acl, te, err := c.fetchACLandTokenEntry(req) + if err != nil { + return err + } + // Attempt to use the token (decrement num_uses) + if te != nil { + if err := c.tokenStore.UseToken(te); err != nil { + c.logger.Printf("[ERR] core: failed to use token: %v", err) + return err + } + } + + // Verify that this operation is allowed + allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) + if !allowed { + return logical.ErrPermissionDenied + } + + // We always require root privileges for this operation + if !rootPrivs { + return logical.ErrPermissionDenied + } + + c.manualStepDownCh <- struct{}{} + + return nil +} + // sealInternal is an internal method used to seal the vault. It does not do // any authorization checking. The stateLock must be held prior to calling. func (c *Core) sealInternal() error { @@ -1274,16 +1304,6 @@ func (c *Core) sealInternal() error { return nil } -// stepDownInternal is an internal method used to step down from active duty. -// It does not do any authorization checking. -func (c *Core) stepDownInternal() { - // Merely trigger the loop to re-run. This value will cause the - // loop to run through giving up leadership, but without triggering - // the return at the end of the next loop run, since it's not - // closed - c.standbyStopCh <- struct{}{} -} - // postUnseal is invoked after the barrier is unsealed, but before // allowing any user operations. This allows us to setup any state that // requires the Vault to be unsealed such as mount tables, logical backends, @@ -1390,8 +1410,9 @@ func (c *Core) preSeal() error { // runStandby is a long running routine that is used when an HA backend // is enabled. It waits until we are leader and switches this Vault to // active. 
-func (c *Core) runStandby(doneCh, stopCh chan struct{}) { +func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) { defer close(doneCh) + defer close(manualStepDownCh) c.logger.Printf("[INFO] core: entering standby mode") // Monitor for key rotation @@ -1455,11 +1476,15 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { } // Monitor a loss of leadership + var manualStepDown bool select { case <-leaderLostCh: c.logger.Printf("[WARN] core: leadership lost, stopping active operation") case <-stopCh: c.logger.Printf("[WARN] core: stopping active operation") + case <-manualStepDownCh: + c.logger.Printf("[WARN] core: stepping down from active operation to standby") + manualStepDown = true } // Clear ourself as leader @@ -1483,7 +1508,9 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { // If we've merely stepped down, we could instantly grab the lock // again. Give the other nodes a chance. - time.Sleep(time.Second) + if manualStepDown { + time.Sleep(manualStepDownSleepPeriod) + } } } diff --git a/vault/core_test.go b/vault/core_test.go index e597d767e2811..c66cf1fa82e57 100644 --- a/vault/core_test.go +++ b/vault/core_test.go @@ -1322,8 +1322,9 @@ func TestCore_StepDown(t *testing.T) { t.Fatal("error stepping down core 1") } - // Give time to switch leaders - time.Sleep(2 * time.Second) + // Give time to switch leaders -- core 1 will still be waiting on its + // cooling off period so give it a full 10 seconds to recover + time.Sleep(10 * time.Second) // Core2 should be in standby standby, err = core2.Standby() From d0ec85f4ba5d9f033a190a759fb791a6a60acde7 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Mon, 29 Feb 2016 10:09:11 -0500 Subject: [PATCH 3/5] Update doc, it's now 10 seconds --- website/source/docs/http/sys-step-down.html.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/source/docs/http/sys-step-down.html.md b/website/source/docs/http/sys-step-down.html.md index 
94f5aa4c29887..ee6b1d82fd04f 100644 --- a/website/source/docs/http/sys-step-down.html.md +++ b/website/source/docs/http/sys-step-down.html.md @@ -12,8 +12,8 @@ description: |-
Description
Forces the node to give up active status. If the node does not have active - status, this endpoint does nothing. Note that the node will sleep for a - second before attempting to grab the active lock again, but if no standby + status, this endpoint does nothing. Note that the node will sleep for ten + seconds before attempting to grab the active lock again, but if no standby nodes grab the active lock in the interim, the same node may become the active node again. Requires a token with `root` policy or `sudo` capability on the path. From 1ae2f2fd34eee07542ab0f25b29ec3497a6110a0 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 3 Mar 2016 11:09:27 -0500 Subject: [PATCH 4/5] Remove unneeded sleeps in test code --- vault/core_test.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vault/core_test.go b/vault/core_test.go index c66cf1fa82e57..ec545857368e3 100644 --- a/vault/core_test.go +++ b/vault/core_test.go @@ -1106,9 +1106,6 @@ func TestCore_Standby_Seal(t *testing.T) { // Wait for core to become active testWaitActive(t, core) - // Ensure that the original clean function has stopped running - time.Sleep(2 * time.Second) - // Check the leader is local isLeader, advertise, err := core.Leader() if err != nil { @@ -1214,9 +1211,6 @@ func TestCore_StepDown(t *testing.T) { // Wait for core to become active testWaitActive(t, core) - // Ensure that the original clean function has stopped running - time.Sleep(2 * time.Second) - // Check the leader is local isLeader, advertise, err := core.Leader() if err != nil { From 4e964a62f6c7d7c5f002399d987579a41eaa2932 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 3 Mar 2016 12:29:30 -0500 Subject: [PATCH 5/5] Add default case for if the step down channel is blocked --- vault/core.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vault/core.go b/vault/core.go index 1340c0f93c0e3..ff2a79339e488 100644 --- a/vault/core.go +++ b/vault/core.go @@ -1269,7 +1269,11 @@ func (c *Core) 
StepDown(token string) error { return logical.ErrPermissionDenied } - c.manualStepDownCh <- struct{}{} + select { + case c.manualStepDownCh <- struct{}{}: + default: + c.logger.Printf("[WARN] core: manual step-down operation already queued") + } return nil }