Skip to content

Commit

Permalink
Merge pull request #1146 from hashicorp/step-down
Browse files Browse the repository at this point in the history
Provide 'sys/step-down' and 'vault step-down'
  • Loading branch information
jefferai committed Mar 3, 2016
2 parents 1611a39 + 4e964a6 commit a520728
Show file tree
Hide file tree
Showing 11 changed files with 408 additions and 23 deletions.
10 changes: 10 additions & 0 deletions api/sys_stepdown.go
@@ -0,0 +1,10 @@
package api

// StepDown sends a PUT request to /v1/sys/step-down and returns the error
// reported by RawRequest, if any.
func (c *Sys) StepDown() error {
	r := c.c.NewRequest("PUT", "/v1/sys/step-down")
	resp, err := c.c.RawRequest(r)
	// Close the body whenever a response came back, not only on success, so a
	// response handed back alongside an error does not leak its body.
	// NOTE(review): assumes RawRequest can return a non-nil response together
	// with a non-nil error (e.g. for error status codes) — confirm.
	if resp != nil && resp.Body != nil {
		defer resp.Body.Close()
	}
	return err
}
6 changes: 6 additions & 0 deletions cli/commands.go
Expand Up @@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
}, nil
},

"step-down": func() (cli.Command, error) {
return &command.StepDownCommand{
Meta: meta,
}, nil
},

"mount": func() (cli.Command, error) {
return &command.MountCommand{
Meta: meta,
Expand Down
54 changes: 54 additions & 0 deletions command/step-down.go
@@ -0,0 +1,54 @@
package command

import (
"fmt"
"strings"
)

// StepDownCommand is a Command that asks the Vault node to step down from
// active duty via the sys/step-down endpoint.
type StepDownCommand struct {
// Meta is embedded; Run relies on it for the flag set, the API client and
// the Ui used to report errors.
Meta
}

// Run parses the command-line flags, builds an API client and asks the server
// to step down. It returns 0 on success, 1 when flag parsing or the step-down
// request fails, and 2 when the client cannot be initialized.
func (c *StepDownCommand) Run(args []string) int {
	fs := c.Meta.FlagSet("step-down", FlagSetDefault)
	fs.Usage = func() { c.Ui.Error(c.Help()) }
	if parseErr := fs.Parse(args); parseErr != nil {
		return 1
	}

	client, err := c.Client()
	if err != nil {
		msg := fmt.Sprintf("Error initializing client: %s", err)
		c.Ui.Error(msg)
		return 2
	}

	err = client.Sys().StepDown()
	if err != nil {
		msg := fmt.Sprintf("Error stepping down: %s", err)
		c.Ui.Error(msg)
		return 1
	}

	return 0
}

// Synopsis returns the one-line description of the step-down command.
func (c *StepDownCommand) Synopsis() string {
	const synopsis = "Force the Vault node to give up active duty"
	return synopsis
}

// Help returns the usage text for the step-down command followed by the
// general options usage, with surrounding whitespace trimmed.
func (c *StepDownCommand) Help() string {
	usage := `
Usage: vault step-down [options]
Force the Vault node to step down from active duty.
This causes the indicated node to give up active status. Note that while the
affected node will have a short delay before attempting to grab the lock
again, if no other node grabs the lock beforehand, it is possible for the
same node to re-grab the lock and become active again.
General Options:
`
	return strings.TrimSpace(usage + generalOptionsUsage())
}
1 change: 1 addition & 0 deletions http/handler.go
Expand Up @@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler {
mux.Handle("/v1/sys/init", handleSysInit(core))
mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core))
mux.Handle("/v1/sys/seal", handleSysSeal(core))
mux.Handle("/v1/sys/step-down", handleSysStepDown(core))
mux.Handle("/v1/sys/unseal", handleSysUnseal(core))
mux.Handle("/v1/sys/renew/", handleLogical(core, false))
mux.Handle("/v1/sys/leader", handleSysLeader(core))
Expand Down
23 changes: 23 additions & 0 deletions http/sys_seal.go
Expand Up @@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler {
})
}

// handleSysStepDown returns the HTTP handler for the sys/step-down endpoint.
// Only PUT and POST are accepted; any other method is answered with
// StatusMethodNotAllowed. An error from core.StepDown is reported with
// StatusInternalServerError.
func handleSysStepDown(core *vault.Core) http.Handler {
	stepDown := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != "PUT" && r.Method != "POST" {
			respondError(w, http.StatusMethodNotAllowed, nil)
			return
		}

		// Run the request through requestAuth so the client token can be
		// read from the resulting logical request.
		authed := requestAuth(r, &logical.Request{})

		// Ask the core to step down on behalf of that token.
		if stepDownErr := core.StepDown(authed.ClientToken); stepDownErr != nil {
			respondError(w, http.StatusInternalServerError, stepDownErr)
			return
		}

		respondOk(w, nil)
	}

	return http.HandlerFunc(stepDown)
}

func handleSysUnseal(core *vault.Core) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.Method {
Expand Down
10 changes: 10 additions & 0 deletions http/sys_seal_test.go
Expand Up @@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) {
httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil)
testResponseStatus(t, httpResp, 204)
}

// TestSysStepDown checks that a PUT to sys/step-down, made with the token
// returned by TestCoreUnsealed, is answered with status 204.
func TestSysStepDown(t *testing.T) {
	core, _, tok := vault.TestCoreUnsealed(t)
	listener, baseAddr := TestServer(t, core)
	defer listener.Close()
	TestServerAuth(t, baseAddr, tok)

	stepDownURL := baseAddr + "/v1/sys/step-down"
	httpResp := testHttpPut(t, tok, stepDownURL, nil)
	testResponseStatus(t, httpResp, 204)
}
110 changes: 91 additions & 19 deletions vault/core.go
Expand Up @@ -64,6 +64,10 @@ const (
// leaderPrefixCleanDelay is how long to wait between deletions
// of orphaned leader keys, to prevent slamming the backend.
leaderPrefixCleanDelay = 200 * time.Millisecond

// manualStepDownSleepPeriod is how long to sleep after a user-initiated
// step down of the active node, to prevent instantly regrabbing the lock
manualStepDownSleepPeriod = 10 * time.Second
)

var (
Expand Down Expand Up @@ -206,9 +210,10 @@ type Core struct {
stateLock sync.RWMutex
sealed bool

standby bool
standbyDoneCh chan struct{}
standbyStopCh chan struct{}
standby bool
standbyDoneCh chan struct{}
standbyStopCh chan struct{}
manualStepDownCh chan struct{}

// unlockParts has the keys provided to Unseal until
// the threshold number of parts is available.
Expand Down Expand Up @@ -1149,7 +1154,8 @@ func (c *Core) Unseal(key []byte) (bool, error) {
// Go to standby mode, wait until we are active to unseal
c.standbyDoneCh = make(chan struct{})
c.standbyStopCh = make(chan struct{})
go c.runStandby(c.standbyDoneCh, c.standbyStopCh)
c.manualStepDownCh = make(chan struct{})
go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh)
}

// Success!
Expand All @@ -1161,6 +1167,7 @@ func (c *Core) Unseal(key []byte) (bool, error) {
// be unsealed again to perform any further operations.
func (c *Core) Seal(token string) (retErr error) {
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())

c.stateLock.Lock()
defer c.stateLock.Unlock()
if c.sealed {
Expand All @@ -1173,27 +1180,29 @@ func (c *Core) Seal(token string) (retErr error) {
Path: "sys/seal",
ClientToken: token,
}
acl, te, err := c.fetchACLandTokenEntry(req)

// Attempt to use the token (decrement num_uses)
if te != nil {
if err := c.tokenStore.UseToken(te); err != nil {
c.logger.Printf("[ERR] core: failed to use token: %v", err)
retErr = ErrInternalError
}
}
acl, te, err := c.fetchACLandTokenEntry(req)
if err != nil {
// Since there is no token store in standby nodes, sealing cannot
// be done. Ideally, the request has to be forwarded to leader node
// for validation and the operation should be performed. But for now,
// just returning with an error and recommending a vault restart, which
// essentially does the same thing.
if c.standby {
c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead")
return errors.New("vault cannot be sealed when in standby mode; please restart instead")
c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead")
return errors.New("vault cannot seal when in standby mode; please restart instead")
}
return err
}
// Attempt to use the token (decrement num_uses)
// If we can't, we still continue attempting the seal, so long as the token
// has appropriate permissions
if te != nil {
if err := c.tokenStore.UseToken(te); err != nil {
c.logger.Printf("[ERR] core: failed to use token: %v", err)
retErr = ErrInternalError
}
}

// Verify that this operation is allowed
allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path)
Expand All @@ -1206,7 +1215,7 @@ func (c *Core) Seal(token string) (retErr error) {
return logical.ErrPermissionDenied
}

// Seal the Vault
//Seal the Vault
err = c.sealInternal()
if err == nil && retErr == ErrInternalError {
c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation")
Expand All @@ -1217,9 +1226,60 @@ func (c *Core) Seal(token string) (retErr error) {
return
}

// sealInternal is an internal method used to seal the vault.
// It does not do any authorization checking. The stateLock must
// be held prior to calling.
// StepDown is used to step down from leadership. It returns nil without doing
// anything when the core is sealed, when c.ha is nil, or when the core is
// already in standby. Otherwise the supplied token must be allowed to perform
// an update on sys/step-down with root privileges; if so, a signal is offered
// on manualStepDownCh.
func (c *Core) StepDown(token string) error {
	defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())

	c.stateLock.Lock()
	defer c.stateLock.Unlock()

	// Nothing to step down from in any of these states.
	if c.sealed || c.ha == nil || c.standby {
		return nil
	}

	// Request used to authorize the caller against sys/step-down.
	stepDownReq := &logical.Request{
		ClientToken: token,
		Path:        "sys/step-down",
		Operation:   logical.UpdateOperation,
	}

	policy, entry, err := c.fetchACLandTokenEntry(stepDownReq)
	if err != nil {
		return err
	}

	// Attempt to use the token (decrement num_uses); a failure here aborts
	// the step-down.
	if entry != nil {
		if useErr := c.tokenStore.UseToken(entry); useErr != nil {
			c.logger.Printf("[ERR] core: failed to use token: %v", useErr)
			return useErr
		}
	}

	// The operation must be allowed, and root privileges are always required.
	permitted, isRoot := policy.AllowOperation(stepDownReq.Operation, stepDownReq.Path)
	if !permitted || !isRoot {
		return logical.ErrPermissionDenied
	}

	// Non-blocking send: if the signal cannot be delivered right now, only a
	// warning is logged and nil is still returned.
	select {
	case c.manualStepDownCh <- struct{}{}:
	default:
		c.logger.Printf("[WARN] core: manual step-down operation already queued")
	}

	return nil
}

// sealInternal is an internal method used to seal the vault. It does not do
// any authorization checking. The stateLock must be held prior to calling.
func (c *Core) sealInternal() error {
// Mark ourselves as sealed to prevent further transactions
c.sealed = true
Expand All @@ -1244,6 +1304,7 @@ func (c *Core) sealInternal() error {
return err
}
c.logger.Printf("[INFO] core: vault is sealed")

return nil
}

Expand Down Expand Up @@ -1353,8 +1414,9 @@ func (c *Core) preSeal() error {
// runStandby is a long running routine that is used when an HA backend
// is enabled. It waits until we are leader and switches this Vault to
// active.
func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) {
defer close(doneCh)
defer close(manualStepDownCh)
c.logger.Printf("[INFO] core: entering standby mode")

// Monitor for key rotation
Expand Down Expand Up @@ -1418,11 +1480,15 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
}

// Monitor a loss of leadership
var manualStepDown bool
select {
case <-leaderLostCh:
c.logger.Printf("[WARN] core: leadership lost, stopping active operation")
case <-stopCh:
c.logger.Printf("[WARN] core: stopping active operation")
case <-manualStepDownCh:
c.logger.Printf("[WARN] core: stepping down from active operation to standby")
manualStepDown = true
}

// Clear ourself as leader
Expand All @@ -1443,6 +1509,12 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
if preSealErr != nil {
c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err)
}

// If we've merely stepped down, we could instantly grab the lock
// again. Give the other nodes a chance.
if manualStepDown {
time.Sleep(manualStepDownSleepPeriod)
}
}
}

Expand Down

0 comments on commit a520728

Please sign in to comment.