Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide 'sys/step-down' and 'vault step-down' #1146

Merged
merged 6 commits into from Mar 3, 2016
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions api/sys_stepdown.go
@@ -0,0 +1,10 @@
package api

func (c *Sys) StepDown() error {
r := c.c.NewRequest("PUT", "/v1/sys/step-down")
resp, err := c.c.RawRequest(r)
if err == nil {
defer resp.Body.Close()
}
return err
}
6 changes: 6 additions & 0 deletions cli/commands.go
Expand Up @@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
}, nil
},

"step-down": func() (cli.Command, error) {
return &command.StepDownCommand{
Meta: meta,
}, nil
},

"mount": func() (cli.Command, error) {
return &command.MountCommand{
Meta: meta,
Expand Down
54 changes: 54 additions & 0 deletions command/step-down.go
@@ -0,0 +1,54 @@
package command

import (
"fmt"
"strings"
)

// StepDownCommand is a Command that seals the vault.
type StepDownCommand struct {
Meta
}

func (c *StepDownCommand) Run(args []string) int {
flags := c.Meta.FlagSet("step-down", FlagSetDefault)
flags.Usage = func() { c.Ui.Error(c.Help()) }
if err := flags.Parse(args); err != nil {
return 1
}

client, err := c.Client()
if err != nil {
c.Ui.Error(fmt.Sprintf(
"Error initializing client: %s", err))
return 2
}

if err := client.Sys().StepDown(); err != nil {
c.Ui.Error(fmt.Sprintf("Error stepping down: %s", err))
return 1
}

return 0
}

func (c *StepDownCommand) Synopsis() string {
return "Force the Vault node to give up active duty"
}

func (c *StepDownCommand) Help() string {
helpText := `
Usage: vault step-down [options]
Force the Vault node to step down from active duty.
This causes the indicated node to give up active status. Note that while the
affected node will have a short delay before attempting to grab the lock
again, if no other node grabs the lock beforehand, it is possible for the
same node to re-grab the lock and become active again.
General Options:
` + generalOptionsUsage()
return strings.TrimSpace(helpText)
}
1 change: 1 addition & 0 deletions http/handler.go
Expand Up @@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler {
mux.Handle("/v1/sys/init", handleSysInit(core))
mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core))
mux.Handle("/v1/sys/seal", handleSysSeal(core))
mux.Handle("/v1/sys/step-down", handleSysStepDown(core))
mux.Handle("/v1/sys/unseal", handleSysUnseal(core))
mux.Handle("/v1/sys/mounts", proxySysRequest(core))
mux.Handle("/v1/sys/mounts/", proxySysRequest(core))
Expand Down
23 changes: 23 additions & 0 deletions http/sys_seal.go
Expand Up @@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler {
})
}

func handleSysStepDown(core *vault.Core) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.Method {
case "PUT":
case "POST":
default:
respondError(w, http.StatusMethodNotAllowed, nil)
return
}

// Get the auth for the request so we can access the token directly
req := requestAuth(r, &logical.Request{})

// Seal with the token above
if err := core.StepDown(req.ClientToken); err != nil {
respondError(w, http.StatusInternalServerError, err)
return
}

respondOk(w, nil)
})
}

func handleSysUnseal(core *vault.Core) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.Method {
Expand Down
10 changes: 10 additions & 0 deletions http/sys_seal_test.go
Expand Up @@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) {
httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil)
testResponseStatus(t, httpResp, 204)
}

func TestSysStepDown(t *testing.T) {
core, _, token := vault.TestCoreUnsealed(t)
ln, addr := TestServer(t, core)
defer ln.Close()
TestServerAuth(t, addr, token)

resp := testHttpPut(t, token, addr+"/v1/sys/step-down", nil)
testResponseStatus(t, resp, 204)
}
106 changes: 87 additions & 19 deletions vault/core.go
Expand Up @@ -64,6 +64,10 @@ const (
// leaderPrefixCleanDelay is how long to wait between deletions
// of orphaned leader keys, to prevent slamming the backend.
leaderPrefixCleanDelay = 200 * time.Millisecond

// manualStepDownSleepPeriod is how long to sleep after a user-initiated
// step down of the active node, to prevent instantly regrabbing the lock
manualStepDownSleepPeriod = 10 * time.Second
)

var (
Expand Down Expand Up @@ -206,9 +210,10 @@ type Core struct {
stateLock sync.RWMutex
sealed bool

standby bool
standbyDoneCh chan struct{}
standbyStopCh chan struct{}
standby bool
standbyDoneCh chan struct{}
standbyStopCh chan struct{}
manualStepDownCh chan struct{}

// unlockParts has the keys provided to Unseal until
// the threshold number of parts is available.
Expand Down Expand Up @@ -1149,7 +1154,8 @@ func (c *Core) Unseal(key []byte) (bool, error) {
// Go to standby mode, wait until we are active to unseal
c.standbyDoneCh = make(chan struct{})
c.standbyStopCh = make(chan struct{})
go c.runStandby(c.standbyDoneCh, c.standbyStopCh)
c.manualStepDownCh = make(chan struct{})
go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh)
}

// Success!
Expand All @@ -1161,6 +1167,7 @@ func (c *Core) Unseal(key []byte) (bool, error) {
// be unsealed again to perform any further operations.
func (c *Core) Seal(token string) (retErr error) {
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())

c.stateLock.Lock()
defer c.stateLock.Unlock()
if c.sealed {
Expand All @@ -1173,27 +1180,29 @@ func (c *Core) Seal(token string) (retErr error) {
Path: "sys/seal",
ClientToken: token,
}
acl, te, err := c.fetchACLandTokenEntry(req)

// Attempt to use the token (decrement num_uses)
if te != nil {
if err := c.tokenStore.UseToken(te); err != nil {
c.logger.Printf("[ERR] core: failed to use token: %v", err)
retErr = ErrInternalError
}
}
acl, te, err := c.fetchACLandTokenEntry(req)
if err != nil {
// Since there is no token store in standby nodes, sealing cannot
// be done. Ideally, the request has to be forwarded to leader node
// for validation and the operation should be performed. But for now,
// just returning with an error and recommending a vault restart, which
// essentially does the same thing.
if c.standby {
c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead")
return errors.New("vault cannot be sealed when in standby mode; please restart instead")
c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead")
return errors.New("vault cannot seal when in standby mode; please restart instead")
}
return err
}
// Attempt to use the token (decrement num_uses)
// If we can't, we still continue attempting the seal, so long as the token
// has appropriate permissions
if te != nil {
if err := c.tokenStore.UseToken(te); err != nil {
c.logger.Printf("[ERR] core: failed to use token: %v", err)
retErr = ErrInternalError
}
}

// Verify that this operation is allowed
allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path)
Expand All @@ -1206,7 +1215,7 @@ func (c *Core) Seal(token string) (retErr error) {
return logical.ErrPermissionDenied
}

// Seal the Vault
//Seal the Vault
err = c.sealInternal()
if err == nil && retErr == ErrInternalError {
c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation")
Expand All @@ -1217,9 +1226,56 @@ func (c *Core) Seal(token string) (retErr error) {
return
}

// sealInternal is an internal method used to seal the vault.
// It does not do any authorization checking. The stateLock must
// be held prior to calling.
// StepDown is used to step down from leadership
func (c *Core) StepDown(token string) error {
defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())

c.stateLock.Lock()
defer c.stateLock.Unlock()
if c.sealed {
return nil
}
if c.ha == nil || c.standby {
return nil
}

// Validate the token is a root token
req := &logical.Request{
Operation: logical.UpdateOperation,
Path: "sys/step-down",
ClientToken: token,
}

acl, te, err := c.fetchACLandTokenEntry(req)
if err != nil {
return err
}
// Attempt to use the token (decrement num_uses)
if te != nil {
if err := c.tokenStore.UseToken(te); err != nil {
c.logger.Printf("[ERR] core: failed to use token: %v", err)
return err
}
}

// Verify that this operation is allowed
allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path)
if !allowed {
return logical.ErrPermissionDenied
}

// We always require root privileges for this operation
if !rootPrivs {
return logical.ErrPermissionDenied
}

c.manualStepDownCh <- struct{}{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should guard this with a default case in case there is no listener on the other side to avoid blocking.


return nil
}

// sealInternal is an internal method used to seal the vault. It does not do
// any authorization checking. The stateLock must be held prior to calling.
func (c *Core) sealInternal() error {
// Enable that we are sealed to prevent furthur transactions
c.sealed = true
Expand All @@ -1244,6 +1300,7 @@ func (c *Core) sealInternal() error {
return err
}
c.logger.Printf("[INFO] core: vault is sealed")

return nil
}

Expand Down Expand Up @@ -1353,8 +1410,9 @@ func (c *Core) preSeal() error {
// runStandby is a long running routine that is used when an HA backend
// is enabled. It waits until we are leader and switches this Vault to
// active.
func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) {
defer close(doneCh)
defer close(manualStepDownCh)
c.logger.Printf("[INFO] core: entering standby mode")

// Monitor for key rotation
Expand Down Expand Up @@ -1418,11 +1476,15 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
}

// Monitor a loss of leadership
var manualStepDown bool
select {
case <-leaderLostCh:
c.logger.Printf("[WARN] core: leadership lost, stopping active operation")
case <-stopCh:
c.logger.Printf("[WARN] core: stopping active operation")
case <-manualStepDownCh:
c.logger.Printf("[WARN] core: stepping down from active operation to standby")
manualStepDown = true
}

// Clear ourself as leader
Expand All @@ -1443,6 +1505,12 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
if preSealErr != nil {
c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err)
}

// If we've merely stepped down, we could instantly grab the lock
// again. Give the other nodes a chance.
if manualStepDown {
time.Sleep(manualStepDownSleepPeriod)
}
}
}

Expand Down