diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go
index 49ff9f67a1c5e..cfa11bd3def75 100644
--- a/builtin/logical/pki/backend.go
+++ b/builtin/logical/pki/backend.go
@@ -8,8 +8,6 @@ import (
 	"sync/atomic"
 	"time"
 
-	atomic2 "go.uber.org/atomic"
-
 	"github.com/hashicorp/vault/sdk/helper/consts"
 
 	"github.com/armon/go-metrics"
@@ -178,7 +176,6 @@ func Backend(conf *logical.BackendConfig) *backend {
 		PeriodicFunc: b.periodicFunc,
 	}
 
-	b.crlLifetime = time.Hour * 72
 	b.tidyCASGuard = new(uint32)
 	b.tidyStatus = &tidyStatus{state: tidyStatusInactive}
 	b.storage = conf.StorageView
@@ -186,8 +183,8 @@ func Backend(conf *logical.BackendConfig) *backend {
 
 	b.pkiStorageVersion.Store(0)
 
-	b.crlBuilder = &crlBuilder{}
-	b.isOcspDisabled = atomic2.NewBool(false)
+	b.crlBuilder = newCRLBuilder()
+
 	return &b
 }
 
@@ -208,9 +205,6 @@ type backend struct {
 
 	// Write lock around issuers and keys.
 	issuersLock sync.RWMutex
-
-	// Optimization to not read the CRL config on every OCSP request.
-	isOcspDisabled *atomic2.Bool
 }
 
 type (
@@ -307,8 +301,10 @@ func (b *backend) metricsWrap(callType string, roleMode int, ofunc roleOperation
 
 // initialize is used to perform a possible PKI storage migration if needed
 func (b *backend) initialize(ctx context.Context, _ *logical.InitializationRequest) error {
-	// load ocsp enabled status
-	setOcspStatus(b, ctx)
+	sc := b.makeStorageContext(ctx, b.storage)
+	if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
+		return err
+	}
 
 	// Grab the lock prior to the updating of the storage lock preventing us flipping
 	// the storage flag midway through the request stream of other requests.
@@ -335,14 +331,6 @@ func (b *backend) initialize(ctx context.Context, _ *logical.InitializationReque
 	return nil
 }
 
-func setOcspStatus(b *backend, ctx context.Context) {
-	sc := b.makeStorageContext(ctx, b.storage)
-	config, err := sc.getRevocationConfig()
-	if config != nil && err == nil {
-		b.isOcspDisabled.Store(config.OcspDisable)
-	}
-}
-
 func (b *backend) useLegacyBundleCaStorage() bool {
 	// This helper function is here to choose whether or not we use the newer
 	// issuer/key storage format or the older legacy ca bundle format.
@@ -398,10 +386,27 @@ func (b *backend) invalidate(ctx context.Context, key string) {
 		}
 	case key == "config/crl":
-		// We may need to reload our OCSP status flag
-		setOcspStatus(b, ctx)
+		// We may need to reload our CRL configuration (including OCSP status).
+		b.crlBuilder.markConfigDirty()
 	}
 }
 
 func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) error {
-	return b.crlBuilder.rebuildIfForced(ctx, b, request)
+	// First attempt to reload the CRL configuration.
+	sc := b.makeStorageContext(ctx, request.Storage)
+	if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
+		return err
+	}
+
+	// Check if we're set to auto rebuild and a CRL is set to expire.
+	if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil {
+		return err
+	}
+
+	// Then attempt to rebuild the CRLs if required.
+	if err := b.crlBuilder.rebuildIfForced(ctx, b, request); err != nil {
+		return err
+	}
+
+	// All good!
+	return nil
 }
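Reviewer note: the wiring above replaces the old `isOcspDisabled` atomic with a cached, invalidation-aware config on the `crlBuilder` (defined in `crl_util.go` below). As a minimal standalone sketch of the pattern — not the actual Vault types, and using the standard library's `sync/atomic.Bool` in place of `go.uber.org/atomic`, with a `loadConfig` callback standing in for `storageContext.getRevocationConfig`:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type config struct{ expiry string }

// reloader sketches the crlBuilder approach: invalidation cheaply marks
// the cached config dirty; the next reader reloads it under a write lock,
// double-checking the flag so concurrent callers only reload once.
type reloader struct {
	mu    sync.RWMutex
	dirty atomic.Bool
	cfg   config
}

func (r *reloader) markDirty() { r.dirty.Store(true) }

func (r *reloader) get(loadConfig func() config) config {
	if r.dirty.Load() {
		r.mu.Lock()
		if r.dirty.Load() { // re-check: another goroutine may have won the race
			r.cfg = loadConfig()
			r.dirty.Store(false)
		}
		r.mu.Unlock()
	}
	r.mu.RLock()
	defer r.mu.RUnlock()
	return r.cfg // returned by value, like getConfigWithUpdate's copy
}

func main() {
	r := &reloader{}
	r.markDirty()
	fmt.Println(r.get(func() config { return config{expiry: "72h"} }))
}
```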
diff --git a/builtin/logical/pki/crl_test.go b/builtin/logical/pki/crl_test.go
index 8f9c6b3d57abe..c780c22da9a47 100644
--- a/builtin/logical/pki/crl_test.go
+++ b/builtin/logical/pki/crl_test.go
@@ -8,7 +8,11 @@ import (
 	"testing"
 	"time"
 
+	"github.com/hashicorp/vault/api"
+	vaulthttp "github.com/hashicorp/vault/http"
 	"github.com/hashicorp/vault/sdk/logical"
+	"github.com/hashicorp/vault/vault"
+
 	"github.com/stretchr/testify/require"
 )
 
@@ -363,6 +367,8 @@ func TestCrlRebuilder(t *testing.T) {
 }
 
 func TestBYOC(t *testing.T) {
+	t.Parallel()
+
 	b, s := createBackendWithStorage(t)
 
 	// Create a root CA.
@@ -480,6 +486,8 @@ func TestBYOC(t *testing.T) {
 }
 
 func TestPoP(t *testing.T) {
+	t.Parallel()
+
 	b, s := createBackendWithStorage(t)
 
 	// Create a root CA.
@@ -638,6 +646,8 @@ func TestPoP(t *testing.T) {
 }
 
 func TestIssuerRevocation(t *testing.T) {
+	t.Parallel()
+
 	b, s := createBackendWithStorage(t)
 
 	// Create a root CA.
@@ -778,6 +788,167 @@ func TestIssuerRevocation(t *testing.T) {
 	require.NotEmpty(t, resp.Data["revocation_time"])
 }
 
+func TestAutoRebuild(t *testing.T) {
+	t.Parallel()
+
+	// While we'd like to reduce this duration, we need to wait until
+	// the rollback manager timer ticks. With the new helper, we can
+	// modify the rollback manager timer period directly, allowing us
+	// to shorten the total test time significantly.
+	newPeriod := 6 * time.Second
+	gracePeriod := (newPeriod + 1*time.Second).String()
+	crlTime := (newPeriod + 2*time.Second).String()
+	delta := 2 * newPeriod
+
+	// This test requires the periodicFunc to trigger, which requires we stand
+	// up a full test cluster.
+	coreConfig := &vault.CoreConfig{
+		LogicalBackends: map[string]logical.Factory{
+			"pki": Factory,
+		},
+	}
+	oldPeriod := vault.SetRollbackPeriodForTesting(newPeriod)
+	cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{
+		HandlerFunc: vaulthttp.Handler,
+	})
+	cluster.Start()
+	defer cluster.Cleanup()
+	client := cluster.Cores[0].Client
+	vault.SetRollbackPeriodForTesting(oldPeriod)
+
+	// Mount PKI
+	err := client.Sys().Mount("pki", &api.MountInput{
+		Type: "pki",
+		Config: api.MountConfigInput{
+			DefaultLeaseTTL: "16h",
+			MaxLeaseTTL:     "60h",
+		},
+	})
+	require.NoError(t, err)
+
+	// Generate root.
+	_, err = client.Logical().Write("pki/root/generate/internal", map[string]interface{}{
+		"ttl":         "40h",
+		"common_name": "Root X1",
+		"key_type":    "ec",
+	})
+	require.NoError(t, err)
+
+	// Setup a testing role.
+	_, err = client.Logical().Write("pki/roles/local-testing", map[string]interface{}{
+		"allow_any_name":    true,
+		"enforce_hostnames": false,
+		"key_type":          "ec",
+	})
+	require.NoError(t, err)
+
+	// Safety guard: we play with rebuild timing below. We don't expect
+	// this entire test to take longer than 80s.
+	_, err = client.Logical().Write("pki/config/crl", map[string]interface{}{
+		"expiry": crlTime,
+	})
+	require.NoError(t, err)
+
+	// Issue a cert and revoke it. It should appear on the CRL right away.
+	resp, err := client.Logical().Write("pki/issue/local-testing", map[string]interface{}{
+		"common_name": "example.com",
+	})
+	require.NoError(t, err)
+	require.NotNil(t, resp)
+	require.NotNil(t, resp.Data)
+	require.NotEmpty(t, resp.Data["serial_number"])
+	leafSerial := resp.Data["serial_number"].(string)
+
+	_, err = client.Logical().Write("pki/revoke", map[string]interface{}{
+		"serial_number": leafSerial,
+	})
+	require.NoError(t, err)
+
+	crl := getCrlCertificateList(t, client, "pki")
+	lastCRLNumber := crl.Version
+	lastCRLExpiry := crl.NextUpdate
+	requireSerialNumberInCRL(t, crl, leafSerial)
+
+	// Enable periodic rebuild of the CRL.
+	_, err = client.Logical().Write("pki/config/crl", map[string]interface{}{
+		"expiry":                    crlTime,
+		"auto_rebuild":              true,
+		"auto_rebuild_grace_period": gracePeriod,
+	})
+	require.NoError(t, err)
+
+	// Issue a cert and revoke it.
+	resp, err = client.Logical().Write("pki/issue/local-testing", map[string]interface{}{
+		"common_name": "example.com",
+	})
+	require.NoError(t, err)
+	require.NotNil(t, resp)
+	require.NotNil(t, resp.Data)
+	require.NotEmpty(t, resp.Data["serial_number"])
+	newLeafSerial := resp.Data["serial_number"].(string)
+
+	_, err = client.Logical().Write("pki/revoke", map[string]interface{}{
+		"serial_number": newLeafSerial,
+	})
+	require.NoError(t, err)
+
+	// The new serial should not yet appear on the CRL; the old one should.
+	crl = getCrlCertificateList(t, client, "pki")
+	thisCRLNumber := crl.Version
+	requireSerialNumberInCRL(t, crl, leafSerial)
+
+	now := time.Now()
+	graceInterval, _ := time.ParseDuration(gracePeriod)
+	expectedUpdate := lastCRLExpiry.Add(-1 * graceInterval)
+	if requireSerialNumberInCRL(nil, crl, newLeafSerial) {
+		// If we somehow lagged and we ended up needing to rebuild
+		// the CRL, we should avoid throwing an error.
+
+		if thisCRLNumber == lastCRLNumber {
+			t.Fatalf("unexpected failure: last (%v) and current (%v) leaf certificates might have the same serial number?", leafSerial, newLeafSerial)
+		}
+
+		if !now.After(expectedUpdate) {
+			t.Fatalf("expected newly generated certificate with serial %v not to appear on this CRL but it did, prematurely: %v", newLeafSerial, crl)
+		}
+
+		t.Fatalf("shouldn't be here")
+	}
+
+	// Now, wait until we're within the grace period... Then start prompting
+	// for regeneration.
+	if expectedUpdate.After(now) {
+		time.Sleep(expectedUpdate.Sub(now))
+	}
+
+	// Otherwise, the absolute latest we're willing to wait is some delta
+	// after CRL expiry (to let stuff regenerate &c).
+	interruptChan := time.After(lastCRLExpiry.Sub(now) + delta)
+	for {
+		select {
+		case <-interruptChan:
+			t.Fatalf("expected CRL to regenerate prior to CRL expiry (plus %v grace period)", delta)
+		default:
+			crl = getCrlCertificateList(t, client, "pki")
+			if crl.NextUpdate.Equal(lastCRLExpiry) {
+				// Hack to ensure we got a net-new CRL. If we didn't, we can
+				// exit this default conditional and wait for the next
+				// go-round. When the timer fires, it'll populate the channel
+				// and we'll exit correctly.
+				time.Sleep(1 * time.Second)
+				break
+			}
+
+			now := time.Now()
+			require.True(t, crl.ThisUpdate.Before(now))
+			require.True(t, crl.NextUpdate.After(now))
+			requireSerialNumberInCRL(t, crl, leafSerial)
+			requireSerialNumberInCRL(t, crl, newLeafSerial)
+			return
+		}
+	}
+}
+
 func requestCrlFromBackend(t *testing.T, s logical.Storage, b *backend) *logical.Response {
 	crlReq := &logical.Request{
 		Operation: logical.ReadOperation,
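Reviewer note: `getCrlCertificateList` and `requireSerialNumberInCRL` live elsewhere in the test suite and aren't part of this diff. A rough standalone equivalent of what they do, reading the PEM CRL that a PKI mount serves at `<mount>/cert/crl` and parsing it with `crypto/x509` — the function names and the exact response shape here are illustrative assumptions, not Vault APIs:

```go
package main

import (
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"log"
	"math/big"

	"github.com/hashicorp/vault/api"
)

// fetchCRL reads the PEM-encoded CRL from <mount>/cert/crl and parses it.
func fetchCRL(client *api.Client, mount string) (*x509.RevocationList, error) {
	resp, err := client.Logical().Read(mount + "/cert/crl")
	if err != nil || resp == nil {
		return nil, fmt.Errorf("reading CRL: %v", err)
	}
	pemCRL, ok := resp.Data["certificate"].(string)
	if !ok {
		return nil, fmt.Errorf("unexpected response shape: %v", resp.Data)
	}
	block, _ := pem.Decode([]byte(pemCRL))
	if block == nil {
		return nil, fmt.Errorf("no PEM block in CRL response")
	}
	return x509.ParseRevocationList(block.Bytes)
}

// serialInCRL reports whether the given serial number appears as revoked.
func serialInCRL(crl *x509.RevocationList, serial *big.Int) bool {
	for _, revoked := range crl.RevokedCertificates {
		if revoked.SerialNumber.Cmp(serial) == 0 {
			return true
		}
	}
	return false
}

func main() {
	client, err := api.NewClient(api.DefaultConfig()) // uses VAULT_ADDR/VAULT_TOKEN
	if err != nil {
		log.Fatal(err)
	}
	crl, err := fetchCRL(client, "pki")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("CRL number:", crl.Number, "next update:", crl.NextUpdate)
}
```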
diff --git a/builtin/logical/pki/crl_util.go b/builtin/logical/pki/crl_util.go
index bd9028c911a0c..305f1a3b960cd 100644
--- a/builtin/logical/pki/crl_util.go
+++ b/builtin/logical/pki/crl_util.go
@@ -13,6 +13,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	atomic2 "go.uber.org/atomic"
+
 	"github.com/hashicorp/vault/sdk/helper/consts"
 
 	"github.com/hashicorp/vault/sdk/helper/errutil"
 
@@ -33,9 +35,15 @@ type revocationInfo struct {
 // without the actual API calls. During the storage invalidation process, we do not have the required state
 // to actually rebuild the CRLs, so we need to schedule it in a deferred fashion. This allows either
 // read or write calls to perform the operation if required, or have the flag reset upon a write operation
+//
+// The CRL builder also tracks the revocation configuration.
 type crlBuilder struct {
 	m            sync.Mutex
 	forceRebuild uint32
+
+	c      sync.RWMutex
+	dirty  *atomic2.Bool
+	config crlConfig
 }
 
 const (
@@ -43,6 +51,126 @@ const (
 	_enforceForceFlag = false
 )
 
+func newCRLBuilder() *crlBuilder {
+	return &crlBuilder{
+		dirty:  atomic2.NewBool(true),
+		config: defaultCrlConfig,
+	}
+}
+
+func (cb *crlBuilder) markConfigDirty() {
+	cb.dirty.Store(true)
+}
+
+func (cb *crlBuilder) reloadConfigIfRequired(sc *storageContext) error {
+	if cb.dirty.Load() {
+		// Acquire a write lock.
+		cb.c.Lock()
+		defer cb.c.Unlock()
+
+		if !cb.dirty.Load() {
+			// Someone else might've been reloading the config; no need
+			// to do it twice.
+			return nil
+		}
+
+		config, err := sc.getRevocationConfig()
+		if err != nil {
+			return err
+		}
+
+		// Set the default config if none was returned to us.
+		if config != nil {
+			cb.config = *config
+		} else {
+			cb.config = defaultCrlConfig
+		}
+
+		// Updated the config; unset dirty.
+		cb.dirty.Store(false)
+	}
+
+	return nil
+}
+
+func (cb *crlBuilder) getConfigWithUpdate(sc *storageContext) (*crlConfig, error) {
+	// Config may mutate immediately after accessing, but will be freshly
+	// fetched if necessary.
+	if err := cb.reloadConfigIfRequired(sc); err != nil {
+		return nil, err
+	}
+
+	cb.c.RLock()
+	defer cb.c.RUnlock()
+
+	configCopy := cb.config
+	return &configCopy, nil
+}
+
+func (cb *crlBuilder) checkForAutoRebuild(sc *storageContext) error {
+	cfg, err := cb.getConfigWithUpdate(sc)
+	if err != nil {
+		return err
+	}
+
+	if cfg.Disable || !cfg.AutoRebuild || atomic.LoadUint32(&cb.forceRebuild) == 1 {
+		// Not enabled, not on auto-rebuilder, or we're already scheduled to
+		// rebuild so there's no point to interrogate CRL values...
+		return nil
+	}
+
+	// Auto-Rebuild is enabled. We need to check each issuer's CRL and see
+	// if it's about to expire. If it is, we've gotta rebuild it (and well,
+	// every other CRL, since we don't have a fine-grained rebuilder).
+	//
+	// We store a list of all (unique) CRLs in the cluster-local CRL
+	// configuration along with their expiration dates.
+	crlConfig, err := sc.getLocalCRLConfig()
+	if err != nil {
+		return fmt.Errorf("error checking for auto-rebuild status: unable to fetch cluster-local CRL configuration: %v", err)
+	}
+
+	// If there's no config, assume we've gotta rebuild it to get this
+	// information.
+	if crlConfig == nil {
+		atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
+		return nil
+	}
+
+	// If the map is empty, assume we need to upgrade and schedule a
+	// rebuild.
+	if len(crlConfig.CRLExpirationMap) == 0 {
+		atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
+		return nil
+	}
+
+	// Otherwise, check each CRL's expiration and see if it's zero or
+	// within the grace period and act accordingly.
+	now := time.Now()
+
+	period, err := time.ParseDuration(cfg.AutoRebuildGracePeriod)
+	if err != nil {
+		// This may occur if the duration is empty; in that case
+		// assume the default. The default should be valid and shouldn't
+		// error.
+		defaultPeriod, defaultErr := time.ParseDuration(defaultCrlConfig.AutoRebuildGracePeriod)
+		if defaultErr != nil {
+			return fmt.Errorf("error checking for auto-rebuild status: unable to parse duration from both config's grace period (%v) and default grace period (%v):\n- config: %v\n- default: %v\n", cfg.AutoRebuildGracePeriod, defaultCrlConfig.AutoRebuildGracePeriod, err, defaultErr)
+		}
+
+		period = defaultPeriod
+	}
+
+	for _, value := range crlConfig.CRLExpirationMap {
+		if value.IsZero() || now.After(value.Add(-1*period)) {
+			atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
+			return nil
+		}
+	}
+
+	return nil
+}
+
 // rebuildIfForced is to be called by readers or periodic functions that might need to trigger
 // a refresh of the CRL before the read occurs.
 func (cb *crlBuilder) rebuildIfForced(ctx context.Context, b *backend, request *logical.Request) error {
@@ -235,13 +363,23 @@ func revokeCert(ctx context.Context, b *backend, req *logical.Request, serial st
 		}
 	}
 
-	crlErr := b.crlBuilder.rebuild(ctx, b, req, false)
-	if crlErr != nil {
-		switch crlErr.(type) {
-		case errutil.UserError:
-			return logical.ErrorResponse(fmt.Sprintf("Error during CRL building: %s", crlErr)), nil
-		default:
-			return nil, fmt.Errorf("error encountered during CRL building: %w", crlErr)
+	// Fetch the config and see if we need to rebuild the CRL. If auto-rebuild
+	// is enabled, we will wait for the next rebuild period to actually
+	// rebuild it.
+	config, err := b.crlBuilder.getConfigWithUpdate(sc)
+	if err != nil {
+		return nil, fmt.Errorf("error building CRL: while updating config: %v", err)
+	}
+
+	if !config.AutoRebuild {
+		crlErr := b.crlBuilder.rebuild(ctx, b, req, false)
+		if crlErr != nil {
+			switch crlErr.(type) {
+			case errutil.UserError:
+				return logical.ErrorResponse(fmt.Sprintf("Error during CRL building: %s", crlErr)), nil
+			default:
+				return nil, fmt.Errorf("error encountered during CRL building: %w", crlErr)
+			}
 		}
 	}
 
@@ -288,6 +426,13 @@ func buildCRLs(ctx context.Context, b *backend, req *logical.Request, forceNew b
 	var wasLegacy bool
 	sc := b.makeStorageContext(ctx, req.Storage)
 
+	// First, fetch an updated copy of the CRL config. We'll pass this into
+	// buildCRL.
+	globalCRLConfig, err := b.crlBuilder.getConfigWithUpdate(sc)
+	if err != nil {
+		return fmt.Errorf("error building CRL: while updating config: %v", err)
+	}
+
 	if !b.useLegacyBundleCaStorage() {
 		issuers, err = sc.listIssuers()
 		if err != nil {
@@ -420,9 +565,12 @@ func buildCRLs(ctx context.Context, b *backend, req *logical.Request, forceNew b
 			crlConfig.CRLNumberMap[crlIdentifier] += 1
 
 			// Lastly, build the CRL.
-			if err := buildCRL(sc, forceNew, representative, revokedCerts, crlIdentifier, crlNumber); err != nil {
+			nextUpdate, err := buildCRL(sc, globalCRLConfig, forceNew, representative, revokedCerts, crlIdentifier, crlNumber)
+			if err != nil {
 				return fmt.Errorf("error building CRLs: unable to build CRL for issuer (%v): %v", representative, err)
 			}
+
+			crlConfig.CRLExpirationMap[crlIdentifier] = *nextUpdate
 		}
 	}
 
@@ -610,26 +758,19 @@ func augmentWithRevokedIssuers(issuerIDEntryMap map[issuerID]*issuerEntry, issue
 
 // Builds a CRL by going through the list of revoked certificates and building
 // a new CRL with the stored revocation times and serial numbers.
-func buildCRL(sc *storageContext, forceNew bool, thisIssuerId issuerID, revoked []pkix.RevokedCertificate, identifier crlID, crlNumber int64) error {
-	crlInfo, err := sc.getRevocationConfig()
-	if err != nil {
-		return errutil.InternalError{Err: fmt.Sprintf("error fetching CRL config information: %s", err)}
-	}
-
-	crlLifetime := sc.Backend.crlLifetime
+func buildCRL(sc *storageContext, crlInfo *crlConfig, forceNew bool, thisIssuerId issuerID, revoked []pkix.RevokedCertificate, identifier crlID, crlNumber int64) (*time.Time, error) {
 	var revokedCerts []pkix.RevokedCertificate
 
-	if crlInfo.Expiry != "" {
-		crlDur, err := time.ParseDuration(crlInfo.Expiry)
-		if err != nil {
-			return errutil.InternalError{Err: fmt.Sprintf("error parsing CRL duration of %s", crlInfo.Expiry)}
-		}
-		crlLifetime = crlDur
+	crlLifetime, err := time.ParseDuration(crlInfo.Expiry)
+	if err != nil {
+		return nil, errutil.InternalError{Err: fmt.Sprintf("error parsing CRL duration of %s", crlInfo.Expiry)}
 	}
 
 	if crlInfo.Disable {
 		if !forceNew {
-			return nil
+			// In the event of a disabled CRL, we'll have the next time set
+			// to the zero time as a sentinel in case we get re-enabled.
+			return &time.Time{}, nil
 		}
 
 		// NOTE: in this case, the passed argument (revoked) is not added
@@ -648,23 +789,26 @@ WRITE:
 	if caErr != nil {
 		switch caErr.(type) {
 		case errutil.UserError:
-			return errutil.UserError{Err: fmt.Sprintf("could not fetch the CA certificate: %s", caErr)}
+			return nil, errutil.UserError{Err: fmt.Sprintf("could not fetch the CA certificate: %s", caErr)}
 		default:
-			return errutil.InternalError{Err: fmt.Sprintf("error fetching CA certificate: %s", caErr)}
+			return nil, errutil.InternalError{Err: fmt.Sprintf("error fetching CA certificate: %s", caErr)}
 		}
 	}
 
+	now := time.Now()
+	nextUpdate := now.Add(crlLifetime)
+
 	revocationListTemplate := &x509.RevocationList{
 		RevokedCertificates: revokedCerts,
 		Number:              big.NewInt(crlNumber),
-		ThisUpdate:          time.Now(),
-		NextUpdate:          time.Now().Add(crlLifetime),
+		ThisUpdate:          now,
+		NextUpdate:          nextUpdate,
 		SignatureAlgorithm:  signingBundle.RevocationSigAlg,
 	}
 
 	crlBytes, err := x509.CreateRevocationList(rand.Reader, revocationListTemplate, signingBundle.Certificate, signingBundle.PrivateKey)
 	if err != nil {
-		return errutil.InternalError{Err: fmt.Sprintf("error creating new CRL: %s", err)}
+		return nil, errutil.InternalError{Err: fmt.Sprintf("error creating new CRL: %s", err)}
 	}
 
 	writePath := "crls/" + identifier.String()
@@ -679,8 +823,8 @@ WRITE:
 		Value: crlBytes,
 	})
 	if err != nil {
-		return errutil.InternalError{Err: fmt.Sprintf("error storing CRL: %s", err)}
+		return nil, errutil.InternalError{Err: fmt.Sprintf("error storing CRL: %s", err)}
 	}
 
-	return nil
+	return &nextUpdate, nil
 }
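Reviewer note: the heart of `checkForAutoRebuild` is a simple time comparison — a CRL is due for rebuild once now passes `NextUpdate` minus the grace period, and the zero-time sentinel written by `buildCRL` for disabled CRLs always triggers one. A small self-contained sketch of that decision (the function name is ours, not Vault's):

```go
package main

import (
	"fmt"
	"time"
)

// needsRebuild mirrors the per-CRL check in checkForAutoRebuild: rebuild
// once we're inside the grace window before nextUpdate, and treat a zero
// nextUpdate (the disabled-CRL sentinel from buildCRL) as "rebuild now".
func needsRebuild(now, nextUpdate time.Time, grace time.Duration) bool {
	return nextUpdate.IsZero() || now.After(nextUpdate.Add(-grace))
}

func main() {
	grace := 12 * time.Hour
	now := time.Now()
	nextUpdate := now.Add(72 * time.Hour) // a freshly built 72h CRL

	fmt.Println(needsRebuild(now, nextUpdate, grace))                   // false: 60h from the window
	fmt.Println(needsRebuild(now.Add(61*time.Hour), nextUpdate, grace)) // true: inside the grace window
	fmt.Println(needsRebuild(now, time.Time{}, grace))                  // true: zero-time sentinel
}
```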
diff --git a/builtin/logical/pki/ocsp.go b/builtin/logical/pki/ocsp.go
index 67302e6c3f1a6..658eaa5720be5 100644
--- a/builtin/logical/pki/ocsp.go
+++ b/builtin/logical/pki/ocsp.go
@@ -100,7 +100,9 @@ func buildPathOcspPost(b *backend) *framework.Path {
 }
 
 func (b *backend) ocspHandler(ctx context.Context, request *logical.Request, data *framework.FieldData) (*logical.Response, error) {
-	if b.isOcspDisabled.Load() {
+	sc := b.makeStorageContext(ctx, request.Storage)
+	cfg, err := b.crlBuilder.getConfigWithUpdate(sc)
+	if err != nil || cfg.OcspDisable {
 		return OcspUnauthorizedResponse, nil
 	}
 
@@ -114,8 +116,6 @@ func (b *backend) ocspHandler(ctx context.Context, request *logical.Request, dat
 		return OcspMalformedResponse, nil
 	}
 
-	sc := b.makeStorageContext(ctx, request.Storage)
-
 	ocspStatus, err := getOcspStatus(sc, request, ocspReq)
 	if err != nil {
 		return logAndReturnInternalError(b, err), nil
diff --git a/builtin/logical/pki/path_config_crl.go b/builtin/logical/pki/path_config_crl.go
index d1d806339e7f2..78515521bc809 100644
--- a/builtin/logical/pki/path_config_crl.go
+++ b/builtin/logical/pki/path_config_crl.go
@@ -12,9 +12,20 @@ import (
 
 // CRLConfig holds basic CRL configuration information
 type crlConfig struct {
-	Expiry      string `json:"expiry"`
-	Disable     bool   `json:"disable"`
-	OcspDisable bool   `json:"ocsp_disable"`
+	Expiry                 string `json:"expiry"`
+	Disable                bool   `json:"disable"`
+	OcspDisable            bool   `json:"ocsp_disable"`
+	AutoRebuild            bool   `json:"auto_rebuild"`
+	AutoRebuildGracePeriod string `json:"auto_rebuild_grace_period"`
+}
+
+// Implicit default values for the config if it does not exist.
+var defaultCrlConfig = crlConfig{
+	Expiry:                 "72h",
+	Disable:                false,
+	OcspDisable:            false,
+	AutoRebuild:            false,
+	AutoRebuildGracePeriod: "12h",
 }
 
 func pathConfigCRL(b *backend) *framework.Path {
@@ -35,6 +46,15 @@ valid; defaults to 72 hours`,
 			Type:        framework.TypeBool,
 			Description: `If set to true, ocsp unauthorized responses will be returned.`,
 		},
+		"auto_rebuild": {
+			Type:        framework.TypeBool,
+			Description: `If set to true, enables automatic rebuilding of the CRL`,
+		},
+		"auto_rebuild_grace_period": {
+			Type:        framework.TypeString,
+			Description: `The time before the CRL expires to automatically rebuild it, when enabled. Must be shorter than the CRL expiry. Defaults to 12h.`,
+			Default:     "12h",
+		},
 	},
 
 	Operations: map[logical.Operation]framework.OperationHandler{
@@ -63,9 +83,11 @@ func (b *backend) pathCRLRead(ctx context.Context, req *logical.Request, _ *fram
 
 	return &logical.Response{
 		Data: map[string]interface{}{
-			"expiry":       config.Expiry,
-			"disable":      config.Disable,
-			"ocsp_disable": config.OcspDisable,
+			"expiry":                    config.Expiry,
+			"disable":                   config.Disable,
+			"ocsp_disable":              config.OcspDisable,
+			"auto_rebuild":              config.AutoRebuild,
+			"auto_rebuild_grace_period": config.AutoRebuildGracePeriod,
 		},
 	}, nil
 }
@@ -86,18 +108,37 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
 		config.Expiry = expiry
 	}
 
-	var oldDisable bool
+	oldDisable := config.Disable
 	if disableRaw, ok := d.GetOk("disable"); ok {
-		oldDisable = config.Disable
 		config.Disable = disableRaw.(bool)
 	}
 
-	var oldOcspDisable bool
 	if ocspDisableRaw, ok := d.GetOk("ocsp_disable"); ok {
-		oldOcspDisable = config.OcspDisable
 		config.OcspDisable = ocspDisableRaw.(bool)
 	}
 
+	oldAutoRebuild := config.AutoRebuild
+	if autoRebuildRaw, ok := d.GetOk("auto_rebuild"); ok {
+		config.AutoRebuild = autoRebuildRaw.(bool)
+	}
+
+	if autoRebuildGracePeriodRaw, ok := d.GetOk("auto_rebuild_grace_period"); ok {
+		autoRebuildGracePeriod := autoRebuildGracePeriodRaw.(string)
+		if _, err := time.ParseDuration(autoRebuildGracePeriod); err != nil {
+			return logical.ErrorResponse(fmt.Sprintf("given auto_rebuild_grace_period could not be decoded: %s", err)), nil
+		}
+		config.AutoRebuildGracePeriod = autoRebuildGracePeriod
+	}
+
+	if config.AutoRebuild {
+		expiry, _ := time.ParseDuration(config.Expiry)
+		gracePeriod, _ := time.ParseDuration(config.AutoRebuildGracePeriod)
+
+		if gracePeriod >= expiry {
+			return logical.ErrorResponse(fmt.Sprintf("CRL auto-rebuilding grace period (%v) must be strictly shorter than CRL expiry (%v) value when auto-rebuilding of CRLs is enabled", config.AutoRebuildGracePeriod, config.Expiry)), nil
+		}
+	}
+
 	entry, err := logical.StorageEntryJSON("config/crl", config)
 	if err != nil {
 		return nil, err
@@ -107,8 +148,12 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
 		return nil, err
 	}
 
-	if oldDisable != config.Disable {
-		// It wasn't disabled but now it is, rotate
+	b.crlBuilder.markConfigDirty()
+	b.crlBuilder.reloadConfigIfRequired(sc)
+
+	if oldDisable != config.Disable || (oldAutoRebuild && !config.AutoRebuild) {
+		// The disable flag changed (or auto-rebuild was just turned off),
+		// so rotate the CRL to reflect the new state.
 		crlErr := b.crlBuilder.rebuild(ctx, b, req, true)
 		if crlErr != nil {
 			switch crlErr.(type) {
@@ -120,10 +165,6 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
 			}
 		}
 	}
 
-	if oldOcspDisable != config.OcspDisable {
-		setOcspStatus(b, ctx)
-	}
-
 	return nil, nil
 }
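Reviewer note: putting the new parameters together, clients enable auto-rebuild through the same `config/crl` endpoint. A sketch with the official Go client (assumes `VAULT_ADDR`/`VAULT_TOKEN` are exported and a PKI engine is mounted at `pki`); note the grace period must stay strictly shorter than the expiry, or the write handler above returns an error response:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	// Assumes env-based configuration and an existing "pki" mount.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Rebuild the CRL once it is within 8h of its 48h expiry, instead of
	// regenerating it on every revocation.
	_, err = client.Logical().Write("pki/config/crl", map[string]interface{}{
		"expiry":                    "48h",
		"auto_rebuild":              true,
		"auto_rebuild_grace_period": "8h", // must be < expiry
	})
	if err != nil {
		log.Fatal(err)
	}

	cfg, err := client.Logical().Read("pki/config/crl")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("auto_rebuild:", cfg.Data["auto_rebuild"],
		"grace:", cfg.Data["auto_rebuild_grace_period"])
}
```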
diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go
index 556e4c3488dd0..02f9b4157fada 100644
--- a/builtin/logical/pki/path_tidy.go
+++ b/builtin/logical/pki/path_tidy.go
@@ -225,9 +225,21 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
 				}
 
 				if rebuildCRL {
-					if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil {
+					// Removing expired certificates generally isn't an
+					// important enough reason to trigger a CRL rebuild.
+					// Check if automatic CRL rebuilds have been enabled
+					// and defer the rebuild if so.
+					sc := b.makeStorageContext(ctx, req.Storage)
+					config, err := sc.getRevocationConfig()
+					if err != nil {
 						return err
 					}
+
+					if !config.AutoRebuild {
+						if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil {
+							return err
+						}
+					}
 				}
 			}
diff --git a/builtin/logical/pki/storage.go b/builtin/logical/pki/storage.go
index 0145b2a37e411..1aea6ac7b7b86 100644
--- a/builtin/logical/pki/storage.go
+++ b/builtin/logical/pki/storage.go
@@ -159,8 +159,9 @@ type issuerEntry struct {
 }
 
 type localCRLConfigEntry struct {
-	IssuerIDCRLMap map[issuerID]crlID `json:"issuer_id_crl_map"`
-	CRLNumberMap   map[crlID]int64    `json:"crl_number_map"`
+	IssuerIDCRLMap   map[issuerID]crlID  `json:"issuer_id_crl_map"`
+	CRLNumberMap     map[crlID]int64     `json:"crl_number_map"`
+	CRLExpirationMap map[crlID]time.Time `json:"crl_expiration_map"`
 }
 
 type keyConfigEntry struct {
@@ -803,6 +804,10 @@ func (sc *storageContext) getLocalCRLConfig() (*localCRLConfigEntry, error) {
 		mapping.CRLNumberMap = make(map[crlID]int64)
 	}
 
+	if len(mapping.CRLExpirationMap) == 0 {
+		mapping.CRLExpirationMap = make(map[crlID]time.Time)
+	}
+
 	return mapping, nil
 }
diff --git a/changelog/16762.txt b/changelog/16762.txt
new file mode 100644
index 0000000000000..ade57bd4dc7c2
--- /dev/null
+++ b/changelog/16762.txt
@@ -0,0 +1,3 @@
+```release-note:improvement
+secrets/pki: Add ability to periodically rebuild CRL before expiry
+```
diff --git a/vault/rollback.go b/vault/rollback.go
index c2a3687706a59..ed37f94ef9119 100644
--- a/vault/rollback.go
+++ b/vault/rollback.go
@@ -14,10 +14,11 @@ import (
 	"github.com/hashicorp/vault/sdk/logical"
 )
 
-const (
-	// rollbackPeriod is how often we attempt rollbacks for all the backends
-	rollbackPeriod = time.Minute
-)
+// rollbackPeriod is how often we attempt rollbacks for all the backends.
+//
+// This is turned into a variable to allow tests to check behavior without
+// waiting the full minute. See SetRollbackPeriodForTesting(...).
+var rollbackPeriod = time.Minute
 
 // RollbackManager is responsible for performing rollbacks of partial
 // secrets within logical backends.
diff --git a/vault/testing.go b/vault/testing.go
index 7a9246090a5c4..f5978ce144dd0 100644
--- a/vault/testing.go
+++ b/vault/testing.go
@@ -2347,3 +2347,14 @@ func RetryUntil(t testing.T, timeout time.Duration, f func() error) {
 	}
 	t.Fatalf("did not complete before deadline, err: %v", err)
 }
+
+// SetRollbackPeriodForTesting lets us modify the periodic func invocation
+// time period to some other value. Best practice is to set this, spin up
+// a test cluster and immediately reset the value back to the default, to
+// avoid impacting other tests too much. To that end, we return the original
+// value of that period.
+func SetRollbackPeriodForTesting(newPeriod time.Duration) time.Duration {
+	oldPeriod := rollbackPeriod
+	rollbackPeriod = newPeriod
+	return oldPeriod
+}
diff --git a/website/content/api-docs/secret/pki.mdx b/website/content/api-docs/secret/pki.mdx
index 0aa5ad357305d..746837b930f01 100644
--- a/website/content/api-docs/secret/pki.mdx
+++ b/website/content/api-docs/secret/pki.mdx
@@ -2989,7 +2989,7 @@ $ curl \
 }
 ```
 
-### Set CRL Configuration
+### Set Revocation Configuration
 
 This endpoint allows setting the duration for which the generated CRL should be
 marked valid. If the CRL is disabled, it will return a signed but zero-length
 
 revoked until `tidy` has been run with the desired safety buffer. Re-enabling
 CRL generation will then result in all such certificates becoming a part of
 the CRL.
 
+~> Note: Enabling automatic rebuilding of CRLs disables immediate regeneration
+   on revocation. This is in line with the behavior of other CAs, which only
+   rebuild CRLs periodically. We suggest manually hitting the rotate endpoint
+   if a fresh CRL is necessary after a revocation. For the most part though,
+   CRLs should not be relied upon for the latest certificate status
+   information, and OCSP should be used instead.
+
 | Method | Path              |
 | :----- | :---------------- |
 | `POST` | `/pki/config/crl` |
@@ -3018,6 +3025,10 @@ the CRL.
 
 - `expiry` `(string: "72h")` - The amount of time the generated CRL should be valid.
 - `disable` `(bool: false)` - Disables or enables CRL building.
 - `ocsp_disable` `(bool: false)` - Disables or enables the OCSP responder in Vault.
+- `auto_rebuild` `(bool: false)` - Enables or disables periodic rebuilding of
+  the CRL upon expiry.
+- `auto_rebuild_grace_period` `(string: "12h")` - Grace period before CRL expiry
+  to attempt rebuild of CRL. Must be shorter than the CRL expiry period.
 
 #### Sample Payload
 
@@ -3025,7 +3036,9 @@ the CRL.
 ```json
 {
   "expiry": "48h",
   "disable": "false",
-  "ocsp_disable": "false"
+  "ocsp_disable": "false",
+  "auto_rebuild": "true",
+  "auto_rebuild_grace_period": "8h"
 }
 ```
@@ -3055,6 +3068,14 @@ and **must** be called on every cluster.
 
 to delete their issuer and is instead necessary to **remove** the mount
 completely.
 
+~> **Note**: As of Vault 1.12, it is suggested to switch to enabling the CRL's
+   `auto_rebuild` functionality to avoid having to manually hit the Rotate
+   endpoint when the CRL expires. This ensures a valid CRL is always
+   maintained, at the expense of potentially not being up-to-date. If a
+   revocation occurs that must be immediately propagated, this endpoint can
+   be used to regenerate the CRL, though distribution must still occur outside
+   of Vault (either manually or via AIA where supported).
+
 | Method | Path              |
 | :----- | :---------------- |
 | `GET`  | `/pki/crl/rotate` |
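Reviewer note: per the documentation note above, with `auto_rebuild` enabled a revocation no longer regenerates the CRL immediately, so urgent propagation means forcing a rotation. A small sketch using the rotate endpoint documented above (assumes a `pki` mount and env-based client configuration; the `success` field in the response is per the endpoint's documented sample response):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig()) // uses VAULT_ADDR/VAULT_TOKEN
	if err != nil {
		log.Fatal(err)
	}

	// Force a CRL rebuild right away, e.g. after revoking a certificate
	// whose status must propagate before the next periodic rebuild.
	resp, err := client.Logical().Read("pki/crl/rotate")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("rotation success:", resp.Data["success"])
}
```

Distribution of the freshly rotated CRL still happens outside Vault, as the note says — either manually or via AIA URLs where supported.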