Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix force-renewing revoked on-demand certs #166

Merged
merged 3 commits into from Feb 1, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions certificates.go
Expand Up @@ -310,17 +310,17 @@ func (cfg *Config) managedCertInStorageExpiresSoon(cert Certificate) (bool, erro
// on oldCert into the cache, from storage. This also replaces the old certificate
// with the new one, so that all configurations that used the old cert now point
// to the new cert. It assumes that the new certificate for oldCert.Names[0] is
// already in storage.
func (cfg *Config) reloadManagedCertificate(oldCert Certificate) error {
// already in storage. It returns the newly-loaded certificate if successful.
func (cfg *Config) reloadManagedCertificate(oldCert Certificate) (Certificate, error) {
if cfg.Logger != nil {
cfg.Logger.Info("reloading managed certificate", zap.Strings("identifiers", oldCert.Names))
}
newCert, err := cfg.loadManagedCertificate(oldCert.Names[0])
if err != nil {
return fmt.Errorf("loading managed certificate for %v from storage: %v", oldCert.Names, err)
return Certificate{}, fmt.Errorf("loading managed certificate for %v from storage: %v", oldCert.Names, err)
}
cfg.certCache.replaceCertificate(oldCert, newCert)
return nil
return newCert, nil
}

// SubjectQualifiesForCert returns true if subj is a name which,
Expand Down
2 changes: 1 addition & 1 deletion config.go
Expand Up @@ -370,7 +370,7 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool)
return fmt.Errorf("%s: renewing certificate: %w", domainName, err)
}
// successful renewal, so update in-memory cache
err = cfg.reloadManagedCertificate(cert)
_, err = cfg.reloadManagedCertificate(cert)
if err != nil {
return fmt.Errorf("%s: reloading renewed certificate into memory: %v", domainName, err)
}
Expand Down
65 changes: 40 additions & 25 deletions handshake.go
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/mholt/acmez"
"go.uber.org/zap"
"golang.org/x/crypto/ocsp"
)

// GetCertificate gets a certificate to satisfy clientHello. In getting
Expand Down Expand Up @@ -469,11 +470,6 @@ func (cfg *Config) obtainOnDemandCertificate(ctx context.Context, hello *tls.Cli
func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) {
log := loggerNamed(cfg.Logger, "on_demand")

// Check cert expiration
if currentlyInRenewalWindow(cert.Leaf.NotBefore, cert.Leaf.NotAfter, cfg.RenewalWindowRatio) {
return cfg.renewDynamicCertificate(ctx, hello, cert)
}

// Check OCSP staple validity
if cert.ocsp != nil && !freshOCSP(cert.ocsp) {
ocspResp, err := stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil)
Expand All @@ -494,7 +490,14 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe

// We attempt to replace any certificates that were revoked.
// Crucially, this happens OUTSIDE a lock on the certCache.
cfg.forceRenewIfCertRevoked(ctx, log, cert, ocspResp)
if certShouldBeForceRenewed(cert, ocspResp) {
return cfg.renewDynamicCertificate(ctx, hello, cert, ocspResp)
}
}

// Check cert expiration
if currentlyInRenewalWindow(cert.Leaf.NotBefore, cert.Leaf.NotAfter, cfg.RenewalWindowRatio) {
return cfg.renewDynamicCertificate(ctx, hello, cert, nil)
}

return cert, nil
Expand All @@ -507,12 +510,17 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe
// and the renewal will happen in the background; otherwise this blocks until the
// certificate has been renewed, and returns the renewed certificate.
//
// If the certificate is being renewed because it was revoked and not because it was
// expiring, set revokedOCSP to not nil, then it will be assumed the certificate has
// been revoked and the renewal will be forced.
//
// This function is safe for use by multiple concurrent goroutines.
func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) {
func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate, revokedOCSP *ocsp.Response) (Certificate, error) {
log := loggerNamed(cfg.Logger, "on_demand")

name := cfg.getNameFromClientHello(hello)
timeLeft := time.Until(currentCert.Leaf.NotAfter)
revoked := revokedOCSP != nil

getCertWithoutReobtaining := func() (Certificate, error) {
// very important to set the obtainIfNecessary argument to false, so we don't repeat this infinitely
Expand All @@ -526,9 +534,10 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
// lucky us -- another goroutine is already renewing the certificate
obtainCertWaitChansMu.Unlock()

if timeLeft > 0 {
// the current certificate hasn't expired, and another goroutine is already
// renewing it, so we might as well serve what we have without blocking
// the current certificate hasn't expired, and another goroutine is already
// renewing it, so we might as well serve what we have without blocking, UNLESS
// we're forcing renewal, in which case the current certificate is not usable
if timeLeft > 0 && !revoked {
if log != nil {
log.Debug("certificate expires soon but is already being renewed; serving current certificate",
zap.Strings("subjects", currentCert.Names),
Expand All @@ -538,12 +547,13 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
}

// otherwise, we'll have to wait for the renewal to finish so we don't serve
// an expired certificate
// a revoked or expired certificate

if log != nil {
log.Debug("certificate has expired, but is already being renewed; waiting for renewal to complete",
zap.Strings("subjects", currentCert.Names),
zap.Time("expired", currentCert.Leaf.NotAfter))
zap.Time("expired", currentCert.Leaf.NotAfter),
zap.Bool("revoked", revoked))
}

// TODO: see if we can get a proper context in here, for true cancellation
Expand Down Expand Up @@ -575,7 +585,8 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
zap.String("server_name", name),
zap.Strings("subjects", currentCert.Names),
zap.Time("expiration", currentCert.Leaf.NotAfter),
zap.Duration("remaining", timeLeft))
zap.Duration("remaining", timeLeft),
zap.Bool("revoked", revoked))
}

// Make sure a certificate for this name should be obtained on-demand
Expand All @@ -593,19 +604,23 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
renewAndReload := func(ctx context.Context, cancel context.CancelFunc) (Certificate, error) {
defer cancel()

err = cfg.RenewCertAsync(ctx, name, false)
if err == nil {
// even though the recursive nature of the dynamic cert loading
// would just call this function anyway, we do it here to
// make the replacement as atomic as possible.
newCert, err := cfg.CacheManagedCertificate(name)
if err != nil {
if log != nil {
log.Error("loading renewed certificate", zap.String("server_name", name), zap.Error(err))
if revoked {
return cfg.forceRenew(ctx, log, currentCert, revokedOCSP)
} else {
mholt marked this conversation as resolved.
Show resolved Hide resolved
err = cfg.RenewCertAsync(ctx, name, false)
if err == nil {
// even though the recursive nature of the dynamic cert loading
// would just call this function anyway, we do it here to
// make the replacement as atomic as possible.
newCert, err := cfg.CacheManagedCertificate(name)
if err != nil {
if log != nil {
log.Error("loading renewed certificate", zap.String("server_name", name), zap.Error(err))
}
} else {
// replace the old certificate with the new one
cfg.certCache.replaceCertificate(currentCert, newCert)
}
} else {
// replace the old certificate with the new one
cfg.certCache.replaceCertificate(currentCert, newCert)
}
}

Expand Down
76 changes: 43 additions & 33 deletions maintain.go
Expand Up @@ -191,7 +191,7 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
cfg := configs[oldCert.Names[0]]

// crucially, this happens OUTSIDE a lock on the certCache
err := cfg.reloadManagedCertificate(oldCert)
_, err := cfg.reloadManagedCertificate(oldCert)
if err != nil {
if log != nil {
log.Error("loading renewed certificate",
Expand Down Expand Up @@ -264,7 +264,7 @@ func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificat

// successful renewal, so update in-memory cache by loading
// renewed certificate so it will be used with handshakes
err = cfg.reloadManagedCertificate(oldCert)
_, err = cfg.reloadManagedCertificate(oldCert)
if err != nil {
return ErrNoRetry{fmt.Errorf("%v %v", oldCert.Names, err)}
}
Expand Down Expand Up @@ -372,7 +372,7 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) {
}

// If a managed certificate was revoked, we should attempt to replace it with a new one.
if cert.managed && ocspResp.Status == ocsp.Revoked && len(cert.Names) > 0 {
if certShouldBeForceRenewed(cert, ocspResp) {
renewQueue = append(renewQueue, renewQueueEntry{
oldCert: cert,
ocspResp: ocspResp,
Expand All @@ -395,7 +395,12 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) {
// Crucially, this happens OUTSIDE a lock on the certCache.
for _, renew := range renewQueue {
cfg := configs[renew.oldCert.Names[0]]
cfg.forceRenewIfCertRevoked(ctx, logger, renew.oldCert, renew.ocspResp)
_, err := cfg.forceRenew(ctx, logger, renew.oldCert, renew.ocspResp)
if err != nil && logger != nil {
logger.Info("forcefully renewing certificate due to REVOKED status",
zap.Strings("identifiers", renew.oldCert.Names),
zap.Error(err))
}
}
}

Expand Down Expand Up @@ -543,18 +548,22 @@ func deleteExpiredCerts(ctx context.Context, storage Storage, gracePeriod time.D
return nil
}

// forceRenewIfCertRevoked forcefully renews cert if ocspResp has a status of Revoked. (It is a no-op otherwise.)
// Due to nuances with error handling, any errors are logged with the provided logger rather than returned.
// This MUST NOT be called within a lock on cfg.certCacheMu.
func (cfg *Config) forceRenewIfCertRevoked(ctx context.Context, logger *zap.Logger, cert Certificate, ocspResp *ocsp.Response) {
if !cert.managed || ocspResp.Status != ocsp.Revoked || len(cert.Names) == 0 {
return
}

// forceRenew forcefully renews cert and replaces it in the cache, and returns the new certificate. It is
// intended for use primarily in the case of cert revocation. The latest OCSP response must be passed in,
// since stapleOCSP() only staples Good responses to a Certificate. This MUST NOT be called within a lock
// on cfg.certCacheMu.
func (cfg *Config) forceRenew(ctx context.Context, logger *zap.Logger, cert Certificate, ocspResp *ocsp.Response) (Certificate, error) {
if logger != nil {
logger.Warn("OCSP status for managed certificate is REVOKED; attempting to replace with new certificate",
zap.Strings("identifiers", cert.Names),
zap.Time("expiration", cert.Leaf.NotAfter))
if ocspResp.Status == ocsp.Revoked {
logger.Warn("OCSP status for managed certificate is REVOKED; attempting to replace with new certificate",
zap.Strings("identifiers", cert.Names),
zap.Time("expiration", cert.Leaf.NotAfter))
} else {
logger.Warn("forcefully renewing certificate",
zap.Strings("identifiers", cert.Names),
zap.Time("expiration", cert.Leaf.NotAfter),
zap.Int("ocsp_status", ocspResp.Status))
}
}

renewName := cert.Names[0]
Expand Down Expand Up @@ -588,26 +597,21 @@ func (cfg *Config) forceRenewIfCertRevoked(ctx context.Context, logger *zap.Logg
err = cfg.RenewCertAsync(ctx, renewName, true)
}
if err != nil {
// probably better to not serve a revoked certificate at all
if logger != nil {
logger.Error("unable to obtain new to certificate after OCSP status of REVOKED; removing from cache",
zap.Strings("identifiers", cert.Names),
zap.Error(err))
}
cfg.certCache.mu.Lock()
cfg.certCache.removeCertificate(cert)
cfg.certCache.mu.Unlock()
return
}
err = cfg.reloadManagedCertificate(cert)
if err != nil {
if logger != nil {
logger.Error("after obtaining new certificate due to OCSP status of REVOKED",
zap.Strings("identifiers", cert.Names),
zap.Error(err))
if ocspResp.Status == ocsp.Revoked {
// probably better to not serve a revoked certificate at all
if logger != nil {
logger.Error("unable to obtain new to certificate after OCSP status of REVOKED; removing from cache",
zap.Strings("identifiers", cert.Names),
zap.Error(err))
}
cfg.certCache.mu.Lock()
cfg.certCache.removeCertificate(cert)
cfg.certCache.mu.Unlock()
}
return
return cert, fmt.Errorf("unable to forcefully get new certificate for %v: %w", cert.Names, err)
}

return cfg.reloadManagedCertificate(cert)
}

// moveCompromisedPrivateKey moves the private key for cert to a ".compromised" file
Expand Down Expand Up @@ -641,6 +645,12 @@ func (cfg *Config) moveCompromisedPrivateKey(cert Certificate, logger *zap.Logge
return nil
}

// certShouldBeForceRenewed returns true if cert should be forcefully renewed
// according to ocspResp (i.e. if it is Revoked).
func certShouldBeForceRenewed(cert Certificate, ocspResp *ocsp.Response) bool {
return cert.managed && len(cert.Names) > 0 && ocspResp.Status == ocsp.Revoked
}

const (
// DefaultRenewCheckInterval is how often to check certificates for expiration.
// Scans are very lightweight, so this can be semi-frequent. This default should
Expand Down