diff --git a/certificates.go b/certificates.go index 067bfc50..e0d8fbdc 100644 --- a/certificates.go +++ b/certificates.go @@ -42,6 +42,9 @@ type Certificate struct { Tags []string // OCSP contains the certificate's parsed OCSP response. + // It is not necessarily the response that is stapled + // (e.g. if the status is not Good), it is simply the + // most recent OCSP response we have for this certificate. ocsp *ocsp.Response // The hex-encoded hash of this cert's chain's bytes. @@ -159,7 +162,7 @@ func (cfg *Config) CacheUnmanagedTLSCertificate(tlsCert tls.Certificate, tags [] if err != nil { return err } - _, err = stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) + err = stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) if err != nil && cfg.Logger != nil { cfg.Logger.Warn("stapling OCSP", zap.Error(err)) } @@ -207,9 +210,9 @@ func (cfg Config) makeCertificateWithOCSP(certPEMBlock, keyPEMBlock []byte) (Cer if err != nil { return cert, err } - _, err = stapleOCSP(cfg.OCSP, cfg.Storage, &cert, certPEMBlock) + err = stapleOCSP(cfg.OCSP, cfg.Storage, &cert, certPEMBlock) if err != nil && cfg.Logger != nil { - cfg.Logger.Warn("stapling OCSP", zap.Error(err)) + cfg.Logger.Warn("stapling OCSP", zap.Error(err), zap.Strings("identifiers", cert.Names)) } return cert, nil } @@ -310,17 +313,17 @@ func (cfg *Config) managedCertInStorageExpiresSoon(cert Certificate) (bool, erro // on oldCert into the cache, from storage. This also replaces the old certificate // with the new one, so that all configurations that used the old cert now point // to the new cert. It assumes that the new certificate for oldCert.Names[0] is -// already in storage. -func (cfg *Config) reloadManagedCertificate(oldCert Certificate) error { +// already in storage. It returns the newly-loaded certificate if successful. +func (cfg *Config) reloadManagedCertificate(oldCert Certificate) (Certificate, error) { if cfg.Logger != nil { cfg.Logger.Info("reloading managed certificate", zap.Strings("identifiers", oldCert.Names)) } newCert, err := cfg.loadManagedCertificate(oldCert.Names[0]) if err != nil { - return fmt.Errorf("loading managed certificate for %v from storage: %v", oldCert.Names, err) + return Certificate{}, fmt.Errorf("loading managed certificate for %v from storage: %v", oldCert.Names, err) } cfg.certCache.replaceCertificate(oldCert, newCert) - return nil + return newCert, nil } // SubjectQualifiesForCert returns true if subj is a name which, diff --git a/config.go b/config.go index ca6de2a5..a85db185 100644 --- a/config.go +++ b/config.go @@ -35,6 +35,7 @@ import ( "github.com/mholt/acmez" "github.com/mholt/acmez/acme" "go.uber.org/zap" + "golang.org/x/crypto/ocsp" "golang.org/x/net/idna" ) @@ -358,33 +359,41 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) return obtain() } - // for an existing certificate, make sure it is renewed + // for an existing certificate, make sure it is renewed; or if it is revoked, + // force a renewal even if it's not expiring renew := func() error { - var err error - if async { - err = cfg.RenewCertAsync(ctx, domainName, false) - } else { - err = cfg.RenewCertSync(ctx, domainName, false) - } - if err != nil { - return fmt.Errorf("%s: renewing certificate: %w", domainName, err) + // first, ensure status is not revoked (it was just refreshed in CacheManagedCertificate above) + if !cert.Expired() && cert.ocsp != nil && cert.ocsp.Status == ocsp.Revoked { + _, err = cfg.forceRenew(ctx, cfg.Logger, cert) + return err } - // successful renewal, so update in-memory cache - err = cfg.reloadManagedCertificate(cert) - if err != nil { - return fmt.Errorf("%s: reloading renewed certificate into memory: %v", domainName, err) + + // otherwise, simply renew the certificate if needed + if cert.NeedsRenewal(cfg) { + var err error + if async { + err = cfg.RenewCertAsync(ctx, domainName, false) + } else { + err = cfg.RenewCertSync(ctx, domainName, false) + } + if err != nil { + return fmt.Errorf("%s: renewing certificate: %w", domainName, err) + } + // successful renewal, so update in-memory cache + _, err = cfg.reloadManagedCertificate(cert) + if err != nil { + return fmt.Errorf("%s: reloading renewed certificate into memory: %v", domainName, err) + } } + return nil } - if cert.NeedsRenewal(cfg) { - if async { - jm.Submit(cfg.Logger, "renew_"+domainName, renew) - return nil - } - return renew() - } - return nil + if async { + jm.Submit(cfg.Logger, "renew_"+domainName, renew) + return nil + } + return renew() } // Unmanage causes the certificates for domainNames to stop being managed. diff --git a/handshake.go b/handshake.go index 25515211..637cf9ca 100644 --- a/handshake.go +++ b/handshake.go @@ -25,6 +25,7 @@ import ( "github.com/mholt/acmez" "go.uber.org/zap" + "golang.org/x/crypto/ocsp" ) // GetCertificate gets a certificate to satisfy clientHello. In getting @@ -469,14 +470,9 @@ func (cfg *Config) obtainOnDemandCertificate(ctx context.Context, hello *tls.Cli func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) { log := loggerNamed(cfg.Logger, "on_demand") - // Check cert expiration - if currentlyInRenewalWindow(cert.Leaf.NotBefore, cert.Leaf.NotAfter, cfg.RenewalWindowRatio) { - return cfg.renewDynamicCertificate(ctx, hello, cert) - } - // Check OCSP staple validity if cert.ocsp != nil && !freshOCSP(cert.ocsp) { - ocspResp, err := stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) + err := stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) if err != nil { // An error with OCSP stapling is not the end of the world, and in fact, is // quite common considering not all certs have issuer URLs that support it. @@ -494,7 +490,14 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe // We attempt to replace any certificates that were revoked. // Crucially, this happens OUTSIDE a lock on the certCache. - cfg.forceRenewIfCertRevoked(ctx, log, cert, ocspResp) + if certShouldBeForceRenewed(cert) { + return cfg.renewDynamicCertificate(ctx, hello, cert) + } + } + + // Check cert expiration + if currentlyInRenewalWindow(cert.Leaf.NotBefore, cert.Leaf.NotAfter, cfg.RenewalWindowRatio) { + return cfg.renewDynamicCertificate(ctx, hello, cert) } return cert, nil @@ -507,12 +510,16 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe // and the renewal will happen in the background; otherwise this blocks until the // certificate has been renewed, and returns the renewed certificate. // +// If the certificate's OCSP status (currentCert.ocsp) is Revoked, it will be forcefully +// renewed even if it is not expiring. +// // This function is safe for use by multiple concurrent goroutines. func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) { log := loggerNamed(cfg.Logger, "on_demand") name := cfg.getNameFromClientHello(hello) timeLeft := time.Until(currentCert.Leaf.NotAfter) + revoked := currentCert.ocsp != nil && currentCert.ocsp.Status == ocsp.Revoked getCertWithoutReobtaining := func() (Certificate, error) { // very important to set the obtainIfNecessary argument to false, so we don't repeat this infinitely @@ -526,9 +533,10 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien // lucky us -- another goroutine is already renewing the certificate obtainCertWaitChansMu.Unlock() - if timeLeft > 0 { - // the current certificate hasn't expired, and another goroutine is already - // renewing it, so we might as well serve what we have without blocking + // the current certificate hasn't expired, and another goroutine is already + // renewing it, so we might as well serve what we have without blocking, UNLESS + // we're forcing renewal, in which case the current certificate is not usable + if timeLeft > 0 && !revoked { if log != nil { log.Debug("certificate expires soon but is already being renewed; serving current certificate", zap.Strings("subjects", currentCert.Names), @@ -538,12 +546,13 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien } // otherwise, we'll have to wait for the renewal to finish so we don't serve - // an expired certificate + // a revoked or expired certificate if log != nil { log.Debug("certificate has expired, but is already being renewed; waiting for renewal to complete", zap.Strings("subjects", currentCert.Names), - zap.Time("expired", currentCert.Leaf.NotAfter)) + zap.Time("expired", currentCert.Leaf.NotAfter), + zap.Bool("revoked", revoked)) } // TODO: see if we can get a proper context in here, for true cancellation @@ -575,7 +584,8 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien zap.String("server_name", name), zap.Strings("subjects", currentCert.Names), zap.Time("expiration", currentCert.Leaf.NotAfter), - zap.Duration("remaining", timeLeft)) + zap.Duration("remaining", timeLeft), + zap.Bool("revoked", revoked)) } // Make sure a certificate for this name should be obtained on-demand @@ -593,19 +603,26 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien renewAndReload := func(ctx context.Context, cancel context.CancelFunc) (Certificate, error) { defer cancel() - err = cfg.RenewCertAsync(ctx, name, false) - if err == nil { - // even though the recursive nature of the dynamic cert loading - // would just call this function anyway, we do it here to - // make the replacement as atomic as possible. - newCert, err := cfg.CacheManagedCertificate(name) - if err != nil { - if log != nil { - log.Error("loading renewed certificate", zap.String("server_name", name), zap.Error(err)) + var newCert Certificate + var err error + + if revoked { + newCert, err = cfg.forceRenew(ctx, log, currentCert) + } else { + err = cfg.RenewCertAsync(ctx, name, false) + if err == nil { + // even though the recursive nature of the dynamic cert loading + // would just call this function anyway, we do it here to + // make the replacement as atomic as possible. + newCert, err = cfg.CacheManagedCertificate(name) + if err != nil { + if log != nil { + log.Error("loading renewed certificate", zap.String("server_name", name), zap.Error(err)) + } + } else { + // replace the old certificate with the new one + cfg.certCache.replaceCertificate(currentCert, newCert) } - } else { - // replace the old certificate with the new one - cfg.certCache.replaceCertificate(currentCert, newCert) } } @@ -615,7 +632,13 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien unblockWaiters() if err != nil { - return Certificate{}, err + if log != nil { + log.Error("renewing and reloading certificate", + zap.String("server_name", name), + zap.Error(err), + zap.Bool("forced", revoked)) + } + return newCert, err } return getCertWithoutReobtaining() diff --git a/maintain.go b/maintain.go index beb59f15..0c379c6c 100644 --- a/maintain.go +++ b/maintain.go @@ -191,7 +191,7 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { cfg := configs[oldCert.Names[0]] // crucially, this happens OUTSIDE a lock on the certCache - err := cfg.reloadManagedCertificate(oldCert) + _, err := cfg.reloadManagedCertificate(oldCert) if err != nil { if log != nil { log.Error("loading renewed certificate", @@ -264,7 +264,7 @@ func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificat // successful renewal, so update in-memory cache by loading // renewed certificate so it will be used with handshakes - err = cfg.reloadManagedCertificate(oldCert) + _, err = cfg.reloadManagedCertificate(oldCert) if err != nil { return ErrNoRetry{fmt.Errorf("%v %v", oldCert.Names, err)} } @@ -295,8 +295,7 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { lastNextUpdate time.Time } type renewQueueEntry struct { - oldCert Certificate - ocspResp *ocsp.Response + oldCert Certificate } updated := make(map[string]ocspUpdate) var updateQueue []updateQueueEntry // certs that need a refreshed staple @@ -313,7 +312,7 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { var lastNextUpdate time.Time if cert.ocsp != nil { lastNextUpdate = cert.ocsp.NextUpdate - if freshOCSP(cert.ocsp) { + if cert.ocsp.Status != ocsp.Unknown && freshOCSP(cert.ocsp) { continue // no need to update staple if ours is still fresh } } @@ -345,8 +344,8 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { continue } - ocspResp, err := stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) - if err != nil || ocspResp == nil { + err = stapleOCSP(cfg.OCSP, cfg.Storage, &cert, nil) + if err != nil { if cert.ocsp != nil { // if there was no staple before, that's fine; otherwise we should log the error if logger != nil { @@ -360,8 +359,9 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { // By this point, we've obtained the latest OCSP response. // If there was no staple before, or if the response is updated, make - // sure we apply the update to all names on the certificate. - if cert.ocsp != nil && (lastNextUpdate.IsZero() || lastNextUpdate != cert.ocsp.NextUpdate) { + // sure we apply the update to all names on the certificate if + // the status is still Good. + if cert.ocsp != nil && cert.ocsp.Status == ocsp.Good && (lastNextUpdate.IsZero() || lastNextUpdate != cert.ocsp.NextUpdate) { if logger != nil { logger.Info("advancing OCSP staple", zap.Strings("identifiers", cert.Names), @@ -371,11 +371,11 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { updated[certHash] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.ocsp} } - // If a managed certificate was revoked, we should attempt to replace it with a new one. - if cert.managed && ocspResp.Status == ocsp.Revoked && len(cert.Names) > 0 { + // If a managed certificate was revoked, we should attempt to replace + // it with a new one. + if certShouldBeForceRenewed(cert) { renewQueue = append(renewQueue, renewQueueEntry{ - oldCert: cert, - ocspResp: ocspResp, + oldCert: cert, }) configs[cert.Names[0]] = cfg } @@ -395,7 +395,12 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { // Crucially, this happens OUTSIDE a lock on the certCache. for _, renew := range renewQueue { cfg := configs[renew.oldCert.Names[0]] - cfg.forceRenewIfCertRevoked(ctx, logger, renew.oldCert, renew.ocspResp) + _, err := cfg.forceRenew(ctx, logger, renew.oldCert) + if err != nil && logger != nil { + logger.Info("forcefully renewing certificate due to REVOKED status", + zap.Strings("identifiers", renew.oldCert.Names), + zap.Error(err)) + } } } @@ -543,18 +548,19 @@ func deleteExpiredCerts(ctx context.Context, storage Storage, gracePeriod time.D return nil } -// forceRenewIfCertRevoked forcefully renews cert if ocspResp has a status of Revoked. (It is a no-op otherwise.) -// Due to nuances with error handling, any errors are logged with the provided logger rather than returned. -// This MUST NOT be called within a lock on cfg.certCacheMu. -func (cfg *Config) forceRenewIfCertRevoked(ctx context.Context, logger *zap.Logger, cert Certificate, ocspResp *ocsp.Response) { - if !cert.managed || ocspResp.Status != ocsp.Revoked || len(cert.Names) == 0 { - return - } - +// forceRenew forcefully renews cert and replaces it in the cache, and returns the new certificate. It is intended +// for use primarily in the case of cert revocation. This MUST NOT be called within a lock on cfg.certCacheMu. +func (cfg *Config) forceRenew(ctx context.Context, logger *zap.Logger, cert Certificate) (Certificate, error) { if logger != nil { - logger.Warn("OCSP status for managed certificate is REVOKED; attempting to replace with new certificate", - zap.Strings("identifiers", cert.Names), - zap.Time("expiration", cert.Leaf.NotAfter)) + if cert.ocsp != nil && cert.ocsp.Status == ocsp.Revoked { + logger.Warn("OCSP status for managed certificate is REVOKED; attempting to replace with new certificate", + zap.Strings("identifiers", cert.Names), + zap.Time("expiration", cert.Leaf.NotAfter)) + } else { + logger.Warn("forcefully renewing certificate", + zap.Strings("identifiers", cert.Names), + zap.Time("expiration", cert.Leaf.NotAfter)) + } } renewName := cert.Names[0] @@ -567,7 +573,7 @@ func (cfg *Config) forceRenewIfCertRevoked(ctx context.Context, logger *zap.Logg // of a prior key, we can't do a "renew" to replace the cert if we need a // new key, so we'll have to do an obtain instead var obtainInsteadOfRenew bool - if ocspResp.RevocationReason == acme.ReasonKeyCompromise { + if cert.ocsp != nil && cert.ocsp.RevocationReason == acme.ReasonKeyCompromise { err := cfg.moveCompromisedPrivateKey(cert, logger) if err != nil && logger != nil { logger.Error("could not remove compromised private key from use", @@ -588,26 +594,21 @@ func (cfg *Config) forceRenewIfCertRevoked(ctx context.Context, logger *zap.Logg err = cfg.RenewCertAsync(ctx, renewName, true) } if err != nil { - // probably better to not serve a revoked certificate at all - if logger != nil { - logger.Error("unable to obtain new to certificate after OCSP status of REVOKED; removing from cache", - zap.Strings("identifiers", cert.Names), - zap.Error(err)) - } - cfg.certCache.mu.Lock() - cfg.certCache.removeCertificate(cert) - cfg.certCache.mu.Unlock() - return - } - err = cfg.reloadManagedCertificate(cert) - if err != nil { - if logger != nil { - logger.Error("after obtaining new certificate due to OCSP status of REVOKED", - zap.Strings("identifiers", cert.Names), - zap.Error(err)) + if cert.ocsp != nil && cert.ocsp.Status == ocsp.Revoked { + // probably better to not serve a revoked certificate at all + if logger != nil { + logger.Error("unable to obtain new to certificate after OCSP status of REVOKED; removing from cache", + zap.Strings("identifiers", cert.Names), + zap.Error(err)) + } + cfg.certCache.mu.Lock() + cfg.certCache.removeCertificate(cert) + cfg.certCache.mu.Unlock() } - return + return cert, fmt.Errorf("unable to forcefully get new certificate for %v: %w", cert.Names, err) } + + return cfg.reloadManagedCertificate(cert) } // moveCompromisedPrivateKey moves the private key for cert to a ".compromised" file @@ -641,6 +642,15 @@ func (cfg *Config) moveCompromisedPrivateKey(cert Certificate, logger *zap.Logge return nil } +// certShouldBeForceRenewed returns true if cert should be forcefully renewed +// (like if it is revoked according to its OCSP response). +func certShouldBeForceRenewed(cert Certificate) bool { + return cert.managed && + len(cert.Names) > 0 && + cert.ocsp != nil && + cert.ocsp.Status == ocsp.Revoked +} + const ( // DefaultRenewCheckInterval is how often to check certificates for expiration. // Scans are very lightweight, so this can be semi-frequent. This default should diff --git a/ocsp.go b/ocsp.go index 4a21546d..43a74d1e 100644 --- a/ocsp.go +++ b/ocsp.go @@ -33,15 +33,15 @@ import ( // bundle; otherwise the DER-encoded cert will have to be PEM-encoded. // If you don't have the PEM blocks already, just pass in nil. // -// Errors here are not necessarily fatal, it could just be that the -// certificate doesn't have an issuer URL. This function may return -// both nil values if OCSP stapling is disabled according to ocspConfig. +// If successful, the OCSP response will be set to cert's ocsp field, +// regardless of the OCSP status. It is only stapled, however, if the +// status is Good. // -// If a status was received, it returns that status. Note that the -// returned status is not always stapled to the certificate. -func stapleOCSP(ocspConfig OCSPConfig, storage Storage, cert *Certificate, pemBundle []byte) (*ocsp.Response, error) { +// Errors here are not necessarily fatal, it could just be that the +// certificate doesn't have an issuer URL. +func stapleOCSP(ocspConfig OCSPConfig, storage Storage, cert *Certificate, pemBundle []byte) error { if ocspConfig.DisableStapling { - return nil, nil + return nil } if pemBundle == nil { @@ -93,33 +93,37 @@ func stapleOCSP(ocspConfig OCSPConfig, storage Storage, cert *Certificate, pemBu // not contain a link to an OCSP server. But we should log it anyway. // There's nothing else we can do to get OCSP for this certificate, // so we can return here with the error. - return nil, fmt.Errorf("no OCSP stapling for %v: %v", cert.Names, ocspErr) + return fmt.Errorf("no OCSP stapling for %v: %v", cert.Names, ocspErr) } gotNewOCSP = true } - // By now, we should have a response. If good, staple it to - // the certificate. If the OCSP response was not loaded from - // storage, we persist it for next time. + if ocspResp.NextUpdate.After(cert.Leaf.NotAfter) { + // uh oh, this OCSP response expires AFTER the certificate does, that's kinda bogus. + // it was the reason a lot of Symantec-validated sites (not Caddy) went down + // in October 2017. https://twitter.com/mattiasgeniar/status/919432824708648961 + return fmt.Errorf("invalid: OCSP response for %v valid after certificate expiration (%s)", + cert.Names, cert.Leaf.NotAfter.Sub(ocspResp.NextUpdate)) + } + + // Attach the latest OCSP response to the certificate; this is NOT the same + // as stapling it, which we do below only if the status is Good, but it is + // useful to keep with the cert in order to act on it later (like if Revoked). + cert.ocsp = ocspResp + + // If the response is good, staple it to the certificate. If the OCSP + // response was not loaded from storage, we persist it for next time. if ocspResp.Status == ocsp.Good { - if ocspResp.NextUpdate.After(cert.Leaf.NotAfter) { - // uh oh, this OCSP response expires AFTER the certificate does, that's kinda bogus. - // it was the reason a lot of Symantec-validated sites (not Caddy) went down - // in October 2017. https://twitter.com/mattiasgeniar/status/919432824708648961 - return ocspResp, fmt.Errorf("invalid: OCSP response for %v valid after certificate expiration (%s)", - cert.Names, cert.Leaf.NotAfter.Sub(ocspResp.NextUpdate)) - } cert.Certificate.OCSPStaple = ocspBytes - cert.ocsp = ocspResp if gotNewOCSP { err := storage.Store(ocspStapleKey, ocspBytes) if err != nil { - return ocspResp, fmt.Errorf("unable to write OCSP staple file for %v: %v", cert.Names, err) + return fmt.Errorf("unable to write OCSP staple file for %v: %v", cert.Names, err) } } } - return ocspResp, nil + return nil } // getOCSPForCert takes a PEM encoded cert or cert bundle returning the raw OCSP response,