Skip to content

Commit

Permalink
Merge pull request #111399 from Argh4k/i-111290
Browse files Browse the repository at this point in the history
Modify timeout for etcd healthcheck
  • Loading branch information
k8s-ci-robot committed Jul 27, 2022
2 parents ce33655 + b42045a commit 610b783
Show file tree
Hide file tree
Showing 8 changed files with 335 additions and 16 deletions.
1 change: 1 addition & 0 deletions cmd/kube-apiserver/app/options/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func TestAddFlags(t *testing.T) {
CountMetricPollPeriod: time.Minute,
DBMetricPollInterval: storagebackend.DefaultDBMetricPollInterval,
HealthcheckTimeout: storagebackend.DefaultHealthcheckTimeout,
ReadycheckTimeout: storagebackend.DefaultReadinessTimeout,
LeaseManagerConfig: etcd3.LeaseManagerConfig{
ReuseDurationSeconds: 100,
MaxObjectCount: 1000,
Expand Down
6 changes: 6 additions & 0 deletions staging/src/k8s.io/apiserver/pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,12 @@ func (c *Config) AddHealthChecks(healthChecks ...healthz.HealthChecker) {
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
}

// AddReadyzChecks adds a health check to our config to be exposed by the readyz endpoint
// of our configured apiserver.
func (c *Config) AddReadyzChecks(healthChecks ...healthz.HealthChecker) {
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
}

// AddPostStartHook allows you to add a PostStartHook that will later be added to the server itself in a New call.
// Name conflicts will cause an error.
func (c *Config) AddPostStartHook(name string, hook PostStartHookFunc) error {
Expand Down
11 changes: 11 additions & 0 deletions staging/src/k8s.io/apiserver/pkg/server/options/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ func (s *EtcdOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.StorageConfig.HealthcheckTimeout, "etcd-healthcheck-timeout", s.StorageConfig.HealthcheckTimeout,
"The timeout to use when checking etcd health.")

fs.DurationVar(&s.StorageConfig.ReadycheckTimeout, "etcd-readycheck-timeout", s.StorageConfig.ReadycheckTimeout,
"The timeout to use when checking etcd readiness")

fs.Int64Var(&s.StorageConfig.LeaseManagerConfig.ReuseDurationSeconds, "lease-reuse-duration-seconds", s.StorageConfig.LeaseManagerConfig.ReuseDurationSeconds,
"The time in seconds that each lease is reused. A lower value could avoid large number of objects reusing the same lease. Notice that a too small value may cause performance problems at storage layer.")
}
Expand Down Expand Up @@ -234,6 +237,14 @@ func (s *EtcdOptions) addEtcdHealthEndpoint(c *server.Config) error {
return healthCheck()
}))

readyCheck, err := storagefactory.CreateReadyCheck(s.StorageConfig, c.DrainedNotify())
if err != nil {
return err
}
c.AddReadyzChecks(healthz.NamedCheck("etcd-readiness", func(r *http.Request) error {
return readyCheck()
}))

if s.EncryptionProviderConfigFilepath != "" {
kmsPluginHealthzChecks, err := encryptionconfig.GetKMSPluginHealthzCheckers(s.EncryptionProviderConfigFilepath)
if err != nil {
Expand Down
56 changes: 48 additions & 8 deletions staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/storage/storagebackend"
)

Expand Down Expand Up @@ -230,18 +231,57 @@ func TestKMSHealthzEndpoint(t *testing.T) {
}

for _, n := range tc.wantChecks {
found := false
for _, h := range serverConfig.HealthzChecks {
if n == h.Name() {
found = true
break
}
if !hasCheck(n, serverConfig.HealthzChecks) {
t.Errorf("Missing HealthzChecker %s", n)
}
}
})
}
}

func TestReadinessCheck(t *testing.T) {
testCases := []struct {
name string
wantReadyzChecks []string
wantHealthzChecks []string
}{
{
name: "Readyz should have etcd-readiness check",
wantReadyzChecks: []string{"etcd", "etcd-readiness"},
wantHealthzChecks: []string{"etcd"},
},
}

scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
serverConfig := server.NewConfig(codecs)
etcdOptions := &EtcdOptions{}
if err := etcdOptions.addEtcdHealthEndpoint(serverConfig); err != nil {
t.Fatalf("Failed to add healthz error: %v", err)
}

for _, n := range tc.wantReadyzChecks {
if !hasCheck(n, serverConfig.ReadyzChecks) {
t.Errorf("Missing ReadyzChecker %s", n)
}
if !found {
}
for _, n := range tc.wantHealthzChecks {
if !hasCheck(n, serverConfig.HealthzChecks) {
t.Errorf("Missing HealthzChecker %s", n)
}
found = false
}
})
}
}

func hasCheck(want string, healthchecks []healthz.HealthChecker) bool {
for _, h := range healthchecks {
if want == h.Name() {
return true
}
}
return false
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const (
DefaultCompactInterval = 5 * time.Minute
DefaultDBMetricPollInterval = 30 * time.Second
DefaultHealthcheckTimeout = 2 * time.Second
DefaultReadinessTimeout = 2 * time.Second
)

// TransportConfig holds all connection related info, i.e. equal TransportConfig means equal servers we talk to.
Expand Down Expand Up @@ -84,6 +85,8 @@ type Config struct {
DBMetricPollInterval time.Duration
// HealthcheckTimeout specifies the timeout used when checking health
HealthcheckTimeout time.Duration
// ReadycheckTimeout specifies the timeout used when checking readiness
ReadycheckTimeout time.Duration

LeaseManagerConfig etcd3.LeaseManagerConfig

Expand Down Expand Up @@ -117,6 +120,7 @@ func NewDefaultConfig(prefix string, codec runtime.Codec) *Config {
CompactionInterval: DefaultCompactInterval,
DBMetricPollInterval: DefaultDBMetricPollInterval,
HealthcheckTimeout: DefaultHealthcheckTimeout,
ReadycheckTimeout: DefaultReadinessTimeout,
LeaseManagerConfig: etcd3.NewDefaultLeaseManagerConfig(),
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ func init() {
}

func newETCD3HealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
timeout := storagebackend.DefaultHealthcheckTimeout
if c.HealthcheckTimeout != time.Duration(0) {
timeout = c.HealthcheckTimeout
}
return newETCD3Check(c, timeout, stopCh)
}

func newETCD3ReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
timeout := storagebackend.DefaultReadinessTimeout
if c.ReadycheckTimeout != time.Duration(0) {
timeout = c.ReadycheckTimeout
}
return newETCD3Check(c, timeout, stopCh)
}

func newETCD3Check(c storagebackend.Config, timeout time.Duration, stopCh <-chan struct{}) (func() error, error) {
// constructing the etcd v3 client blocks and times out if etcd is not available.
// retry in a loop in the background until we successfully create the client, storing the client or error encountered

Expand Down Expand Up @@ -129,23 +145,18 @@ func newETCD3HealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func(
if clientErr != nil {
return clientErr
}

healthcheckTimeout := storagebackend.DefaultHealthcheckTimeout
if c.HealthcheckTimeout != time.Duration(0) {
healthcheckTimeout = c.HealthcheckTimeout
}
ctx, cancel := context.WithTimeout(context.Background(), healthcheckTimeout)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
// See https://github.com/etcd-io/etcd/blob/c57f8b3af865d1b531b979889c602ba14377420e/etcdctl/ctlv3/command/ep_command.go#L118
_, err := client.Get(ctx, path.Join("/", c.Prefix, "health"))
if err == nil {
return nil
}
return fmt.Errorf("error getting data from etcd: %v", err)
return fmt.Errorf("error getting data from etcd: %w", err)
}, nil
}

func newETCD3Client(c storagebackend.TransportConfig) (*clientv3.Client, error) {
var newETCD3Client = func(c storagebackend.TransportConfig) (*clientv3.Client, error) {
tlsInfo := transport.TLSInfo{
CertFile: c.CertFile,
KeyFile: c.KeyFile,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,14 @@ func CreateHealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func()
return nil, fmt.Errorf("unknown storage type: %s", c.Type)
}
}

func CreateReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
switch c.Type {
case storagebackend.StorageTypeETCD2:
return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
return newETCD3ReadyCheck(c, stopCh)
default:
return nil, fmt.Errorf("unknown storage type: %s", c.Type)
}
}

0 comments on commit 610b783

Please sign in to comment.