Skip to content

Commit

Permalink
Fix the issue where cluster upgrade doesn't time out and gets stuck indefinitely
Browse files Browse the repository at this point in the history
  • Loading branch information
prkalle committed Nov 24, 2021
1 parent a9f377a commit 3e392a1
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions pkg/v1/tkg/clusterclient/clusterclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,9 @@ func (c *client) WaitForClusterInitialized(clusterName, namespace string) error
var lastReason string
unchangedCounter := 0
interval := 15 * time.Second
// maxTimeout caps the total wait time so the operation cannot wait indefinitely if the cluster state keeps changing
maxTimeout := 3 * c.operationTimeout
maxTimeoutCounter := 0

getterFunc := func() (interface{}, error) {
currentClusterInfo = c.GetClusterStatusInfo(clusterName, namespace, nil)
Expand Down Expand Up @@ -524,6 +527,7 @@ func (c *client) WaitForClusterInitialized(clusterName, namespace string) error
unchangedCounter++
log.V(7).Infof("cluster state is unchanged %v", unchangedCounter)
}
maxTimeoutCounter++

if lastReason != err.Error() {
log.Info(err.Error())
Expand All @@ -532,8 +536,9 @@ func (c *client) WaitForClusterInitialized(clusterName, namespace string) error

lastClusterInfo = currentClusterInfo

// if unchanged for operationTimeout(30 min default), return error
if interval*time.Duration(unchangedCounter) > c.operationTimeout {
// return an error if the cluster state has been unchanged for longer than operationTimeout (30 min default) or the total wait exceeds maxTimeout
if (interval*time.Duration(unchangedCounter) > c.operationTimeout) ||
(interval*time.Duration(maxTimeoutCounter) > maxTimeout) {
return true, errors.Wrap(err, "timed out waiting for cluster creation to complete")
}

Expand Down Expand Up @@ -807,6 +812,9 @@ func (c *client) waitK8sVersionUpdateGeneric(clusterName, namespace, newK8sVersi
unchangedCounter := 0
interval := 15 * time.Second
timeout := c.operationTimeout
// maxTimeout caps the total wait time so the operation cannot wait indefinitely if the cluster state keeps changing
maxTimeout := 3 * c.operationTimeout
maxTimeoutCounter := 0

getterFunc := func() (interface{}, error) {
curClusterInfo = c.GetClusterStatusInfo(clusterName, namespace, workloadClusterClient)
Expand Down Expand Up @@ -838,11 +846,12 @@ func (c *client) waitK8sVersionUpdateGeneric(clusterName, namespace, newK8sVersi
unchangedCounter++
log.V(7).Infof("cluster state is unchanged %v", unchangedCounter)
}

maxTimeoutCounter++
lastClusterInfo = curClusterInfo

// if waiting for more than timeout time, return error
if interval*time.Duration(unchangedCounter) > timeout {
// return an error if the cluster state has been unchanged for longer than operationTimeout (30 min default) or the total wait exceeds maxTimeout
if (interval*time.Duration(unchangedCounter) > timeout) ||
(interval*time.Duration(maxTimeoutCounter) > maxTimeout) {
return true, errors.New("timed out waiting for upgrade to complete")
}

Expand Down

0 comments on commit 3e392a1

Please sign in to comment.