Skip to content

Commit

Permalink
Merge pull request #115021 from nikhita/automated-cherry-pick-of-#114516-upstream-release-1.24
Browse files Browse the repository at this point in the history

[1.24] pkg/controller/job: re-honor exponential backoff
  • Loading branch information
k8s-ci-robot committed Jan 17, 2023
2 parents c49ab71 + 2fc2265 commit a2de8e8
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 52 deletions.
24 changes: 19 additions & 5 deletions pkg/controller/job/job_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,10 @@ func (jm *Controller) updatePod(old, cur interface{}) {
return
}

// the only time we want the backoff to kick-in, is when the pod failed
immediate := curPod.Status.Phase != v1.PodFailed
// the only time we want the backoff to kick-in, is when the pod failed for the first time.
// we don't want to re-calculate backoff for an update event when the tracking finalizer
// for a failed pod is removed.
immediate := !(curPod.Status.Phase == v1.PodFailed && oldPod.Status.Phase != v1.PodFailed)

// Don't check if oldPod has the finalizer, as during ownership transfer
// finalizers might be re-added and removed again in behalf of the new owner.
Expand Down Expand Up @@ -486,7 +488,9 @@ func (jm *Controller) enqueueControllerDelayed(obj interface{}, immediate bool,

backoff := delay
if !immediate {
backoff = getBackoff(jm.queue, key)
if calculatedBackoff := getBackoff(jm.queue, key); calculatedBackoff > 0 {
backoff = calculatedBackoff
}
}

// TODO: Handle overlapping controllers better. Either disallow them at admission time or
Expand Down Expand Up @@ -852,14 +856,22 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
job.Status.Ready = ready
err = jm.trackJobStatusAndRemoveFinalizers(ctx, &job, pods, prevSucceededIndexes, *uncounted, expectedRmFinalizers, finishedCondition, needsStatusUpdate)
if err != nil {
if apierrors.IsConflict(err) {
// we probably have a stale informer cache
// so don't return an error to avoid backoff
jm.enqueueController(&job, false)
return false, nil
}
return false, fmt.Errorf("tracking status: %w", err)
}
jobFinished := IsJobFinished(&job)
if jobHasNewFailure && !jobFinished {
// returning an error will re-enqueue Job after the backoff period
return forget, fmt.Errorf("failed pod(s) detected for job key %q", key)
}
forget = true
if suspendCondChanged {
forget = true
}
return forget, manageJobErr
}
// Legacy path: tracking without finalizers.
Expand Down Expand Up @@ -890,7 +902,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
return forget, fmt.Errorf("failed pod(s) detected for job key %q", key)
}

forget = true
if suspendCondChanged {
forget = true
}
}

return forget, manageJobErr
Expand Down

0 comments on commit a2de8e8

Please sign in to comment.