Skip to content

Commit

Permalink
fix(operator): Workflow stuck at running when init container failed. Fixes argoproj#10045
Browse files Browse the repository at this point in the history

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
  • Loading branch information
terrytangyuan committed Nov 17, 2022
1 parent 4eb6cb7 commit 80e1224
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
8 changes: 8 additions & 0 deletions workflow/controller/operator.go
Expand Up @@ -1337,6 +1337,14 @@ func (woc *wfOperationCtx) assessNodeStatus(pod *apiv1.Pod, old *wfv1.NodeStatus
new.Phase = old.Phase
}
}
// If the init container failed, we should mark the node as failed.
for _, c := range pod.Status.InitContainerStatuses {
if c.State.Terminated != nil && int(c.State.Terminated.ExitCode) != 0 {
new.Phase = wfv1.NodeFailed
woc.log.WithField("new.phase", new.Phase).Info("marking node as failed since init container has non-zero exit code")
break
}
}

// if we are transitioning from Pending to a different state, clear out unchanged message
if old.Phase == wfv1.NodePending && new.Phase != wfv1.NodePending && old.Message == new.Message {
Expand Down
26 changes: 26 additions & 0 deletions workflow/controller/operator_test.go
Expand Up @@ -1331,6 +1331,32 @@ func TestAssessNodeStatus(t *testing.T) {
},
node: &wfv1.NodeStatus{TemplateName: templateName},
want: wfv1.NodeFailed,
}, {
name: "pod failed - init container failed",
pod: &apiv1.Pod{
Status: apiv1.PodStatus{
InitContainerStatuses: []apiv1.ContainerStatus{
{
Name: common.InitContainerName,
State: apiv1.ContainerState{Terminated: &apiv1.ContainerStateTerminated{ExitCode: 1}},
},
},
ContainerStatuses: []apiv1.ContainerStatus{
{
Name: common.WaitContainerName,
State: apiv1.ContainerState{Terminated: nil},
},
{
Name: common.MainContainerName,
State: apiv1.ContainerState{Terminated: &apiv1.ContainerStateTerminated{ExitCode: 0}},
},
},
Message: "failed since init container failed",
Phase: apiv1.PodFailed,
},
},
node: &wfv1.NodeStatus{TemplateName: templateName},
want: wfv1.NodeFailed,
}, {
name: "pod running",
pod: &apiv1.Pod{
Expand Down

0 comments on commit 80e1224

Please sign in to comment.