Skip to content

Commit

Permalink
fix: Mark its taskResult as completed if pod has been terminated not …
Browse files Browse the repository at this point in the history
…gracefully. Fixes #13373

Signed-off-by: oninowang <[email protected]>
  • Loading branch information
jswxstw authored and oninowang committed Aug 23, 2024
1 parent dcd9436 commit 97f93d2
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions workflow/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1498,9 +1498,19 @@ func (woc *wfOperationCtx) assessNodeStatus(ctx context.Context, pod *apiv1.Pod,
// We cannot fail the node if the wait container is still running because it may be busy saving outputs, and these
// would not get captured successfully.
for _, c := range pod.Status.ContainerStatuses {
if c.Name == common.WaitContainerName && c.State.Running != nil && new.Phase.Completed() {
woc.log.WithField("new.phase", new.Phase).Info("leaving phase un-changed: wait container is not yet terminated ")
new.Phase = old.Phase
if c.Name == common.WaitContainerName {
switch {
case c.State.Running != nil && new.Phase.Completed():
woc.log.WithField("new.phase", new.Phase).Info("leaving phase un-changed: wait container is not yet terminated ")
new.Phase = old.Phase
case c.State.Terminated != nil && c.State.Terminated.ExitCode != 0:
// The wait container terminated with a non-zero exit code, so it will never get a chance
// to report the taskResult itself; mark this node's taskResult as completed directly.
nodeID := woc.nodeID(pod)
woc.log.WithFields(log.Fields{"nodeID": nodeID, "exitCode": c.State.Terminated.ExitCode, "reason": c.State.Terminated.Reason}).
Debug("marking its taskResult as completed since wait container did not exit normally")
woc.wf.Status.MarkTaskResultComplete(nodeID)
}
}
}

Expand Down

0 comments on commit 97f93d2

Please sign in to comment.