Skip to content

Commit

Permalink
fix: Skip execution control for nodes of exit handler. Fixes #13060
Browse files Browse the repository at this point in the history
Signed-off-by: oninowang <[email protected]>
  • Loading branch information
jswxstw authored and oninowang committed Jun 26, 2024
1 parent 46728cf commit cb2397a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
2 changes: 1 addition & 1 deletion workflow/controller/exec_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (woc *wfOperationCtx) handleExecutionControlError(nodeID string, wfNodesLoc
// if node is a pod created from ContainerSet template
// then need to fail child nodes so they will not hang in Pending after pod deletion
for _, child := range children {
if !child.IsExitNode() && !child.Fulfilled() {
if !child.Fulfilled() {
woc.markNodePhase(child.Name, wfv1.NodeFailed, errorMsg)
}
}
Expand Down
2 changes: 0 additions & 2 deletions workflow/controller/exit_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,6 @@ status:
type: Steps
hello-world-647r7-1045616760:
boundaryID: hello-world-647r7-206029318
children:
- hello-world-647r7-370991976
displayName: '[0]'
finishedAt: null
id: hello-world-647r7-1045616760
Expand Down
20 changes: 12 additions & 8 deletions workflow/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1272,20 +1272,24 @@ func (woc *wfOperationCtx) shouldPrintPodSpec(node *wfv1.NodeStatus) bool {

// failNodesWithoutCreatedPodsAfterDeadlineOrShutdown mark the nodes without created pods failed when shutting down or exceeding deadline.
func (woc *wfOperationCtx) failNodesWithoutCreatedPodsAfterDeadlineOrShutdown() {
for _, node := range woc.wf.Status.Nodes {
nodes := woc.wf.Status.Nodes
for _, node := range nodes {
if node.Fulfilled() {
continue
}
// fail suspended nodes or taskset nodes when shutting down
if woc.GetShutdownStrategy().Enabled() && (node.IsActiveSuspendNode() || node.IsTaskSetNode()) {
message := fmt.Sprintf("Stopped with strategy '%s'", woc.GetShutdownStrategy())
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
// Only fail nodes that are not part of exit handler if we are "Stopping" or all pods if we are "Terminating"
if woc.GetShutdownStrategy().Enabled() && !woc.GetShutdownStrategy().ShouldExecute(node.IsPartOfExitHandler(nodes)) {
// fail suspended nodes or taskset nodes when shutting down
if node.IsActiveSuspendNode() || node.IsTaskSetNode() {
message := fmt.Sprintf("Stopped with strategy '%s'", woc.GetShutdownStrategy())
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
}
}

// fail all pending and suspended nodes when exceeding deadline
// fail pending and suspended nodes that are not part of exit handler when exceeding deadline
deadlineExceeded := woc.workflowDeadline != nil && time.Now().UTC().After(*woc.workflowDeadline)
if deadlineExceeded && (node.Phase == wfv1.NodePending || node.IsActiveSuspendNode()) {
if deadlineExceeded && !node.IsPartOfExitHandler(nodes) && (node.Phase == wfv1.NodePending || node.IsActiveSuspendNode()) {
message := "Step exceeded its deadline"
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
Expand Down

0 comments on commit cb2397a

Please sign in to comment.