Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Permanent Failures classification (#82)
Browse files Browse the repository at this point in the history
* Permanent Failures classification

 - ImagePullBackoff should be a user error
 - Added a method to delineate system and user perm errors
  • Loading branch information
Ketan Umare authored Apr 28, 2020
1 parent 7275b8b commit 0543c7e
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
6 changes: 5 additions & 1 deletion flyteplugins/go/tasks/pluginmachinery/core/phase.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,12 @@ func PhaseInfoSuccess(info *TaskInfo) PhaseInfo {
return phaseInfo(PhaseSuccess, DefaultPhaseVersion, nil, info)
}

func PhaseInfoSystemFailure(code, reason string, info *TaskInfo) PhaseInfo {
return PhaseInfoFailed(PhasePermanentFailure, &core.ExecutionError{Code: code, Message: reason, Kind: core.ExecutionError_SYSTEM}, info)
}

func PhaseInfoFailure(code, reason string, info *TaskInfo) PhaseInfo {
return PhaseInfoFailed(PhasePermanentFailure, &core.ExecutionError{Code: code, Message: reason}, info)
return PhaseInfoFailed(PhasePermanentFailure, &core.ExecutionError{Code: code, Message: reason, Kind: core.ExecutionError_USER}, info)
}

func PhaseInfoRetryableFailure(code, reason string, info *TaskInfo) PhaseInfo {
Expand Down
6 changes: 4 additions & 2 deletions flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,10 @@ func DemystifyPending(status v1.PodStatus) (pluginsCore.PhaseInfo, error) {
}), nil

case "ImagePullBackOff":
// TODO once we implement timeouts, this should probably be PhaseInitializing with version 1, so that user can see the reason
fallthrough
t := c.LastTransitionTime.Time
return pluginsCore.PhaseInfoRetryableFailure(finalReason, finalMessage, &pluginsCore.TaskInfo{
OccurredAt: &t,
}), nil
default:
// Since we are not checking for all error states, we may end up perpetually
// in the queued state returned at the bottom of this function, until the Pod is reaped
Expand Down
2 changes: 1 addition & 1 deletion flyteplugins/go/tasks/plugins/array/core/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ func MapArrayStateToPluginPhase(_ context.Context, state *State, logLinks []*idl
if state.GetExecutionErr() != nil {
phaseInfo = core.PhaseInfoFailed(core.PhasePermanentFailure, state.GetExecutionErr(), nowTaskInfo)
} else {
phaseInfo = core.PhaseInfoFailure(ErrorK8sArrayGeneric, state.GetReason(), nowTaskInfo)
phaseInfo = core.PhaseInfoSystemFailure(ErrorK8sArrayGeneric, state.GetReason(), nowTaskInfo)
}
default:
return phaseInfo, fmt.Errorf("failed to map custom state phase to core phase. State Phase [%v]", p)
Expand Down
1 change: 0 additions & 1 deletion flyteplugins/go/tasks/plugins/k8s/sidecar/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ func determinePrimaryContainerPhase(primaryContainerName string, statuses []k8sv
}

// If for some reason we can't find the primary container, always just return a permanent failure

return pluginsCore.PhaseInfoFailure("PrimaryContainerMissing",
fmt.Sprintf("Primary container [%s] not found in pod's container statuses", primaryContainerName), info)
}
Expand Down

0 comments on commit 0543c7e

Please sign in to comment.