Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Use system failure for node deletion or infra level issues (#59)
Browse files Browse the repository at this point in the history
* Use system failure for node deletion or infra level issues

* using version

* removing PhaseSystemRetryableFailure

* code review feedback

* fixing a test

* idl version

* reverting tests
  • Loading branch information
surindersinghp authored Feb 28, 2020
1 parent 2c44d9b commit 28f9aeb
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 6 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/golang/protobuf v1.3.3
github.com/googleapis/gnostic v0.4.1 // indirect
github.com/hashicorp/golang-lru v0.5.4
github.com/lyft/flyteidl v0.17.1
github.com/lyft/flyteidl v0.17.5
github.com/lyft/flytestdlib v0.3.2
github.com/magiconair/properties v1.8.1
github.com/mitchellh/mapstructure v1.1.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ github.com/lyft/api v0.0.0-20191031200350-b49a72c274e0 h1:NGL46+1RYcCXb3sShp0nQq
github.com/lyft/api v0.0.0-20191031200350-b49a72c274e0/go.mod h1:/L5qH+AD540e7Cetbui1tuJeXdmNhO8jM6VkXeDdDhQ=
github.com/lyft/apimachinery v0.0.0-20191031200210-047e3ea32d7f h1:PGuAMDzAen0AulUfaEhNQMYmUpa41pAVo3zHI+GJsCM=
github.com/lyft/apimachinery v0.0.0-20191031200210-047e3ea32d7f/go.mod h1:llRdnznGEAqC3DcNm6yEj472xaFVfLM7hnYofMb12tQ=
github.com/lyft/flyteidl v0.17.1 h1:XXi8sTSzPVXG337S1ZbOTi7PHIBgy1sIehhQu1eZpyI=
github.com/lyft/flyteidl v0.17.1/go.mod h1:/zQXxuHO11u/saxTTZc8oYExIGEShXB+xCB1/F1Cu20=
github.com/lyft/flyteidl v0.17.5 h1:nuUixm2glaJ4orKw3t/G0y1iG3ikYUR6FLxQy6NPmNM=
github.com/lyft/flyteidl v0.17.5/go.mod h1:/zQXxuHO11u/saxTTZc8oYExIGEShXB+xCB1/F1Cu20=
github.com/lyft/flytestdlib v0.3.0 h1:nIkX4MlyYdcLLzaF35RI2P5BhARt+qMgHoFto8eVNzU=
github.com/lyft/flytestdlib v0.3.0/go.mod h1:LJPPJlkFj+wwVWMrQT3K5JZgNhZi2mULsCG4ZYhinhU=
github.com/lyft/flytestdlib v0.3.2 h1:bY6Y+Fg6Jdc7zY4GAYuR7t2hjWwynIdmRvtLcRNaGnw=
Expand Down
7 changes: 6 additions & 1 deletion go/tasks/pluginmachinery/core/phase.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
)

const DefaultPhaseVersion = uint32(0)
const SystemErrorCode = "SystemError"

//go:generate enumer -type=Phase

Expand Down Expand Up @@ -184,5 +185,9 @@ func PhaseInfoFailure(code, reason string, info *TaskInfo) PhaseInfo {
}

// PhaseInfoRetryableFailure returns the PhaseInfo produced by PhaseInfoFailed for
// PhaseRetryableFailure, wrapping code and reason in a core.ExecutionError and passing
// info through unchanged.
// NOTE(review): this is a rendered diff hunk — the first return looks like the pre-change
// line and the second, which additionally sets Kind to core.ExecutionError_USER, looks like
// its replacement; confirm against the real phase.go.
func PhaseInfoRetryableFailure(code, reason string, info *TaskInfo) PhaseInfo {
return PhaseInfoFailed(PhaseRetryableFailure, &core.ExecutionError{Code: code, Message: reason}, info)
return PhaseInfoFailed(PhaseRetryableFailure, &core.ExecutionError{Code: code, Message: reason, Kind: core.ExecutionError_USER}, info)
}

// PhaseInfoSystemRetryableFailure returns the PhaseInfo produced by PhaseInfoFailed for
// PhaseRetryableFailure, wrapping code and reason in a core.ExecutionError whose Kind is
// core.ExecutionError_SYSTEM. info is passed through unchanged.
func PhaseInfoSystemRetryableFailure(code, reason string, info *TaskInfo) PhaseInfo {
return PhaseInfoFailed(PhaseRetryableFailure, &core.ExecutionError{Code: code, Message: reason, Kind: core.ExecutionError_SYSTEM}, info)
}
2 changes: 1 addition & 1 deletion go/tasks/pluginmachinery/flytek8s/pod_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func DemystifyPending(status v1.PodStatus) (pluginsCore.PhaseInfo, error) {
// So by default if the container is not waiting with the PodInitializing/ContainerCreating
// reasons, then we will assume a failure reason, and fail instantly
t := c.LastTransitionTime.Time
return pluginsCore.PhaseInfoRetryableFailure(c.Reason, c.Message, &pluginsCore.TaskInfo{
return pluginsCore.PhaseInfoSystemRetryableFailure(c.Reason, c.Message, &pluginsCore.TaskInfo{
OccurredAt: &t,
}), nil
}
Expand Down
1 change: 1 addition & 0 deletions go/tasks/plugins/array/k8s/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func CheckPodStatus(ctx context.Context, client core.KubeClient, name k8sTypes.N
return core.PhaseInfoFailed(core.PhaseRetryableFailure, &idlCore.ExecutionError{
Code: string(k8serrors.ReasonForError(err)),
Message: err.Error(),
Kind: idlCore.ExecutionError_SYSTEM,
}, &core.TaskInfo{
OccurredAt: &now,
}), nil
Expand Down
2 changes: 1 addition & 1 deletion go/tasks/plugins/hive/execution_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func MapExecutionStateToPhaseInfo(state ExecutionState, quboleClient client.Qubo
case PhaseQueued:
// TODO: Turn into config
if state.CreationFailureCount > 5 {
phaseInfo = core.PhaseInfoRetryableFailure("QuboleFailure", "Too many creation attempts", nil)
phaseInfo = core.PhaseInfoSystemRetryableFailure("QuboleFailure", "Too many creation attempts", nil)
} else {
phaseInfo = core.PhaseInfoQueued(t, uint32(state.CreationFailureCount), "Waiting for Qubole launch")
}
Expand Down

0 comments on commit 28f9aeb

Please sign in to comment.