Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Not revisiting task nodes and correctly incrementing parallelism #4318

Merged
merged 2 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions flytepropeller/pkg/controller/nodes/task/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,12 @@
}
if pluginTrns.IsPreviouslyObserved() {
logger.Debugf(ctx, "No state change for Task, previously observed same transition. Short circuiting.")
logger.Infof(ctx, "Parallelism now set to [%d].", nCtx.ExecutionContext().IncrementParallelism())

// This is a hack to ensure that we do not re-evaluate the same node again in the same round.
if err := nCtx.NodeStateWriter().PutTaskNodeState(ts); err != nil {
return handler.UnknownTransition, err
}

Check warning on line 578 in flytepropeller/pkg/controller/nodes/task/handler.go

View check run for this annotation

Codecov / codecov/patch

flytepropeller/pkg/controller/nodes/task/handler.go#L577-L578

Added lines #L577 - L578 were not covered by tests
return pluginTrns.FinalTransition(ctx)
}
}
Expand Down
29 changes: 11 additions & 18 deletions flytepropeller/pkg/controller/nodes/task/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,13 +530,12 @@ func Test_task_Handle_NoCatalog(t *testing.T) {
expectedState fakeplugins.NextPhaseState
}
type want struct {
handlerPhase handler.EPhase
wantErr bool
event bool
eventPhase core.TaskExecution_Phase
skipStateUpdate bool
incrParallel bool
checkpoint bool
handlerPhase handler.EPhase
wantErr bool
event bool
eventPhase core.TaskExecution_Phase
incrParallel bool
checkpoint bool
}
tests := []struct {
name string
Expand Down Expand Up @@ -666,10 +665,9 @@ func Test_task_Handle_NoCatalog(t *testing.T) {
},
},
want{
handlerPhase: handler.EPhaseRunning,
event: false,
skipStateUpdate: true,
incrParallel: true,
handlerPhase: handler.EPhaseRunning,
event: false,
incrParallel: true,
},
},
{
Expand Down Expand Up @@ -738,13 +736,8 @@ func Test_task_Handle_NoCatalog(t *testing.T) {
expectedPhase = pluginCore.PhasePermanentFailure
}
}
if tt.want.skipStateUpdate {
assert.Equal(t, pluginCore.PhaseUndefined, state.s.PluginPhase)
assert.Equal(t, uint32(0), state.s.PluginPhaseVersion)
} else {
assert.Equal(t, expectedPhase.String(), state.s.PluginPhase.String())
assert.Equal(t, tt.args.expectedState.PhaseVersion, state.s.PluginPhaseVersion)
}
assert.Equal(t, expectedPhase.String(), state.s.PluginPhase.String())
assert.Equal(t, tt.args.expectedState.PhaseVersion, state.s.PluginPhaseVersion)
if tt.want.checkpoint {
assert.Equal(t, "s3://sandbox/x/name-n1-1/_flytecheckpoints",
got.Info().GetInfo().TaskNodeInfo.TaskNodeMetadata.CheckpointUri)
Expand Down
Loading