Skip to content

Commit

Permalink
fix: resync job outside tx (#6537)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sguiheux authored Apr 18, 2023
1 parent 22d970a commit c1e0661
Showing 1 changed file with 15 additions and 13 deletions.
28 changes: 15 additions & 13 deletions engine/api/worker.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -287,28 +287,30 @@ func DisableWorker(ctx context.Context, db *gorp.DbMap, id string, maxLogSize in
return nil
}

if err := worker.SetStatus(ctx, tx, id, sdk.StatusDisabled); err != nil {
cause := sdk.Cause(err)
if cause == worker.ErrNoWorker || cause == sql.ErrNoRows {
return sdk.WrapError(sdk.ErrWrongRequest, "DisableWorker> worker %s does not exists", id)
}
return sdk.WrapError(err, "cannot update worker status")
}

if err := tx.Commit(); err != nil {
return sdk.WithStack(err)
}

if st == sdk.StatusBuilding && jobID.Valid {
// Worker is awol while building !
// We need to restart this action
wNodeJob, errL := workflow.LoadNodeJobRun(ctx, tx, nil, jobID.Int64)
if errL == nil && wNodeJob.Retry < 3 {
wNodeJob, err := workflow.LoadNodeJobRun(ctx, db, nil, jobID.Int64)
if err == nil && wNodeJob.Retry < 3 {
if err := workflow.RestartWorkflowNodeJob(context.TODO(), db, *wNodeJob, maxLogSize); err != nil {
log.Warn(ctx, "DisableWorker[%s]> Cannot restart workflow node run: %v", name, err)
} else {
log.Info(ctx, "DisableWorker[%s]> WorkflowNodeRun %d restarted after crash", name, jobID.Int64)
}
}

log.Info(ctx, "DisableWorker> Worker %s crashed while building %d !", name, jobID.Int64)
}

if err := worker.SetStatus(ctx, tx, id, sdk.StatusDisabled); err != nil {
cause := sdk.Cause(err)
if cause == worker.ErrNoWorker || cause == sql.ErrNoRows {
return sdk.WrapError(sdk.ErrWrongRequest, "DisableWorker> worker %s does not exists", id)
}
return sdk.WrapError(err, "cannot update worker status")
}

return tx.Commit()
return nil
}

0 comments on commit c1e0661

Please sign in to comment.