Skip to content

Commit

Permalink
fix(api): don't delete worker on /worker/unregister
Browse files Browse the repository at this point in the history
  • Loading branch information
fsamin committed Jul 4, 2018
1 parent 6420845 commit 7060b1a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 13 deletions.
4 changes: 2 additions & 2 deletions engine/api/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (api *API) disableWorkerHandler() Handler {

if err := worker.UpdateWorkerStatus(tx, id, sdk.StatusDisabled); err != nil {
if err == worker.ErrNoWorker || err == sql.ErrNoRows {
return sdk.WrapError(sdk.ErrWrongRequest, "disableWorkerHandler> handler %s does not exists", id)
return sdk.WrapError(sdk.ErrWrongRequest, "disableWorkerHandler> worker %s does not exists", id)
}
return sdk.WrapError(err, "disableWorkerHandler> cannot update worker status")
}
Expand Down Expand Up @@ -116,7 +116,7 @@ func (api *API) refreshWorkerHandler() Handler {

func (api *API) unregisterWorkerHandler() Handler {
return func(ctx context.Context, w http.ResponseWriter, r *http.Request) error {
if err := worker.DeleteWorker(api.mustDB(), getWorker(ctx).ID); err != nil {
if err := worker.DisableWorker(api.mustDB(), getWorker(ctx).ID); err != nil {
return sdk.WrapError(err, "unregisterWorkerHandler> cannot delete worker %s", getWorker(ctx).ID)
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion engine/api/worker/heartbeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
)

// WorkerHeartbeatTimeout defines the number of seconds allowed for workers to refresh their beat
var WorkerHeartbeatTimeout = 600.0
var WorkerHeartbeatTimeout = 300.0

// CheckHeartbeat runs in a goroutine and check last beat from all workers
func CheckHeartbeat(c context.Context, DBFunc func() *gorp.DbMap) {
Expand Down
41 changes: 31 additions & 10 deletions engine/api/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,32 @@ func DeleteWorker(db *gorp.DbMap, id string) error {
}
defer tx.Rollback()

query := `DELETE FROM worker WHERE id = $1`
if _, err := tx.Exec(query, id); err != nil {
return sdk.WrapError(err, "DeleteWorker")
}

if err := tx.Commit(); err != nil {
return sdk.WrapError(err, "DeleteWorker> unable to commit tx")
}

return nil
}

// DisableWorker disable a worker
func DisableWorker(db *gorp.DbMap, id string) error {
tx, errb := db.Begin()
if errb != nil {
return fmt.Errorf("DisableWorker> Cannot start tx: %s", errb)
}
defer tx.Rollback()

query := `SELECT name, status, action_build_id, job_type FROM worker WHERE id = $1 FOR UPDATE`
var st, name string
var jobID sql.NullInt64
var jobType sql.NullString
if err := tx.QueryRow(query, id).Scan(&name, &st, &jobID, &jobType); err != nil {
log.Debug("DeleteWorker[%d]> Cannot lock worker: %s", id, err)
log.Debug("DisableWorker[%d]> Cannot lock worker: %s", id, err)
return nil
}

Expand All @@ -60,28 +80,29 @@ func DeleteWorker(db *gorp.DbMap, id string) error {
switch jobType.String {
case sdk.JobTypePipeline:
if err := pipeline.RestartPipelineBuildJob(tx, jobID.Int64); err != nil {
log.Error("DeleteWorker[%s]> Cannot restart pipeline build job: %s", name, err)
log.Error("DisableWorker[%s]> Cannot restart pipeline build job: %s", name, err)
} else {
log.Info("DeleteWorker[%s]> PipelineBuildJob %d restarted after crash", name, jobID.Int64)
log.Info("DisableWorker[%s]> PipelineBuildJob %d restarted after crash", name, jobID.Int64)
}
case sdk.JobTypeWorkflowNode:
wNodeJob, errL := workflow.LoadNodeJobRun(tx, nil, jobID.Int64)
if errL == nil && wNodeJob.Retry < 3 {
if err := workflow.RestartWorkflowNodeJob(nil, db, *wNodeJob); err != nil {
log.Warning("DeleteWorker[%s]> Cannot restart workflow node run : %s", name, err)
log.Warning("DisableWorker[%s]> Cannot restart workflow node run : %s", name, err)
} else {
log.Info("DeleteWorker[%s]> WorkflowNodeRun %d restarted after crash", name, jobID.Int64)
log.Info("DisableWorker[%s]> WorkflowNodeRun %d restarted after crash", name, jobID.Int64)
}
}
}

log.Info("DeleteWorker> Worker %s crashed while building %d !", name, jobID.Int64)
log.Info("DisableWorker> Worker %s crashed while building %d !", name, jobID.Int64)
}

// Well then, let's remove this loser
query = `DELETE FROM worker WHERE id = $1`
if _, err := tx.Exec(query, id); err != nil {
return err
if err := SetStatus(tx, id, sdk.StatusDisabled); err != nil {
if err == ErrNoWorker || err == sql.ErrNoRows {
return sdk.WrapError(sdk.ErrWrongRequest, "DisableWorker> worker %s does not exists", id)
}
return sdk.WrapError(err, "DisableWorker> cannot update worker status")
}

if err := tx.Commit(); err != nil {
Expand Down

0 comments on commit 7060b1a

Please sign in to comment.