From 2bb7678afc71ece5ef5d98b25663814941705059 Mon Sep 17 00:00:00 2001 From: Vincent Demeester Date: Thu, 28 Mar 2019 13:24:25 +0100 Subject: [PATCH] taskrun timeouts: fix timeout goroutines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Do not start two go routines 😓, my bad, I messed up a rebase on my part brought an additional timeout goroutine 🙇. - Use a channel (started) to make sure we start the timeout timer in time at the time we issue the `go …` call. When using the `go` keyword to start a goroutines, there is no guarantee the code inside the go routine will start right away. The scheduler might (and most likely will) wait for the main goroutine (or the caller goroutine) to have a waiting/sleeping time, to start working in the issued go routine. This means, that before that fix, we have no guarantee we started the timer at the right time — especially if the controller is very busy. Passing a channel and waiting for it to be closed just after the `go …` call forces the scheduler to sleep and run the goroutine's code. Which, in our case, that we started the timeout timer at the right time. Signed-off-by: Vincent Demeester --- pkg/reconciler/timeout_handler.go | 22 ++++++++++++++----- .../v1alpha1/pipelinerun/pipelinerun.go | 4 +++- pkg/reconciler/v1alpha1/taskrun/taskrun.go | 5 +++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/pkg/reconciler/timeout_handler.go b/pkg/reconciler/timeout_handler.go index 7f2a693e597..212262f75b6 100644 --- a/pkg/reconciler/timeout_handler.go +++ b/pkg/reconciler/timeout_handler.go @@ -131,7 +131,9 @@ func (t *TimeoutSet) checkPipelineRunTimeouts(namespace string) { if pipelineRun.IsDone() || pipelineRun.IsCancelled() { continue } - go t.WaitPipelineRun(&pipelineRun) + started := make(chan struct{}) + go t.WaitPipelineRun(&pipelineRun, started) + <-started } } @@ -162,13 +164,15 @@ func (t *TimeoutSet) checkTaskRunTimeouts(namespace string) { if taskrun.IsDone() || taskrun.IsCancelled() { continue } - go t.WaitTaskRun(&taskrun) + started := make(chan struct{}) + go t.WaitTaskRun(&taskrun, started) + <-started } } // WaitTaskRun function creates a blocking function for taskrun to wait for // 1. Stop signal, 2. TaskRun to complete or 3. Taskrun to time out -func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun) { +func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun, started chan struct{}) { timeout := getTimeout(tr.Spec.Timeout) runtime := time.Duration(0) @@ -180,6 +184,9 @@ func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun) { timeout -= runtime finished := t.getOrCreateFinishedChan(tr) + timeAfter := time.After(timeout) + close(started) + defer t.Release(tr) select { @@ -189,7 +196,7 @@ func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun) { case <-finished: // taskrun finished, we can stop watching return - case <-time.After(timeout): + case <-timeAfter: t.StatusLock(tr) defer t.StatusUnlock(tr) if t.taskRunCallbackFunc != nil { @@ -202,7 +209,7 @@ func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun) { // WaitPipelineRun function creates a blocking function for pipelinerun to wait for // 1. Stop signal, 2. pipelinerun to complete or 3. pipelinerun to time out -func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun) { +func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun, started chan struct{}) { timeout := getTimeout(pr.Spec.Timeout) runtime := time.Duration(0) @@ -214,6 +221,9 @@ func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun) { timeout -= runtime finished := t.getOrCreateFinishedChan(pr) + timeAfter := time.After(timeout) + close(started) + defer t.Release(pr) select { @@ -223,7 +233,7 @@ func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun) { case <-finished: // pipelinerun finished, we can stop watching return - case <-time.After(timeout): + case <-timeAfter: t.StatusLock(pr) defer t.StatusUnlock(pr) if t.pipelineRunCallbackFunc != nil { diff --git a/pkg/reconciler/v1alpha1/pipelinerun/pipelinerun.go b/pkg/reconciler/v1alpha1/pipelinerun/pipelinerun.go index 2362ae9cab0..722502ac568 100644 --- a/pkg/reconciler/v1alpha1/pipelinerun/pipelinerun.go +++ b/pkg/reconciler/v1alpha1/pipelinerun/pipelinerun.go @@ -168,7 +168,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { c.timeoutHandler.StatusLock(pr) if !pr.HasStarted() { // start goroutine to track pipelinerun timeout only startTime is not set - go c.timeoutHandler.WaitPipelineRun(pr) + started := make(chan struct{}) + go c.timeoutHandler.WaitPipelineRun(pr, started) + <-started } pr.Status.InitializeConditions() diff --git a/pkg/reconciler/v1alpha1/taskrun/taskrun.go b/pkg/reconciler/v1alpha1/taskrun/taskrun.go index 28a56becf9f..077faeeee79 100644 --- a/pkg/reconciler/v1alpha1/taskrun/taskrun.go +++ b/pkg/reconciler/v1alpha1/taskrun/taskrun.go @@ -284,7 +284,6 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error } } else { // Pod is not present, create pod. - go c.timeoutHandler.WaitTaskRun(tr) pod, err = c.createPod(tr, rtr.TaskSpec, rtr.TaskName) if err != nil { // This Run has failed, so we need to mark it as failed and stop reconciling it @@ -304,7 +303,9 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error c.Logger.Errorf("Failed to create build pod for task %q :%v", err, tr.Name) return nil } - go c.timeoutHandler.WaitTaskRun(tr) + started := make(chan struct{}) + go c.timeoutHandler.WaitTaskRun(tr, started) + <-started } if err := c.tracker.Track(tr.GetBuildPodRef(), tr); err != nil { c.Logger.Errorf("Failed to create tracker for build pod %q for taskrun %q: %v", tr.Name, tr.Name, err)