Skip to content

Commit

Permalink
Consolidate cancel and timeout logic
Browse files Browse the repository at this point in the history
Cancel and timeout do very similar things when they happen: they
update the status of the taskrun, set the completion time and try
and delete the pod.

Today this is done for the two cases in different places, the code
structured differently and the behaviour slightly different:
- log levels of the messages are different
- cancel does not set the completion time
- cancel does not check if the error on pod deletion is a NotFound

This commit introduces "HasTimedOut" to tasktun_types, which
matches what "IsCancelled" does. It introduces a "killTaskRun"
function that can be used by both cancel and timeout, with the
only different being the "Reason" and termination message.
The timeout_check module is not necessary anymore.

The check for IsCancelled and HasTimedOut are move out of
"reconcile" into "Reconcile", so that now "Reconcile" checks:
- HasStarted
- isDone
- IsCancelled
- HasTimedOut
and finally, if applicable, it invokes "reconcile".
  • Loading branch information
afrittoli committed Apr 9, 2020
1 parent fd7ffe3 commit 713d2fd
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 130 deletions.
22 changes: 22 additions & 0 deletions pkg/apis/pipeline/v1alpha1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ package v1alpha1

import (
"fmt"
"time"

apisconfig "github.com/tektoncd/pipeline/pkg/apis/config"
"github.com/tektoncd/pipeline/pkg/apis/pipeline"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -209,6 +211,26 @@ func (tr *TaskRun) IsCancelled() bool {
return tr.Spec.Status == TaskRunSpecStatusCancelled
}

// HasTimedOut returns true if the TaskRun runtime is beyond the allowed timeout
func (tr *TaskRun) HasTimedOut() bool {
if tr.Status.StartTime.IsZero() {
return false
}
var timeout time.Duration
// Use the platform default is no timeout is set
if tr.Spec.Timeout == nil {
timeout = apisconfig.DefaultTimeoutMinutes * time.Minute
} else {
timeout = tr.Spec.Timeout.Duration
}
// If timeout is set to 0 or defaulted to 0, there is no timeout.
if timeout == apisconfig.NoTimeoutDuration {
return false
}
runtime := time.Since(tr.Status.StartTime.Time)
return runtime > timeout
}

// GetRunKey return the taskrun key for timeout handler map
func (tr *TaskRun) GetRunKey() string {
// The address of the pointer is a threadsafe unique identifier for the taskrun
Expand Down
36 changes: 30 additions & 6 deletions pkg/reconciler/taskrun/cancel.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ package taskrun

import (
"fmt"
"time"

"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
podconvert "github.com/tektoncd/pipeline/pkg/pod"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"knative.dev/pkg/apis"
Expand All @@ -29,25 +32,46 @@ import (
type logger interface {
Warn(args ...interface{})
Warnf(template string, args ...interface{})
Infof(template string, args ...interface{})
}

// cancelTaskRun marks the TaskRun as cancelled and delete pods linked to it.
func cancelTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error {
logger.Warn("task run %q has been cancelled", tr.Name)
func killTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface,
logger logger, reason, message string) error {

logger.Warn("stopping task run %q because of %q", tr.Name, reason)
tr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "TaskRunCancelled",
Message: fmt.Sprintf("TaskRun %q was cancelled", tr.Name),
Reason: reason,
Message: message,
})

// update tr completed time
tr.Status.CompletionTime = &metav1.Time{Time: time.Now()}

if tr.Status.PodName == "" {
logger.Warnf("task run %q has no pod running yet", tr.Name)
return nil
}

if err := clientSet.CoreV1().Pods(tr.Namespace).Delete(tr.Status.PodName, &metav1.DeleteOptions{}); err != nil {
// tr.Status.PodName will be empty if the pod was never successfully created. This condition
// can be reached, for example, by the pod never being schedulable due to limits imposed by
// a namespace's ResourceQuota.
err := clientSet.CoreV1().Pods(tr.Namespace).Delete(tr.Status.PodName, &metav1.DeleteOptions{})
if err != nil && !errors.IsNotFound(err) {
logger.Warnf("Failed to terminate pod: %v", err)
return err
}
return nil
}

// cancelTaskRun marks the TaskRun as cancelled and delete pods linked to it.
func cancelTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error {
message := fmt.Sprintf("TaskRun %q was cancelled", tr.Name)
return killTaskRun(tr, clientSet, logger, "TaskRunCancelled", message)
}

func timeoutTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error {
message := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, tr.Spec.Timeout.Duration.String())
return killTaskRun(tr, clientSet, logger, podconvert.ReasonTimedOut, message)
}
46 changes: 25 additions & 21 deletions pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
tr.Status.StartTime = &tr.CreationTimestamp
}

// If the TaskRun is complete, run some post run fixtures when applicable
if tr.IsDone() {
c.Logger.Infof("taskrun done : %s \n", tr.Name)
var merr *multierror.Error
Expand Down Expand Up @@ -157,6 +158,26 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {

return merr.ErrorOrNil()
}

// If the TaskRun is cancelled, kill resources and update status
if tr.IsCancelled() {
before := tr.Status.GetCondition(apis.ConditionSucceeded)
err := cancelTaskRun(tr, c.KubeClientSet, c.Logger)
after := tr.Status.GetCondition(apis.ConditionSucceeded)
reconciler.EmitEvent(c.Recorder, before, after, tr)
return err
}

// Check if the TaskRun has timed out; if it is, this will set its status
// accordingly.
if tr.HasTimedOut() {
before := tr.Status.GetCondition(apis.ConditionSucceeded)
err := timeoutTaskRun(tr, c.KubeClientSet, c.Logger)
after := tr.Status.GetCondition(apis.ConditionSucceeded)
reconciler.EmitEvent(c.Recorder, before, after, tr)
return err
}

// Reconcile this copy of the task run and then write back any status
// updates regardless of whether the reconciliation errored out.
if err := c.reconcile(ctx, tr); err != nil {
Expand Down Expand Up @@ -232,6 +253,10 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
// and may not have had all of the assumed default specified.
tr.SetDefaults(contexts.WithUpgradeViaDefaulting(ctx))

if tr.Spec.Timeout == nil {
tr.Spec.Timeout = &metav1.Duration{Duration: config.DefaultTimeoutMinutes * time.Minute}
}

if err := tr.ConvertTo(ctx, &v1beta1.TaskRun{}); err != nil {
if ce, ok := err.(*v1beta1.CannotConvertError); ok {
tr.Status.MarkResourceNotConvertible(ce)
Expand All @@ -240,15 +265,6 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
return err
}

// If the taskrun is cancelled, kill resources and update status
if tr.IsCancelled() {
before := tr.Status.GetCondition(apis.ConditionSucceeded)
err := cancelTaskRun(tr, c.KubeClientSet, c.Logger)
after := tr.Status.GetCondition(apis.ConditionSucceeded)
reconciler.EmitEvent(c.Recorder, before, after, tr)
return err
}

getTaskFunc, kind := c.getTaskFunc(tr)
taskMeta, taskSpec, err := resources.GetTaskData(ctx, tr, getTaskFunc)
if err != nil {
Expand Down Expand Up @@ -285,18 +301,6 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
tr.ObjectMeta.Annotations[key] = value
}

if tr.Spec.Timeout == nil {
tr.Spec.Timeout = &metav1.Duration{Duration: config.DefaultTimeoutMinutes * time.Minute}
}
// Check if the TaskRun has timed out; if it is, this will set its status
// accordingly.
if CheckTimeout(tr) {
if err := c.updateTaskRunStatusForTimeout(tr, c.KubeClientSet.CoreV1().Pods(tr.Namespace).Delete); err != nil {
return err
}
return nil
}

inputs := []v1beta1.TaskResourceBinding{}
outputs := []v1beta1.TaskResourceBinding{}
if tr.Spec.Resources != nil {
Expand Down
39 changes: 0 additions & 39 deletions pkg/reconciler/taskrun/timeout_check.go

This file was deleted.

64 changes: 0 additions & 64 deletions pkg/reconciler/taskrun/timeout_check_test.go

This file was deleted.

0 comments on commit 713d2fd

Please sign in to comment.