Skip to content

Commit

Permalink
Only mark PipelineRun as cancelled if actually cancelled 🛑
Browse files Browse the repository at this point in the history
While working on tektoncd#2369 (flakey tests around cancellation that actually
revealed underlying bugs) we were running into a case where trying to
cancel a PipelineRun's TaskRuns was failing (due to a race condition).
In that case, the PipelineRun would be marked as cancelled and done,
even though the PipelineRun was actually still executing in that one or
more TaskRuns were actually still running.

Now when that happens we will indicate that the PipelineRun is still
running and return an error, which will mean that on the next reconcile,
the Reconciler will try to reconcile again, and the PipelineRun
conditions will reflect what is actually happening.

Co-authored-by: Sharon Jerop Kipruto <[email protected]>

Fixes tektoncd#2381
  • Loading branch information
bobcatfish committed Apr 17, 2020
1 parent 1a37fdb commit 3e382f0
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 12 deletions.
30 changes: 20 additions & 10 deletions pkg/reconciler/pipelinerun/cancel.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,6 @@ import (

// cancelPipelineRun marks the PipelineRun as cancelled and any resolved TaskRun(s) too.
func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, pipelineState []*resources.ResolvedPipelineRunTask, clientSet clientset.Interface) error {
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "PipelineRunCancelled",
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
})
// update pr completed time
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
errs := []string{}
for _, rprt := range pipelineState {
if rprt.TaskRun == nil {
Expand All @@ -65,8 +57,26 @@ func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, pipe
continue
}
}
if len(errs) > 0 {
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, strings.Join(errs, "\n"))
// If we successfully cancelled all the TaskRuns, we can consider the PipelineRun cancelled.
if len(errs) == 0 {
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: ReasonCancelled,
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
})
// update pr completed time
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
} else {
e := strings.Join(errs, "\n")
// Indicate that we failed to cancel the PipelineRun
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown,
Reason: ReasonCouldntCancel,
Message: fmt.Sprintf("PipelineRun %q was cancelled but had errors trying to cancel TaskRuns: %s", pr.Name, e),
})
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, e)
}
return nil
}
Expand Down
8 changes: 7 additions & 1 deletion pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ const (
// ReasonInvalidGraph indicates that the reason for the failure status is that the
// associated Pipeline is an invalid graph (a.k.a wrong order, cycle, …)
ReasonInvalidGraph = "PipelineInvalidGraph"
// ReasonCancelled indicates that a PipelineRun was cancelled.
ReasonCancelled = "PipelineRunCancelled"
// ReasonCouldntCancel indicates that a PipelineRun was cancelled but attempting to update
// all of the running TaskRuns as cancelled failed.
ReasonCouldntCancel = "PipelineRunCouldntCancel"

// pipelineRunAgentName defines logging agent name for PipelineRun Controller
pipelineRunAgentName = "pipeline-controller"

Expand Down Expand Up @@ -522,7 +528,7 @@ func (c *Reconciler) reconcile(ctx context.Context, pr *v1alpha1.PipelineRun) er
}
}

// If the pipelinerun is cancelled, cancel tasks and update status
// If the running pipelinerun is cancelled, cancel tasks and update status
if pr.IsCancelled() {
before := pr.Status.GetCondition(apis.ConditionSucceeded)
err := cancelPipelineRun(c.Logger, pr, pipelineState, c.PipelineClientSet)
Expand Down
58 changes: 57 additions & 1 deletion pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ import (
test "github.com/tektoncd/pipeline/test/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
k8stesting "k8s.io/client-go/testing"
ktesting "k8s.io/client-go/testing"
"knative.dev/pkg/apis"
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
Expand Down Expand Up @@ -1000,6 +1002,60 @@ func TestReconcileWithoutPVC(t *testing.T) {
}
}

func TestReconcileCancelledFailsTaskRunCancellation(t *testing.T) {
names.TestingSeed()
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", "foo", tb.PipelineSpec(
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
))}
prs := []*v1alpha1.PipelineRun{tb.PipelineRun("test-pipeline-run-with-timeout", "foo",
tb.PipelineRunSpec("test-pipeline",
tb.PipelineRunCancelled,
),
)}
ts := []*v1alpha1.Task{tb.Task("hello-world", "foo")}
tr := []*v1alpha1.TaskRun{tb.TaskRun("test-pipeline-run-with-timeout-hello-world-1-9l9zj", "foo",
tb.TaskRunStatus(tb.StatusCondition(apis.Condition{Type: apis.ConditionSucceeded, Status: corev1.ConditionUnknown}))),
}

d := test.Data{
PipelineRuns: prs,
Pipelines: ps,
Tasks: ts,
TaskRuns: tr,
}

testAssets, cancel := getPipelineRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients

// Make the patch call fail, i.e. make it so that the controller fails to cancel the TaskRun
clients.Pipeline.PrependReactor("patch", "taskruns", func(action k8stesting.Action) (bool, runtime.Object, error) {
fmt.Println(action.GetVerb(), action.GetResource())
return true, nil, fmt.Errorf("i'm sorry Dave, i'm afraid i can't do that")
})

err := c.Reconciler.Reconcile(context.Background(), "foo/test-pipeline-run-with-timeout")
if err == nil {
t.Errorf("Expected to see error returned from reconcile after failing to cancel TaskRun but saw none!")
}

// Check that the PipelineRun is still running with correct error message
reconciledRun, err := clients.Pipeline.TektonV1alpha1().PipelineRuns("foo").Get("test-pipeline-run-with-timeout", metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
}

// The PipelineRun should not be cancelled b/c we couldn't cancel the TaskRun
condition := reconciledRun.Status.GetCondition(apis.ConditionSucceeded)
if !condition.IsUnknown() {
t.Errorf("Expected PipelineRun to still be running since the TaskRun could not be cancelled but succeded condition is %v", condition.Status)
}
if condition.Reason != ReasonCouldntCancel {
t.Errorf("Expected PipelineRun condition to indicate the cancellation failed but reason was %s", condition.Reason)
}
}

func TestReconcileCancelledPipelineRun(t *testing.T) {
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", "foo", tb.PipelineSpec(
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
Expand Down Expand Up @@ -1034,7 +1090,7 @@ func TestReconcileCancelledPipelineRun(t *testing.T) {
}

// The PipelineRun should be still cancelled.
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "PipelineRunCancelled" {
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != ReasonCancelled {
t.Errorf("Expected PipelineRun to be cancelled, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
}

Expand Down

0 comments on commit 3e382f0

Please sign in to comment.