Skip to content

Commit

Permalink
Only mark PipelineRun as cancelled if actually cancelled 🛑
Browse files Browse the repository at this point in the history
While working on #2369 (flakey tests around cancellation that actually
revealed underlying bugs) we were running into a case where trying to
cancel a PipelineRun's TaskRuns was failing (due to a race condition).
In that case, the PipelineRun would be marked as cancelled and done,
even though the PipelineRun was actually still executing in that one or
more TaskRuns were actually still running.

Now when that happens we will indicate that the PipelineRun is still
running and return an error, which will mean that on the next reconcile,
the Reconciler will try to reconcile again, and the PipelineRun
conditions will reflect what is actually happening.

Co-authored-by: Sharon Jerop Kipruto <[email protected]>

Fixes #2381
  • Loading branch information
bobcatfish authored and tekton-robot committed May 11, 2020
1 parent 90a8066 commit 62f41d8
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 11 deletions.
30 changes: 20 additions & 10 deletions pkg/reconciler/pipelinerun/cancel.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,6 @@ import (

// cancelPipelineRun marks the PipelineRun as cancelled and any resolved TaskRun(s) too.
func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, clientSet clientset.Interface) error {
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "PipelineRunCancelled",
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
})
// update pr completed time
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
errs := []string{}

// Use Patch to update the TaskRuns since the TaskRun controller may be operating on the
Expand All @@ -62,8 +54,26 @@ func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, clie
continue
}
}
if len(errs) > 0 {
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, strings.Join(errs, "\n"))
// If we successfully cancelled all the TaskRuns, we can consider the PipelineRun cancelled.
if len(errs) == 0 {
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: ReasonCancelled,
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
})
// update pr completed time
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
} else {
e := strings.Join(errs, "\n")
// Indicate that we failed to cancel the PipelineRun
pr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown,
Reason: ReasonCouldntCancel,
Message: fmt.Sprintf("PipelineRun %q was cancelled but had errors trying to cancel TaskRuns: %s", pr.Name, e),
})
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, e)
}
return nil
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ const (
// ReasonInvalidGraph indicates that the reason for the failure status is that the
// associated Pipeline is an invalid graph (a.k.a wrong order, cycle, …)
ReasonInvalidGraph = "PipelineInvalidGraph"
// ReasonCancelled indicates that a PipelineRun was cancelled.
ReasonCancelled = "PipelineRunCancelled"
// ReasonCouldntCancel indicates that a PipelineRun was cancelled but attempting to update
// all of the running TaskRuns as cancelled failed.
ReasonCouldntCancel = "PipelineRunCouldntCancel"

// pipelineRunAgentName defines logging agent name for PipelineRun Controller
pipelineRunAgentName = "pipeline-controller"

Expand Down
57 changes: 56 additions & 1 deletion pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (
test "github.com/tektoncd/pipeline/test/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
k8stesting "k8s.io/client-go/testing"
ktesting "k8s.io/client-go/testing"
"knative.dev/pkg/apis"
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
Expand Down Expand Up @@ -1020,6 +1022,59 @@ func TestReconcileWithoutPVC(t *testing.T) {
}
}

func TestReconcileCancelledFailsTaskRunCancellation(t *testing.T) {
names.TestingSeed()
ptName := "hello-world-1"
prName := "test-pipeline-run-with-timeout"
prs := []*v1alpha1.PipelineRun{tb.PipelineRun(prName, tb.PipelineRunNamespace("foo"),
tb.PipelineRunSpec("test-pipeline",
tb.PipelineRunCancelled,
),
// The reconciler uses the presense of this TaskRun in the status to determine that a TaskRun
// is already running. The TaskRun will not be retrieved at all so we do not need to seed one.
tb.PipelineRunStatus(
tb.PipelineRunTaskRunsStatus(prName+ptName, &v1alpha1.PipelineRunTaskRunStatus{
PipelineTaskName: ptName,
Status: &v1alpha1.TaskRunStatus{},
}),
),
)}

d := test.Data{
PipelineRuns: prs,
}

testAssets, cancel := getPipelineRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients

// Make the patch call fail, i.e. make it so that the controller fails to cancel the TaskRun
clients.Pipeline.PrependReactor("patch", "taskruns", func(action k8stesting.Action) (bool, runtime.Object, error) {
return true, nil, fmt.Errorf("i'm sorry Dave, i'm afraid i can't do that")
})

err := c.Reconciler.Reconcile(context.Background(), "foo/test-pipeline-run-with-timeout")
if err == nil {
t.Errorf("Expected to see error returned from reconcile after failing to cancel TaskRun but saw none!")
}

// Check that the PipelineRun is still running with correct error message
reconciledRun, err := clients.Pipeline.TektonV1alpha1().PipelineRuns("foo").Get("test-pipeline-run-with-timeout", metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
}

// The PipelineRun should not be cancelled b/c we couldn't cancel the TaskRun
condition := reconciledRun.Status.GetCondition(apis.ConditionSucceeded)
if !condition.IsUnknown() {
t.Errorf("Expected PipelineRun to still be running since the TaskRun could not be cancelled but succeded condition is %v", condition.Status)
}
if condition.Reason != ReasonCouldntCancel {
t.Errorf("Expected PipelineRun condition to indicate the cancellation failed but reason was %s", condition.Reason)
}
}

func TestReconcileCancelledPipelineRun(t *testing.T) {
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", tb.PipelineNamespace("foo"), tb.PipelineSpec(
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
Expand Down Expand Up @@ -1054,7 +1109,7 @@ func TestReconcileCancelledPipelineRun(t *testing.T) {
}

// The PipelineRun should be still cancelled.
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "PipelineRunCancelled" {
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != ReasonCancelled {
t.Errorf("Expected PipelineRun to be cancelled, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
}

Expand Down

0 comments on commit 62f41d8

Please sign in to comment.