From 9f964d687fef8c1e59cb340074488b6173d4846e Mon Sep 17 00:00:00 2001
From: Matt Moore <mattmoor@vmware.com>
Date: Thu, 15 Oct 2020 16:02:47 -0700
Subject: [PATCH] Improve the failure mode of timeout_test.

I notice that the failure mode for `timeout_test.go` tests on flakes seems to often be hitting the Go `-timeout=X` limit, which means that without `-v` you get no logs for the test.

This change makes the tests use a context with a Timeout, and makes the various `Wait` functions check the `context.Done()` and return `context.Err()` to support the timeout terminating the test earlier than the above and producing logs (other than an ugly panic!).
---
 test/init_test.go             | 13 +++++++------
 test/timeout_test.go          | 32 +++++++++++++++-----------------
 test/v1alpha1/timeout_test.go | 21 +++++++++------------
 test/v1alpha1/wait.go         | 25 +++++++++++++++++++++++++
 test/wait.go                  | 25 +++++++++++++++++++++++++
 5 files changed, 81 insertions(+), 35 deletions(-)

diff --git a/test/init_test.go b/test/init_test.go
index 8bdcfec0553..ee02775e517 100644
--- a/test/init_test.go
+++ b/test/init_test.go
@@ -72,14 +72,15 @@ func setup(ctx context.Context, t *testing.T, fn ...func(context.Context, *testi
 	return c, namespace
 }
 
-func header(logf logging.FormatLogger, text string) {
+func header(t *testing.T, text string) {
+	t.Helper()
 	left := "### "
 	right := " ###"
 	txt := left + text + right
 	bar := strings.Repeat("#", len(txt))
-	logf(bar)
-	logf(txt)
-	logf(bar)
+	t.Logf(bar)
+	t.Logf(txt)
+	t.Logf(bar)
 }
 
 func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string) {
@@ -88,14 +89,14 @@ func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string)
 		return
 	}
 	if t.Failed() {
-		header(t.Logf, fmt.Sprintf("Dumping objects from %s", namespace))
+		header(t, fmt.Sprintf("Dumping objects from %s", namespace))
 		bs, err := getCRDYaml(ctx, cs, namespace)
 		if err != nil {
 			t.Error(err)
 		} else {
 			t.Log(string(bs))
 		}
-		header(t.Logf, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
+		header(t, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
 		taskruns, err := cs.TaskRunClient.List(ctx, metav1.ListOptions{})
 		if err != nil {
 			t.Errorf("Error getting TaskRun list %s", err)
diff --git a/test/timeout_test.go b/test/timeout_test.go
index e2e6a5845a6..e5ac47a782b 100644
--- a/test/timeout_test.go
+++ b/test/timeout_test.go
@@ -38,14 +38,14 @@ import (
 // verify that pipelinerun timeout works and leads to the the correct TaskRun statuses
 // and pod deletions.
 func TestPipelineRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	// cancel the context after we have waited a suitable buffer beyond the given deadline.
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task in namespace %s", namespace)
 	task := &v1beta1.Task{
@@ -171,14 +171,13 @@ func TestPipelineRunTimeout(t *testing.T) {
 
 // TestStepTimeout is an integration test that will verify a Step can be timed out.
 func TestStepTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task with Step step-no-timeout, Step step-timeout, and Step step-canceled in namespace %s", namespace)
 
@@ -243,14 +242,14 @@ func TestStepTimeout(t *testing.T) {
 
 // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
 func TestTaskRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	timeout := 30 * time.Second
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task and TaskRun in namespace %s", namespace)
 	task := &v1beta1.Task{
@@ -272,7 +271,7 @@ func TestTaskRunTimeout(t *testing.T) {
 			TaskRef: &v1beta1.TaskRef{Name: "giraffe"},
 			// Do not reduce this timeout. Taskrun e2e test is also verifying
 			// if reconcile is triggered from timeout handler and not by pod informers
-			Timeout: &metav1.Duration{Duration: 30 * time.Second},
+			Timeout: &metav1.Duration{Duration: timeout},
 		},
 	}
 	if _, err := c.TaskRunClient.Create(ctx, taskRun, metav1.CreateOptions{}); err != nil {
@@ -300,14 +299,13 @@ func TestTaskRunTimeout(t *testing.T) {
 }
 
 func TestPipelineTaskTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Tasks in namespace %s", namespace)
 	task1 := &v1beta1.Task{
diff --git a/test/v1alpha1/timeout_test.go b/test/v1alpha1/timeout_test.go
index 2cdbbd9c9b0..b8bb6ef290c 100644
--- a/test/v1alpha1/timeout_test.go
+++ b/test/v1alpha1/timeout_test.go
@@ -37,14 +37,13 @@ import (
 // verify that pipelinerun timeout works and leads to the the correct TaskRun statuses
 // and pod deletions.
 func TestPipelineRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task in namespace %s", namespace)
 	task := tb.Task("banana", tb.TaskSpec(
@@ -140,14 +139,13 @@ func TestPipelineRunTimeout(t *testing.T) {
 
 // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
 func TestTaskRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task and TaskRun in namespace %s", namespace)
 	if _, err := c.TaskClient.Create(ctx, tb.Task("giraffe",
@@ -168,14 +166,13 @@ func TestTaskRunTimeout(t *testing.T) {
 }
 
 func TestPipelineTaskTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Tasks in namespace %s", namespace)
 	task1 := tb.Task("success", tb.TaskSpec(
diff --git a/test/v1alpha1/wait.go b/test/v1alpha1/wait.go
index efa9ab247cd..10ae6e40a5c 100644
--- a/test/v1alpha1/wait.go
+++ b/test/v1alpha1/wait.go
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
 	defer span.End()
 
 	return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
diff --git a/test/wait.go b/test/wait.go
index efa9ab247cd..10ae6e40a5c 100644
--- a/test/wait.go
+++ b/test/wait.go
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
 	defer span.End()
 
 	return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err