From 9f964d687fef8c1e59cb340074488b6173d4846e Mon Sep 17 00:00:00 2001 From: Matt Moore Date: Thu, 15 Oct 2020 16:02:47 -0700 Subject: [PATCH] Improve the failure mode of timeout_test. I notice that the failure mode for `timeout_test.go` tests on flakes seems to often be hitting the Go `-timeout=X` limit, which means that without `-v` you get no logs for the test. This change makes the tests use a context with a Timeout, and makes the various `Wait` functions check the `context.Done()` and return `context.Err()` to support the timeout terminating the test earlier than the above and producing logs (other than an ugly panic!). --- test/init_test.go | 13 +++++++------ test/timeout_test.go | 32 +++++++++++++++----------------- test/v1alpha1/timeout_test.go | 21 +++++++++------------ test/v1alpha1/wait.go | 25 +++++++++++++++++++++++++ test/wait.go | 25 +++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 35 deletions(-) diff --git a/test/init_test.go b/test/init_test.go index 8bdcfec0553..ee02775e517 100644 --- a/test/init_test.go +++ b/test/init_test.go @@ -72,14 +72,15 @@ func setup(ctx context.Context, t *testing.T, fn ...func(context.Context, *testi return c, namespace } -func header(logf logging.FormatLogger, text string) { +func header(t *testing.T, text string) { + t.Helper() left := "### " right := " ###" txt := left + text + right bar := strings.Repeat("#", len(txt)) - logf(bar) - logf(txt) - logf(bar) + t.Logf(bar) + t.Logf(txt) + t.Logf(bar) } func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string) { @@ -88,14 +89,14 @@ func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string) return } if t.Failed() { - header(t.Logf, fmt.Sprintf("Dumping objects from %s", namespace)) + header(t, fmt.Sprintf("Dumping objects from %s", namespace)) bs, err := getCRDYaml(ctx, cs, namespace) if err != nil { t.Error(err) } else { t.Log(string(bs)) } - header(t.Logf, fmt.Sprintf("Dumping logs from Pods in the %s", namespace)) + header(t, fmt.Sprintf("Dumping logs from Pods in the %s", namespace)) taskruns, err := cs.TaskRunClient.List(ctx, metav1.ListOptions{}) if err != nil { t.Errorf("Error getting TaskRun list %s", err) diff --git a/test/timeout_test.go b/test/timeout_test.go index e2e6a5845a6..e5ac47a782b 100644 --- a/test/timeout_test.go +++ b/test/timeout_test.go @@ -38,14 +38,14 @@ import ( // verify that pipelinerun timeout works and leads to the the correct TaskRun statuses // and pod deletions. func TestPipelineRunTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + // cancel the context after we have waited a suitable buffer beyond the given deadline. + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Task in namespace %s", namespace) task := &v1beta1.Task{ @@ -171,14 +171,13 @@ func TestPipelineRunTimeout(t *testing.T) { // TestStepTimeout is an integration test that will verify a Step can be timed out. func TestStepTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Task with Step step-no-timeout, Step step-timeout, and Step step-canceled in namespace %s", namespace) @@ -243,14 +242,14 @@ func TestStepTimeout(t *testing.T) { // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out. func TestTaskRunTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + timeout := 30 * time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Task and TaskRun in namespace %s", namespace) task := &v1beta1.Task{ @@ -272,7 +271,7 @@ func TestTaskRunTimeout(t *testing.T) { TaskRef: &v1beta1.TaskRef{Name: "giraffe"}, // Do not reduce this timeout. Taskrun e2e test is also verifying // if reconcile is triggered from timeout handler and not by pod informers - Timeout: &metav1.Duration{Duration: 30 * time.Second}, + Timeout: &metav1.Duration{Duration: timeout}, }, } if _, err := c.TaskRunClient.Create(ctx, taskRun, metav1.CreateOptions{}); err != nil { @@ -300,14 +299,13 @@ func TestTaskRunTimeout(t *testing.T) { } func TestPipelineTaskTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Tasks in namespace %s", namespace) task1 := &v1beta1.Task{ diff --git a/test/v1alpha1/timeout_test.go b/test/v1alpha1/timeout_test.go index 2cdbbd9c9b0..b8bb6ef290c 100644 --- a/test/v1alpha1/timeout_test.go +++ b/test/v1alpha1/timeout_test.go @@ -37,14 +37,13 @@ import ( // verify that pipelinerun timeout works and leads to the the correct TaskRun statuses // and pod deletions. func TestPipelineRunTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Task in namespace %s", namespace) task := tb.Task("banana", tb.TaskSpec( @@ -140,14 +139,13 @@ func TestPipelineRunTimeout(t *testing.T) { // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out. func TestTaskRunTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Task and TaskRun in namespace %s", namespace) if _, err := c.TaskClient.Create(ctx, tb.Task("giraffe", @@ -168,14 +166,13 @@ func TestTaskRunTimeout(t *testing.T) { } func TestPipelineTaskTimeout(t *testing.T) { - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) defer cancel() c, namespace := setup(ctx, t) t.Parallel() - knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf) - defer tearDown(ctx, t, c, namespace) + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) t.Logf("Creating Tasks in namespace %s", namespace) task1 := tb.Task("success", tb.TaskSpec( diff --git a/test/v1alpha1/wait.go b/test/v1alpha1/wait.go index efa9ab247cd..10ae6e40a5c 100644 --- a/test/v1alpha1/wait.go +++ b/test/v1alpha1/wait.go @@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt defer span.End() return wait.PollImmediate(interval, polltimeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err diff --git a/test/wait.go b/test/wait.go index efa9ab247cd..10ae6e40a5c 100644 --- a/test/wait.go +++ b/test/wait.go @@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt defer span.End() return wait.PollImmediate(interval, polltimeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err @@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n defer span.End() return wait.PollImmediate(interval, timeout, func() (bool, error) { + select { + case <-ctx.Done(): + return true, ctx.Err() + default: + } r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return true, err