Improve the failure mode of timeout_test.
I noticed that when the `timeout_test.go` tests flake, the failure mode is often hitting the Go `-timeout=X` limit, which means that without `-v` you get no logs for the test.

This change makes the tests use a context with a timeout, and makes the various `Wait` functions check `ctx.Done()` and return `ctx.Err()`, so the timeout can terminate the test before the Go limit is hit and still produce logs (rather than an ugly panic!).
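
In rough outline, the pattern looks like the following stand-alone sketch (`pollUntil` and the durations here are illustrative stand-ins, not the actual Tekton helpers): the test derives a context whose deadline sits comfortably past the timeout being exercised, and every polling helper checks `ctx.Done()` on each iteration, so hitting the deadline surfaces `context.DeadlineExceeded`, which the test can log, instead of tripping the test binary's `-timeout` panic.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// pollUntil is an illustrative stand-in for the Wait* helpers: it polls
// isDone at the given interval, but bails out with ctx.Err() as soon as
// the test's context is cancelled or its deadline passes.
func pollUntil(ctx context.Context, interval time.Duration, isDone func() bool) error {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err() // e.g. context.DeadlineExceeded; loggable by the test
		case <-ticker.C:
			if isDone() {
				return nil
			}
		}
	}
}

func main() {
	// The real tests use the timeout under test plus a suitable buffer
	// (e.g. timeout+2*time.Minute); a short deadline keeps this demo quick.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// A condition that never becomes true, to force the deadline path.
	err := pollUntil(ctx, 500*time.Millisecond, func() bool { return false })
	fmt.Println("wait returned:", err) // in a test: t.Fatalf("wait failed: %v", err)
}
```

The diff below applies the same idea inside the existing `wait.PollImmediate` callbacks via a non-blocking `select` on `ctx.Done()`.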
mattmoor committed Oct 20, 2020
1 parent d876923 commit 9f964d6
Showing 5 changed files with 81 additions and 35 deletions.
13 changes: 7 additions & 6 deletions test/init_test.go
@@ -72,14 +72,15 @@ func setup(ctx context.Context, t *testing.T, fn ...func(context.Context, *testi
 	return c, namespace
 }
 
-func header(logf logging.FormatLogger, text string) {
+func header(t *testing.T, text string) {
+	t.Helper()
 	left := "### "
 	right := " ###"
 	txt := left + text + right
 	bar := strings.Repeat("#", len(txt))
-	logf(bar)
-	logf(txt)
-	logf(bar)
+	t.Logf(bar)
+	t.Logf(txt)
+	t.Logf(bar)
 }
 
 func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string) {
@@ -88,14 +89,14 @@ func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string)
 		return
 	}
 	if t.Failed() {
-		header(t.Logf, fmt.Sprintf("Dumping objects from %s", namespace))
+		header(t, fmt.Sprintf("Dumping objects from %s", namespace))
 		bs, err := getCRDYaml(ctx, cs, namespace)
 		if err != nil {
 			t.Error(err)
 		} else {
 			t.Log(string(bs))
 		}
-		header(t.Logf, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
+		header(t, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
 		taskruns, err := cs.TaskRunClient.List(ctx, metav1.ListOptions{})
 		if err != nil {
 			t.Errorf("Error getting TaskRun list %s", err)
32 changes: 15 additions & 17 deletions test/timeout_test.go
@@ -38,14 +38,14 @@ import (
 // verify that pipelinerun timeout works and leads to the correct TaskRun statuses
 // and pod deletions.
 func TestPipelineRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	// cancel the context after we have waited a suitable buffer beyond the given deadline.
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task in namespace %s", namespace)
 	task := &v1beta1.Task{
@@ -171,14 +171,13 @@ func TestPipelineRunTimeout(t *testing.T) {
 
 // TestStepTimeout is an integration test that will verify a Step can be timed out.
 func TestStepTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task with Step step-no-timeout, Step step-timeout, and Step step-canceled in namespace %s", namespace)
 
@@ -243,14 +242,14 @@ func TestStepTimeout(t *testing.T) {
 
 // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
 func TestTaskRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	timeout := 30 * time.Second
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task and TaskRun in namespace %s", namespace)
 	task := &v1beta1.Task{
@@ -272,7 +271,7 @@ func TestTaskRunTimeout(t *testing.T) {
 			TaskRef: &v1beta1.TaskRef{Name: "giraffe"},
 			// Do not reduce this timeout. Taskrun e2e test is also verifying
 			// if reconcile is triggered from timeout handler and not by pod informers
-			Timeout: &metav1.Duration{Duration: 30 * time.Second},
+			Timeout: &metav1.Duration{Duration: timeout},
 		},
 	}
 	if _, err := c.TaskRunClient.Create(ctx, taskRun, metav1.CreateOptions{}); err != nil {
@@ -300,14 +299,13 @@ func TestTaskRunTimeout(t *testing.T) {
 }
 
 func TestPipelineTaskTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Tasks in namespace %s", namespace)
 	task1 := &v1beta1.Task{
21 changes: 9 additions & 12 deletions test/v1alpha1/timeout_test.go
@@ -37,14 +37,13 @@ import (
 // verify that pipelinerun timeout works and leads to the correct TaskRun statuses
 // and pod deletions.
 func TestPipelineRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task in namespace %s", namespace)
 	task := tb.Task("banana", tb.TaskSpec(
@@ -140,14 +139,13 @@ func TestPipelineRunTimeout(t *testing.T) {
 
 // TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
 func TestTaskRunTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Task and TaskRun in namespace %s", namespace)
 	if _, err := c.TaskClient.Create(ctx, tb.Task("giraffe",
@@ -168,14 +166,13 @@ func TestTaskRunTimeout(t *testing.T) {
 }
 
 func TestPipelineTaskTimeout(t *testing.T) {
-	ctx := context.Background()
-	ctx, cancel := context.WithCancel(ctx)
+	ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
 	defer cancel()
 	c, namespace := setup(ctx, t)
 	t.Parallel()
 
-	knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
-	defer tearDown(ctx, t, c, namespace)
+	knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
+	defer tearDown(context.Background(), t, c, namespace)
 
 	t.Logf("Creating Tasks in namespace %s", namespace)
 	task1 := tb.Task("success", tb.TaskSpec(
25 changes: 25 additions & 0 deletions test/v1alpha1/wait.go
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
 	defer span.End()
 
 	return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
25 changes: 25 additions & 0 deletions test/wait.go
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
 	defer span.End()
 
 	return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
 	defer span.End()
 
 	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		select {
+		case <-ctx.Done():
+			return true, ctx.Err()
+		default:
+		}
 		r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
 			return true, err
