From 9972bffe121de88dcfa41e5de5b2b6759aca50c6 Mon Sep 17 00:00:00 2001 From: Andrea Frittoli Date: Sun, 5 Apr 2020 11:21:25 +0100 Subject: [PATCH] Emit events for all TaskRun lifecycle events Start emitting events for additional TaskRun lifecyle events: - taskrun started - taskrun timeout Introduce pre-run and post-run functions that are invoked asynchronously when the taskrun starts and completes, to emit events. These same functions shall be used to trigger any other async behaviour on start/stop of taskruns. Add documentation on events. Fixes #2328 Work towards #2082 --- docs/events.md | 39 ++++++++++++++ docs/pipelineruns.md | 1 + docs/taskruns.md | 6 +-- pkg/reconciler/event.go | 4 ++ pkg/reconciler/event_test.go | 8 +++ .../cloudevent/cloud_event_controller.go | 3 +- pkg/reconciler/taskrun/taskrun.go | 51 ++++++++++++++++--- 7 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 docs/events.md diff --git a/docs/events.md b/docs/events.md new file mode 100644 index 00000000000..d3e5f51a181 --- /dev/null +++ b/docs/events.md @@ -0,0 +1,39 @@ + +# Events + +Tekton runtime resources, specifically `TaskRuns` and `PipelineRuns`, +emit events when they are executed, so that users can monitor their lifecycle +and react to it. Tekton emits [kubernetes events](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#event-v1-core), that can be retrieve from the resource via +`kubectl describe [resource]`. + +No events are emitted for `Conditions` today. + +## TaskRuns + +`TaskRun` events are generated for the following `Reasons`: +- `Started`: this is triggered the first time the `TaskRun` is picked by the + reconciler from its work queue, so it only happens if web-hook validation was + successful. Note that this event does not imply that a step started executing, + as several conditions must be met first: + - task and bound resource validation must be successful + - attached conditions must run successfully + - the `Pod` associated to the `TaskRun` must be successfully scheduled +- `Succeeded`: this is triggered once all steps in the `TaskRun` are executed + successfully, including post-steps injected by Tekton. +- `Failed`: this is triggered if the `TaskRun` is completed, but not successfully. + Causes of failure may be: one the steps failed, the `TaskRun` was cancelled or + the `TaskRun` timed out. + +## PipelineRuns + +`PipelineRun` events are generated for the following `Reasons`: +- `Succeeded`: this is triggered once all `Tasks` reachable via the DAG are + executed successfully. +- `Failed`: this is triggered if the `PipelineRun` is completed, but not + successfully. Causes of failure may be: one the `Tasks` failed or the + `PipelineRun` was cancelled. diff --git a/docs/pipelineruns.md b/docs/pipelineruns.md index b6a91cc14dc..e8eb6e84af9 100644 --- a/docs/pipelineruns.md +++ b/docs/pipelineruns.md @@ -29,6 +29,7 @@ Creation of a `PipelineRun` will trigger the creation of - [Workspaces](#workspaces) - [Cancelling a PipelineRun](#cancelling-a-pipelinerun) - [LimitRanges](#limitranges) + - [Events](events.md#pipelineruns) ## Syntax diff --git a/docs/taskruns.md b/docs/taskruns.md index 880dfd81b29..017fba5c3ca 100644 --- a/docs/taskruns.md +++ b/docs/taskruns.md @@ -30,14 +30,14 @@ A `TaskRun` runs until all `steps` have completed or until a failure occurs. - [Steps](#steps) - [Results](#results) - [Cancelling a TaskRun](#cancelling-a-taskrun) + - [Sidecars](#sidecars) + - [LimitRanges](#limitranges) + - [Events](events.md#taskruns) - [Examples](#examples) - [Example TaskRun](#example-taskrun) - [Example with embedded specs](#example-with-embedded-specs) - [Example Task Reuse](#example-task-reuse) - [Using a `ServiceAccount`](#using-a-serviceaccount) - - [Sidecars](#sidecars) - - [LimitRanges](#limitranges) - --- ## Syntax diff --git a/pkg/reconciler/event.go b/pkg/reconciler/event.go index 5341ad73c87..08b4c0fc8fa 100644 --- a/pkg/reconciler/event.go +++ b/pkg/reconciler/event.go @@ -31,6 +31,10 @@ func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCon c.Event(object, corev1.EventTypeNormal, "Succeeded", afterCondition.Message) } else if afterCondition.Status == corev1.ConditionFalse { c.Event(object, corev1.EventTypeWarning, "Failed", afterCondition.Message) + } else { + if beforeCondition == nil { + c.Event(object, corev1.EventTypeNormal, "Started", "") + } } } } diff --git a/pkg/reconciler/event_test.go b/pkg/reconciler/event_test.go index f6c464c23c7..091492acaa0 100644 --- a/pkg/reconciler/event_test.go +++ b/pkg/reconciler/event_test.go @@ -80,6 +80,14 @@ func TestEmitEvent(t *testing.T) { Status: corev1.ConditionTrue, }, expectEvent: true, + }, { + name: "nil to unknown", + before: nil, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + expectEvent: true, }} for _, ts := range testcases { diff --git a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go index 9849d152950..ef0946afb84 100644 --- a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go +++ b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go @@ -66,8 +66,7 @@ func cloudEventDeliveryFromTargets(targets []string) []v1alpha1.CloudEventDelive } // SendCloudEvents is used by the TaskRun controller to send cloud events once -// the TaskRun is complete. `tr` is used to obtain the list of targets but also -// to construct the body of the +// the TaskRun is complete. `tr` is used to obtain the list of targets func SendCloudEvents(tr *v1alpha1.TaskRun, ceclient CEClient, logger *zap.SugaredLogger) error { logger = logger.With(zap.String("taskrun", tr.Name)) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index 53aabf49ad4..8b47232e71f 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -104,11 +104,15 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { // If the TaskRun is just starting, this will also set the starttime, // from which the timeout will immediately begin counting down. - tr.Status.InitializeConditions() - // In case node time was not synchronized, when controller has been scheduled to other nodes. - if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { - c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) - tr.Status.StartTime = &tr.CreationTimestamp + if !tr.HasStarted() { + tr.Status.InitializeConditions() + // In case node time was not synchronized, when controller has been scheduled to other nodes. + if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { + c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) + tr.Status.StartTime = &tr.CreationTimestamp + } + // Run asnyc startup hooks + go c.preRunAsyncHook(ctx, tr) } if tr.IsDone() { @@ -227,6 +231,26 @@ func (c *Reconciler) getTaskFunc(tr *v1alpha1.TaskRun) (resources.GetTask, v1alp return gtFunc, kind } +// Run any async logic that may be required at start-up time. This method is used +// to emit events, notifications or any other async operation +func (c *Reconciler) preRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun) { + c.Logger.Infof("preRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, nil, afterCondition, tr) +} + +// Run any async logic that may be required once the tr is successfully reconciled +// This method is used to emit events, notifications or any other async operation +func (c *Reconciler) postRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun, beforeCondition *apis.Condition) { + c.Logger.Infof("postRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, beforeCondition, afterCondition, tr) +} + func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error { // We may be reading a version of the object that was stored at an older version // and may not have had all of the assumed default specified. @@ -244,8 +268,7 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error if tr.IsCancelled() { before := tr.Status.GetCondition(apis.ConditionSucceeded) err := cancelTaskRun(tr, c.KubeClientSet, c.Logger) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + go c.postRunAsyncHook(ctx, tr, before) return err } @@ -291,9 +314,13 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error // Check if the TaskRun has timed out; if it is, this will set its status // accordingly. if CheckTimeout(tr) { + // Store the condition before the update for the postRunAsyncHook + before := tr.Status.GetCondition(apis.ConditionSucceeded) if err := c.updateTaskRunStatusForTimeout(tr, c.KubeClientSet.CoreV1().Pods(tr.Namespace).Delete); err != nil { return err } + // The TaskRun is complete, so we run the post hook + go c.postRunAsyncHook(ctx, tr, before) return nil } @@ -408,7 +435,14 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + // If after is different from before and status is not Unknown, the taskrun + // has completed its work - except for post-run tasks like emitting events, + // recording metrics, sending cloud events. + // Once tr.isDone becomes true, even when this key is queued, `reconcile` + // won't be invoked so we won't pass through here again + if tr.IsDone() && after != before { + go c.postRunAsyncHook(ctx, tr, before) + } c.Logger.Infof("Successfully reconciled taskrun %s/%s with status: %#v", tr.Name, tr.Namespace, after) return nil @@ -581,6 +615,7 @@ func (c *Reconciler) createPod(tr *v1alpha1.TaskRun, rtr *resources.ResolvedTask type DeletePod func(podName string, options *metav1.DeleteOptions) error func (c *Reconciler) updateTaskRunStatusForTimeout(tr *v1alpha1.TaskRun, dp DeletePod) error { + c.Logger.Infof("TaskRun %q has timed out, deleting pod", tr.Name) // tr.Status.PodName will be empty if the pod was never successfully created. This condition // can be reached, for example, by the pod never being schedulable due to limits imposed by