From 9972bffe121de88dcfa41e5de5b2b6759aca50c6 Mon Sep 17 00:00:00 2001
From: Andrea Frittoli <andrea.frittoli@uk.ibm.com>
Date: Sun, 5 Apr 2020 11:21:25 +0100
Subject: [PATCH] Emit events for all TaskRun lifecycle events

Start emitting events for additional TaskRun lifecycle events:
- taskrun started
- taskrun timeout

Introduce pre-run and post-run functions that are invoked
asynchronously when the taskrun starts and completes, to emit
events.

These same functions shall be used to trigger any other async
behaviour on start/stop of taskruns.

Add documentation on events.

Fixes #2328
Work towards #2082
---
 docs/events.md                                | 39 ++++++++++++++
 docs/pipelineruns.md                          |  1 +
 docs/taskruns.md                              |  6 +--
 pkg/reconciler/event.go                       |  4 ++
 pkg/reconciler/event_test.go                  |  8 +++
 .../cloudevent/cloud_event_controller.go      |  3 +-
 pkg/reconciler/taskrun/taskrun.go             | 51 ++++++++++++++++---
 7 files changed, 99 insertions(+), 13 deletions(-)
 create mode 100644 docs/events.md

diff --git a/docs/events.md b/docs/events.md
new file mode 100644
index 00000000000..d3e5f51a181
--- /dev/null
+++ b/docs/events.md
@@ -0,0 +1,39 @@
+<!--
+---
+linkTitle: "Events"
+weight: 2
+---
+-->
+# Events
+
+Tekton runtime resources, specifically `TaskRuns` and `PipelineRuns`,
+emit events when they are executed, so that users can monitor their lifecycle
+and react to it. Tekton emits [Kubernetes events](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#event-v1-core), which can be retrieved from the resource via
+`kubectl describe [resource]`.
+
+No events are emitted for `Conditions` today.
+
+## TaskRuns
+
+`TaskRun` events are generated for the following `Reasons`:
+- `Started`: this is triggered the first time the `TaskRun` is picked up by the
+  reconciler from its work queue, so it only happens if webhook validation was
+  successful. Note that this event does not imply that a step started executing,
+  as several conditions must be met first:
+  - task and bound resource validation must be successful
+  - attached conditions must run successfully
+  - the `Pod` associated to the `TaskRun` must be successfully scheduled
+- `Succeeded`: this is triggered once all steps in the `TaskRun` are executed
+  successfully, including post-steps injected by Tekton.
+- `Failed`: this is triggered if the `TaskRun` is completed, but not successfully.
+  Causes of failure may be: one of the steps failed, the `TaskRun` was cancelled or
+  the `TaskRun` timed out.
+
+## PipelineRuns
+
+`PipelineRun` events are generated for the following `Reasons`:
+- `Succeeded`: this is triggered once all `Tasks` reachable via the DAG are
+  executed successfully.
+- `Failed`: this is triggered if the `PipelineRun` is completed, but not
+  successfully. Causes of failure may be: one of the `Tasks` failed or the
+  `PipelineRun` was cancelled.
diff --git a/docs/pipelineruns.md b/docs/pipelineruns.md
index b6a91cc14dc..e8eb6e84af9 100644
--- a/docs/pipelineruns.md
+++ b/docs/pipelineruns.md
@@ -29,6 +29,7 @@ Creation of a `PipelineRun` will trigger the creation of
   - [Workspaces](#workspaces)
   - [Cancelling a PipelineRun](#cancelling-a-pipelinerun)
   - [LimitRanges](#limitranges)
+  - [Events](events.md#pipelineruns)
 
 ## Syntax
 
diff --git a/docs/taskruns.md b/docs/taskruns.md
index 880dfd81b29..017fba5c3ca 100644
--- a/docs/taskruns.md
+++ b/docs/taskruns.md
@@ -30,14 +30,14 @@ A `TaskRun` runs until all `steps` have completed or until a failure occurs.
     - [Steps](#steps)
     - [Results](#results)
   - [Cancelling a TaskRun](#cancelling-a-taskrun)
+  - [Sidecars](#sidecars)
+  - [LimitRanges](#limitranges)
+  - [Events](events.md#taskruns)
   - [Examples](#examples)
     - [Example TaskRun](#example-taskrun)
     - [Example with embedded specs](#example-with-embedded-specs)
     - [Example Task Reuse](#example-task-reuse)
       - [Using a `ServiceAccount`](#using-a-serviceaccount)
-  - [Sidecars](#sidecars)
-  - [LimitRanges](#limitranges)
-
 ---
 
 ## Syntax
diff --git a/pkg/reconciler/event.go b/pkg/reconciler/event.go
index 5341ad73c87..08b4c0fc8fa 100644
--- a/pkg/reconciler/event.go
+++ b/pkg/reconciler/event.go
@@ -31,6 +31,10 @@ func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCon
 			c.Event(object, corev1.EventTypeNormal, "Succeeded", afterCondition.Message)
 		} else if afterCondition.Status == corev1.ConditionFalse {
 			c.Event(object, corev1.EventTypeWarning, "Failed", afterCondition.Message)
+		} else {
+			if beforeCondition == nil {
+				c.Event(object, corev1.EventTypeNormal, "Started", "")
+			}
 		}
 	}
 }
diff --git a/pkg/reconciler/event_test.go b/pkg/reconciler/event_test.go
index f6c464c23c7..091492acaa0 100644
--- a/pkg/reconciler/event_test.go
+++ b/pkg/reconciler/event_test.go
@@ -80,6 +80,14 @@ func TestEmitEvent(t *testing.T) {
 			Status: corev1.ConditionTrue,
 		},
 		expectEvent: true,
+	}, {
+		name:   "nil to unknown",
+		before: nil,
+		after: &apis.Condition{
+			Type:   apis.ConditionSucceeded,
+			Status: corev1.ConditionUnknown,
+		},
+		expectEvent: true,
 	}}
 
 	for _, ts := range testcases {
diff --git a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go
index 9849d152950..ef0946afb84 100644
--- a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go
+++ b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go
@@ -66,8 +66,7 @@ func cloudEventDeliveryFromTargets(targets []string) []v1alpha1.CloudEventDelive
 }
 
 // SendCloudEvents is used by the TaskRun controller to send cloud events once
-// the TaskRun is complete. `tr` is used to obtain the list of targets but also
-// to construct the body of the
+// the TaskRun is complete. `tr` is used to obtain the list of targets.
 func SendCloudEvents(tr *v1alpha1.TaskRun, ceclient CEClient, logger *zap.SugaredLogger) error {
 	logger = logger.With(zap.String("taskrun", tr.Name))
 
diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go
index 53aabf49ad4..8b47232e71f 100644
--- a/pkg/reconciler/taskrun/taskrun.go
+++ b/pkg/reconciler/taskrun/taskrun.go
@@ -104,11 +104,15 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
 
 	// If the TaskRun is just starting, this will also set the starttime,
 	// from which the timeout will immediately begin counting down.
-	tr.Status.InitializeConditions()
-	// In case node time was not synchronized, when controller has been scheduled to other nodes.
-	if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 {
-		c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime)
-		tr.Status.StartTime = &tr.CreationTimestamp
+	if !tr.HasStarted() {
+		tr.Status.InitializeConditions()
+		// In case node time was not synchronized, when controller has been scheduled to other nodes.
+		if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 {
+			c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime)
+			tr.Status.StartTime = &tr.CreationTimestamp
+		}
+		// Run async startup hooks
+		go c.preRunAsyncHook(ctx, tr)
 	}
 
 	if tr.IsDone() {
@@ -227,6 +231,26 @@ func (c *Reconciler) getTaskFunc(tr *v1alpha1.TaskRun) (resources.GetTask, v1alp
 	return gtFunc, kind
 }
 
+// preRunAsyncHook runs any async logic that may be required at start-up time.
+// It is used to emit events, notifications or any other async operation.
+func (c *Reconciler) preRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun) {
+	c.Logger.Infof("preRunAsyncHook: %s", tr.Name)
+
+	// Emit event
+	afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
+	reconciler.EmitEvent(c.Recorder, nil, afterCondition, tr)
+}
+
+// postRunAsyncHook runs any async logic that may be required once the tr is
+// successfully reconciled. It is used to emit events, notifications or any other async operation.
+func (c *Reconciler) postRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun, beforeCondition *apis.Condition) {
+	c.Logger.Infof("postRunAsyncHook: %s", tr.Name)
+
+	// Emit event
+	afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
+	reconciler.EmitEvent(c.Recorder, beforeCondition, afterCondition, tr)
+}
+
 func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error {
 	// We may be reading a version of the object that was stored at an older version
 	// and may not have had all of the assumed default specified.
@@ -244,8 +268,7 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
 	if tr.IsCancelled() {
 		before := tr.Status.GetCondition(apis.ConditionSucceeded)
 		err := cancelTaskRun(tr, c.KubeClientSet, c.Logger)
-		after := tr.Status.GetCondition(apis.ConditionSucceeded)
-		reconciler.EmitEvent(c.Recorder, before, after, tr)
+		go c.postRunAsyncHook(ctx, tr, before)
 		return err
 	}
 
@@ -291,9 +314,13 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
 	// Check if the TaskRun has timed out; if it is, this will set its status
 	// accordingly.
 	if CheckTimeout(tr) {
+		// Store the condition before the update for the postRunAsyncHook
+		before := tr.Status.GetCondition(apis.ConditionSucceeded)
 		if err := c.updateTaskRunStatusForTimeout(tr, c.KubeClientSet.CoreV1().Pods(tr.Namespace).Delete); err != nil {
 			return err
 		}
+		// The TaskRun is complete, so we run the post hook
+		go c.postRunAsyncHook(ctx, tr, before)
 		return nil
 	}
 
@@ -408,7 +435,14 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
 
 	after := tr.Status.GetCondition(apis.ConditionSucceeded)
 
-	reconciler.EmitEvent(c.Recorder, before, after, tr)
+	// If after is different from before and status is not Unknown, the taskrun
+	// has completed its work - except for post-run tasks like emitting events,
+	// recording metrics, sending cloud events.
+	// Once tr.IsDone() becomes true, even when this key is queued, `reconcile`
+	// won't be invoked, so we won't pass through here again.
+	if tr.IsDone() && after != before {
+		go c.postRunAsyncHook(ctx, tr, before)
+	}
 	c.Logger.Infof("Successfully reconciled taskrun %s/%s with status: %#v", tr.Name, tr.Namespace, after)
 
 	return nil
@@ -581,6 +615,7 @@ func (c *Reconciler) createPod(tr *v1alpha1.TaskRun, rtr *resources.ResolvedTask
 type DeletePod func(podName string, options *metav1.DeleteOptions) error
 
 func (c *Reconciler) updateTaskRunStatusForTimeout(tr *v1alpha1.TaskRun, dp DeletePod) error {
+
 	c.Logger.Infof("TaskRun %q has timed out, deleting pod", tr.Name)
 	// tr.Status.PodName will be empty if the pod was never successfully created. This condition
 	// can be reached, for example, by the pod never being schedulable due to limits imposed by