-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Panic in controller when step fails before image digest exporter #2222
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,8 +24,8 @@ import ( | |
"time" | ||
|
||
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1" | ||
"github.com/tektoncd/pipeline/pkg/logging" | ||
"github.com/tektoncd/pipeline/pkg/termination" | ||
"go.uber.org/zap" | ||
corev1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"knative.dev/pkg/apis" | ||
|
@@ -99,7 +99,7 @@ func SidecarsReady(podStatus corev1.PodStatus) bool { | |
} | ||
|
||
// MakeTaskRunStatus returns a TaskRunStatus based on the Pod's status. | ||
func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.TaskSpec) v1alpha1.TaskRunStatus { | ||
func MakeTaskRunStatus(logger *zap.SugaredLogger, tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.TaskSpec) v1alpha1.TaskRunStatus { | ||
trs := &tr.Status | ||
if trs.GetCondition(apis.ConditionSucceeded) == nil || trs.GetCondition(apis.ConditionSucceeded).Status == corev1.ConditionUnknown { | ||
// If the taskRunStatus doesn't exist yet, it's because we just started running | ||
|
@@ -114,39 +114,12 @@ func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.T | |
trs.PodName = pod.Name | ||
trs.Steps = []v1alpha1.StepState{} | ||
trs.Sidecars = []v1alpha1.SidecarState{} | ||
logger, _ := logging.NewLogger("", "status") | ||
defer func() { | ||
_ = logger.Sync() | ||
}() | ||
|
||
for _, s := range pod.Status.ContainerStatuses { | ||
if IsContainerStep(s.Name) { | ||
if s.State.Terminated != nil && len(s.State.Terminated.Message) != 0 { | ||
msg := s.State.Terminated.Message | ||
r, err := termination.ParseMessage(msg) | ||
if err != nil { | ||
logger.Errorf("Could not parse json message %q because of %w", msg, err) | ||
break | ||
} | ||
for index, result := range r { | ||
if result.Key == "StartedAt" { | ||
t, err := time.Parse(time.RFC3339, result.Value) | ||
if err != nil { | ||
logger.Errorf("Could not parse time: %q: %w", result.Value, err) | ||
break | ||
} | ||
s.State.Terminated.StartedAt = metav1.NewTime(t) | ||
// remove the entry for the starting time | ||
r = append(r[:index], r[index+1:]...) | ||
if len(r) == 0 { | ||
s.State.Terminated.Message = "" | ||
} else if bytes, err := json.Marshal(r); err != nil { | ||
logger.Errorf("Error marshalling remaining results: %w", err) | ||
} else { | ||
s.State.Terminated.Message = string(bytes) | ||
} | ||
break | ||
} | ||
if err := updateStatusStartTime(&s); err != nil { | ||
logger.Errorf("error setting the start time of step %q in taskrun %q: %w", s.Name, tr.Name, err) | ||
} | ||
} | ||
trs.Steps = append(trs.Steps, v1alpha1.StepState{ | ||
|
@@ -180,6 +153,36 @@ func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.T | |
return *trs | ||
} | ||
|
||
// updateStatusStartTime searches for a result called "StartedAt" in the JSON-formatted termination message | ||
// of a step and sets the State.Terminated.StartedAt field to this time if it's found. The "StartedAt" result | ||
// is also removed from the list of results in the container status. | ||
func updateStatusStartTime(s *corev1.ContainerStatus) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i love that this is a separate function! i dont mind if you want to ignore this, but since its separate id expect to see unit tests for it, and also i find that once we have a private/unexported function that 1) can have meaningful tests and 2) has good reason to have a docstring, it often makes sense to move it into its own package and make it public/exported |
||
r, err := termination.ParseMessage(s.State.Terminated.Message) | ||
if err != nil { | ||
return fmt.Errorf("termination message could not be parsed as JSON: %w", err) | ||
} | ||
for index, result := range r { | ||
if result.Key == "StartedAt" { | ||
t, err := time.Parse(time.RFC3339, result.Value) | ||
if err != nil { | ||
return fmt.Errorf("could not parse time value %q in StartedAt field: %w", result.Value, err) | ||
} | ||
s.State.Terminated.StartedAt = metav1.NewTime(t) | ||
// remove the entry for the starting time | ||
r = append(r[:index], r[index+1:]...) | ||
if len(r) == 0 { | ||
s.State.Terminated.Message = "" | ||
} else if bytes, err := json.Marshal(r); err != nil { | ||
return fmt.Errorf("error marshalling remaining results back into termination message: %w", err) | ||
} else { | ||
s.State.Terminated.Message = string(bytes) | ||
} | ||
break | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func updateCompletedTaskRun(trs *v1alpha1.TaskRunStatus, pod *corev1.Pod) { | ||
if DidTaskRunFail(pod) { | ||
msg := getFailureMessage(pod) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,8 @@ import ( | |
|
||
"github.com/google/go-cmp/cmp" | ||
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1" | ||
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1" | ||
"github.com/tektoncd/pipeline/pkg/logging" | ||
corev1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"knative.dev/pkg/apis" | ||
|
@@ -40,6 +42,7 @@ func TestMakeTaskRunStatus(t *testing.T) { | |
for _, c := range []struct { | ||
desc string | ||
podStatus corev1.PodStatus | ||
taskSpec v1alpha1.TaskSpec | ||
want v1alpha1.TaskRunStatus | ||
}{{ | ||
desc: "empty", | ||
|
@@ -600,6 +603,94 @@ func TestMakeTaskRunStatus(t *testing.T) { | |
}}, | ||
}, | ||
}, | ||
}, { | ||
desc: "non-json-termination-message-with-steps-afterwards-shouldnt-panic", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎉 |
||
taskSpec: v1alpha1.TaskSpec{TaskSpec: v1beta1.TaskSpec{ | ||
Steps: []v1alpha1.Step{{Container: corev1.Container{ | ||
Name: "non-json", | ||
}}, {Container: corev1.Container{ | ||
Name: "after-non-json", | ||
}}, {Container: corev1.Container{ | ||
Name: "this-step-might-panic", | ||
}}, {Container: corev1.Container{ | ||
Name: "foo", | ||
}}}, | ||
}}, | ||
podStatus: corev1.PodStatus{ | ||
Phase: corev1.PodFailed, | ||
ContainerStatuses: []corev1.ContainerStatus{{ | ||
Name: "step-this-step-might-panic", | ||
ImageID: "image", | ||
State: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}, | ||
}, | ||
}, { | ||
Name: "step-foo", | ||
ImageID: "image", | ||
State: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}, | ||
}, | ||
}, { | ||
Name: "step-non-json", | ||
ImageID: "image", | ||
State: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{ | ||
ExitCode: 1, | ||
Message: "this is a non-json termination message. dont panic!", | ||
}, | ||
}, | ||
}, { | ||
Name: "step-after-non-json", | ||
ImageID: "image", | ||
State: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}, | ||
}, | ||
}}, | ||
}, | ||
want: v1alpha1.TaskRunStatus{ | ||
Status: duckv1beta1.Status{ | ||
Conditions: []apis.Condition{{ | ||
Type: apis.ConditionSucceeded, | ||
Status: corev1.ConditionFalse, | ||
Reason: ReasonFailed, | ||
Message: `"step-non-json" exited with code 1 (image: "image"); for logs run: kubectl -n foo logs pod -c step-non-json`, | ||
}}, | ||
}, | ||
TaskRunStatusFields: v1alpha1.TaskRunStatusFields{ | ||
Steps: []v1alpha1.StepState{{ | ||
ContainerState: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{ | ||
ExitCode: 1, | ||
Message: "this is a non-json termination message. dont panic!", | ||
}}, | ||
|
||
Name: "non-json", | ||
ContainerName: "step-non-json", | ||
ImageID: "image", | ||
}, { | ||
ContainerState: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}}, | ||
Name: "after-non-json", | ||
ContainerName: "step-after-non-json", | ||
ImageID: "image", | ||
}, { | ||
ContainerState: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}}, | ||
Name: "this-step-might-panic", | ||
ContainerName: "step-this-step-might-panic", | ||
ImageID: "image", | ||
}, { | ||
ContainerState: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{}}, | ||
Name: "foo", | ||
ContainerName: "step-foo", | ||
ImageID: "image", | ||
}}, | ||
Sidecars: []v1alpha1.SidecarState{}, | ||
// We don't actually care about the time, just that it's not nil | ||
CompletionTime: &metav1.Time{Time: time.Now()}, | ||
}, | ||
}, | ||
}} { | ||
t.Run(c.desc, func(t *testing.T) { | ||
now := metav1.Now() | ||
|
@@ -624,7 +715,8 @@ func TestMakeTaskRunStatus(t *testing.T) { | |
}, | ||
} | ||
|
||
got := MakeTaskRunStatus(tr, pod, v1alpha1.TaskSpec{}) | ||
logger, _ := logging.NewLogger("", "status") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for threading the logger through! its kind of a pain but i think it's right way to do it :D |
||
got := MakeTaskRunStatus(logger, tr, pod, c.taskSpec) | ||
|
||
// Common traits, set for test case brevity. | ||
c.want.PodName = "pod" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah okay so we're just logging an error but continuing onward - i could see including a comment here about why, since it might look like an oversight if just reading the code