Skip to content

Commit

Permalink
feat: report error when common PVC cleanup job hangs
Browse files Browse the repository at this point in the history
Fix devfile#551

Signed-off-by: Andrew Obuchowicz <[email protected]>
  • Loading branch information
AObuchow committed Jun 14, 2022
1 parent 7dcf0e8 commit 32f04ca
Showing 1 changed file with 66 additions and 3 deletions.
69 changes: 66 additions & 3 deletions pkg/provision/storage/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ package storage

import (
"fmt"
"path"
"time"

dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
check "github.com/devfile/devworkspace-operator/pkg/library/status"
nsconfig "github.com/devfile/devworkspace-operator/pkg/provision/config"
"github.com/devfile/devworkspace-operator/pkg/provision/sync"
batchv1 "k8s.io/api/batch/v1"
Expand All @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

"github.com/devfile/devworkspace-operator/internal/images"
Expand Down Expand Up @@ -91,6 +92,21 @@ func runCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.ClusterA
}
}
}

msg, err := checkCleanupPodsState(clusterJob, workspace.Status.DevWorkspaceId, clusterAPI)
if err != nil {
return &ProvisioningError{
Err: err,
}
}

if msg != "" {
errMsg := fmt.Sprintf("DevWorkspace PVC cleanup job failed: see logs for job %q for details. Additional information: %s", clusterJob.Name, msg)
return &ProvisioningError{
Message: errMsg,
}
}

// Requeue at least every 10 seconds to check whether the PVC has been removed by someone else
return &NotReadyError{
Message: "Cleanup job is not in completed state",
Expand All @@ -110,7 +126,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
}

jobLabels := map[string]string{
constants.DevWorkspaceIDLabel: workspaceId,
constants.DevWorkspaceIDLabel: workspaceId,
constants.DevWorkspaceNameLabel: workspace.Name,
constants.DevWorkspaceCreatorLabel: workspace.Labels[constants.DevWorkspaceCreatorLabel],
}
if restrictedAccess, needsRestrictedAccess := workspace.Annotations[constants.DevWorkspaceRestrictedAccessAnnotation]; needsRestrictedAccess {
jobLabels[constants.DevWorkspaceRestrictedAccessAnnotation] = restrictedAccess
Expand All @@ -126,6 +144,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
Completions: &cleanupJobCompletions,
BackoffLimit: &cleanupJobBackoffLimit,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: jobLabels,
},
Spec: corev1.PodSpec{
RestartPolicy: "Never",
SecurityContext: wsprovision.GetDevWorkspaceSecurityContext(),
Expand All @@ -146,7 +167,8 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
Command: []string{"/bin/sh"},
Args: []string{
"-c",
fmt.Sprintf(cleanupCommandFmt, path.Join(pvcClaimMountPath, workspaceId)),
//fmt.Sprintf(cleanupCommandFmt, path.Join(pvcClaimMountPath, workspaceId)),
"exit 1",
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
Expand Down Expand Up @@ -203,3 +225,44 @@ func commonPVCExists(workspace *dw.DevWorkspace, clusterAPI sync.ClusterAPI) (bo
}
return true, nil
}

// checkCleanupPodsState inspects the pods created for the common PVC cleanup job and
// reports the first problem it finds.
//
// Pods are looked up in job.Namespace by the "job-name" label, using the name returned by
// common.PVCCleanupJobName(workspaceID). For each pod, the regular container statuses are
// checked first, then the init container statuses, and finally the pod's events.
//
// Returns:
//   - a non-empty msg describing the problem when a container status is flagged by
//     check.CheckContainerStatusForFailure, or when check.CheckPodEvents returns a message;
//   - a non-nil err when listing the pods or checking pod events fails;
//   - ("", nil) when no problem was detected.
func checkCleanupPodsState(job *batchv1.Job, workspaceID string, clusterAPI sync.ClusterAPI) (msg string, err error) {
	// NOTE(review): the selector is derived from workspaceID rather than job.Name;
	// presumably both yield the same name — confirm against the caller.
	selector := k8sclient.MatchingLabels{"job-name": common.PVCCleanupJobName(workspaceID)}
	pods, err := check.GetPods(job.Namespace, selector, clusterAPI.Client)
	if err != nil {
		return "", err
	}

	// Iterate by index so that pointers refer to the slice elements themselves instead of
	// to a per-iteration copy held in a range variable.
	for i := range pods.Items {
		pod := &pods.Items[i]

		for j := range pod.Status.ContainerStatuses {
			status := &pod.Status.ContainerStatuses[j]
			if !check.CheckContainerStatusForFailure(status) {
				continue
			}
			// TODO: Maybe move this logic into CheckContainerStatusForFailure and return bool, reason ?
			reason := ""
			switch {
			case status.State.Waiting != nil:
				reason = status.State.Waiting.Reason
			case status.State.Terminated != nil:
				reason = status.State.Terminated.Reason
			}
			return fmt.Sprintf("Common PVC Cleanup related container %s has state %s.", status.Name, reason), nil
		}

		for j := range pod.Status.InitContainerStatuses {
			status := &pod.Status.InitContainerStatuses[j]
			if !check.CheckContainerStatusForFailure(status) {
				continue
			}
			reason := ""
			switch {
			case status.State.Waiting != nil:
				reason = status.State.Waiting.Reason
			case status.State.Terminated != nil:
				reason = status.State.Terminated.Reason
			}
			return fmt.Sprintf("Common PVC Cleanup related init container %s has state %s.", status.Name, reason), nil
		}

		// Distinct names keep the named results msg/err from being shadowed.
		eventMsg, eventErr := check.CheckPodEvents(pod, workspaceID, clusterAPI)
		if eventErr != nil || eventMsg != "" {
			return eventMsg, eventErr
		}
	}

	return "", nil
}

0 comments on commit 32f04ca

Please sign in to comment.