Skip to content

Commit

Permalink
feat: report error when common PVC cleanup job hangs
Browse files Browse the repository at this point in the history
Fix #551

Signed-off-by: Andrew Obuchowicz <[email protected]>
  • Loading branch information
AObuchow committed Jun 21, 2022
1 parent 540f408 commit f3e317a
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 26 deletions.
41 changes: 20 additions & 21 deletions pkg/library/status/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
// limitations under the License.
//

package check
package status

import (
"context"
"fmt"
"strings"

dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
"github.com/devfile/devworkspace-operator/pkg/common"
"github.com/devfile/devworkspace-operator/pkg/config"
"github.com/devfile/devworkspace-operator/pkg/infrastructure"
Expand All @@ -29,7 +28,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
runtimeClient "sigs.k8s.io/controller-runtime/pkg/client"
)

var containerFailureStateReasons = []string{
Expand Down Expand Up @@ -76,25 +74,27 @@ func CheckDeploymentConditions(deployment *appsv1.Deployment) (healthy bool, err
// matching unrecoverablePodEventReasons) has the pod as the involved object.
// Returns optional message with detected unrecoverable state details
// error if any happens during check
func CheckPodsState(workspace *dw.DevWorkspace, namespace string, labelSelector k8sclient.MatchingLabels,
func CheckPodsState(workspaceID string, namespace string, labelSelector k8sclient.MatchingLabels,
clusterAPI sync.ClusterAPI) (stateMsg string, checkFailure error) {
podList, err := GetPods(namespace, labelSelector, clusterAPI.Client)
if err != nil {
podList := &corev1.PodList{}
if err := clusterAPI.Client.List(context.TODO(), podList, k8sclient.InNamespace(namespace), labelSelector); err != nil {
return "", err
}

for _, pod := range podList.Items {
for _, containerStatus := range pod.Status.ContainerStatuses {
if !CheckContainerStatusForFailure(&containerStatus) {
return fmt.Sprintf("Container %s has state %s", containerStatus.Name, containerStatus.State.Waiting.Reason), nil
ok, reason := CheckContainerStatusForFailure(&containerStatus)
if !ok {
return fmt.Sprintf("Container %s has state %s", containerStatus.Name, reason), nil
}
}
for _, initContainerStatus := range pod.Status.InitContainerStatuses {
if !CheckContainerStatusForFailure(&initContainerStatus) {
return fmt.Sprintf("Init Container %s has state %s", initContainerStatus.Name, initContainerStatus.State.Waiting.Reason), nil
ok, reason := CheckContainerStatusForFailure(&initContainerStatus)
if !ok {
return fmt.Sprintf("Init Container %s has state %s", initContainerStatus.Name, reason), nil
}
}
if msg, err := CheckPodEvents(&pod, workspace.Status.DevWorkspaceId, clusterAPI); err != nil || msg != "" {
if msg, err := CheckPodEvents(&pod, workspaceID, clusterAPI); err != nil || msg != "" {
return msg, err
}
}
Expand Down Expand Up @@ -138,24 +138,23 @@ func CheckPodEvents(pod *corev1.Pod, workspaceID string, clusterAPI sync.Cluster
return "", nil
}

func CheckContainerStatusForFailure(containerStatus *corev1.ContainerStatus) (ok bool) {
func CheckContainerStatusForFailure(containerStatus *corev1.ContainerStatus) (ok bool, reason string) {
if containerStatus.State.Waiting != nil {
for _, failureReason := range containerFailureStateReasons {
if containerStatus.State.Waiting.Reason == failureReason {
return checkIfUnrecoverableEventIgnored(containerStatus.State.Waiting.Reason)
return checkIfUnrecoverableEventIgnored(containerStatus.State.Waiting.Reason), containerStatus.State.Waiting.Reason
}
}
}
return true
}

// TODO: Remove this function?
func GetPods(namespace string, labelSelector k8sclient.MatchingLabels, client runtimeClient.Client) (*corev1.PodList, error) {
pods := &corev1.PodList{}
if err := client.List(context.TODO(), pods, k8sclient.InNamespace(namespace), labelSelector); err != nil {
return nil, err
if containerStatus.State.Terminated != nil {
for _, failureReason := range containerFailureStateReasons {
if containerStatus.State.Terminated.Reason == failureReason {
return checkIfUnrecoverableEventIgnored(containerStatus.State.Terminated.Reason), containerStatus.State.Terminated.Reason
}
}
}
return pods, nil
return true, ""
}

func checkIfUnrecoverableEventIgnored(reason string) (ignored bool) {
Expand Down
24 changes: 23 additions & 1 deletion pkg/provision/storage/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"time"

dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
"github.com/devfile/devworkspace-operator/pkg/library/status"
nsconfig "github.com/devfile/devworkspace-operator/pkg/provision/config"
"github.com/devfile/devworkspace-operator/pkg/provision/sync"
batchv1 "k8s.io/api/batch/v1"
Expand All @@ -29,6 +30,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

"github.com/devfile/devworkspace-operator/internal/images"
Expand Down Expand Up @@ -91,6 +93,21 @@ func runCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.ClusterA
}
}
}

msg, err := status.CheckPodsState(workspace.Status.DevWorkspaceId, clusterJob.Namespace, k8sclient.MatchingLabels{"job-name": common.PVCCleanupJobName(workspace.Status.DevWorkspaceId)}, clusterAPI)
if err != nil {
return &ProvisioningError{
Err: err,
}
}

if msg != "" {
errMsg := fmt.Sprintf("DevWorkspace common PVC cleanup job failed: see logs for job %q for details. Additional information: %s", clusterJob.Name, msg)
return &ProvisioningError{
Message: errMsg,
}
}

// Requeue at least each 10 seconds to check if PVC is not removed by someone else
return &NotReadyError{
Message: "Cleanup job is not in completed state",
Expand All @@ -110,7 +127,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
}

jobLabels := map[string]string{
constants.DevWorkspaceIDLabel: workspaceId,
constants.DevWorkspaceIDLabel: workspaceId,
constants.DevWorkspaceNameLabel: workspace.Name,
constants.DevWorkspaceCreatorLabel: workspace.Labels[constants.DevWorkspaceCreatorLabel],
}
if restrictedAccess, needsRestrictedAccess := workspace.Annotations[constants.DevWorkspaceRestrictedAccessAnnotation]; needsRestrictedAccess {
jobLabels[constants.DevWorkspaceRestrictedAccessAnnotation] = restrictedAccess
Expand All @@ -126,6 +145,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
Completions: &cleanupJobCompletions,
BackoffLimit: &cleanupJobBackoffLimit,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: jobLabels,
},
Spec: corev1.PodSpec{
RestartPolicy: "Never",
SecurityContext: wsprovision.GetDevWorkspaceSecurityContext(),
Expand Down
8 changes: 4 additions & 4 deletions pkg/provision/workspace/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"errors"
"fmt"

check "github.com/devfile/devworkspace-operator/pkg/library/status"
"github.com/devfile/devworkspace-operator/pkg/library/status"
nsconfig "github.com/devfile/devworkspace-operator/pkg/provision/config"
"github.com/devfile/devworkspace-operator/pkg/provision/sync"

Expand Down Expand Up @@ -97,7 +97,7 @@ func SyncDeploymentToCluster(
}
clusterDeployment := clusterObj.(*appsv1.Deployment)

deploymentReady := check.CheckDeploymentStatus(clusterDeployment)
deploymentReady := status.CheckDeploymentStatus(clusterDeployment)
if deploymentReady {
return DeploymentProvisioningStatus{
ProvisioningStatus: ProvisioningStatus{
Expand All @@ -106,7 +106,7 @@ func SyncDeploymentToCluster(
}
}

deploymentHealthy, deploymentErrMsg := check.CheckDeploymentConditions(clusterDeployment)
deploymentHealthy, deploymentErrMsg := status.CheckDeploymentConditions(clusterDeployment)
if !deploymentHealthy {
return DeploymentProvisioningStatus{
ProvisioningStatus: ProvisioningStatus{
Expand All @@ -116,7 +116,7 @@ func SyncDeploymentToCluster(
}
}

failureMsg, checkErr := check.CheckPodsState(workspace, workspace.Namespace, k8sclient.MatchingLabels{
failureMsg, checkErr := status.CheckPodsState(workspace.Status.DevWorkspaceId, workspace.Namespace, k8sclient.MatchingLabels{
constants.DevWorkspaceIDLabel: workspace.Status.DevWorkspaceId,
}, clusterAPI)
if checkErr != nil {
Expand Down

0 comments on commit f3e317a

Please sign in to comment.