diff --git a/test/e2e/storage/csi_mock/csi_selinux_mount.go b/test/e2e/storage/csi_mock/csi_selinux_mount.go
index b6b74c78c69ae..5322d5518165a 100644
--- a/test/e2e/storage/csi_mock/csi_selinux_mount.go
+++ b/test/e2e/storage/csi_mock/csi_selinux_mount.go
@@ -18,16 +18,22 @@ package csi_mock
 
 import (
 	"context"
+	"fmt"
+	"sort"
 	"sync/atomic"
+	"time"
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
+	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
@@ -237,3 +243,223 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount", func() {
 		}
 	})
 })
+
+var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() {
+	f := framework.NewDefaultFramework("csi-mock-volumes-selinux-metrics")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
+	m := newMockDriverSetup(f)
+
+	// [Serial]: the tests read global kubelet metrics, so no other test may change them in parallel.
+	ginkgo.Context("SELinuxMount metrics [LinuxOnly][Feature:SELinux][Feature:SELinuxMountReadWriteOncePod][Serial]", func() {
+
+		// All SELinux metrics. Unless explicitly mentioned in test.expectIncreases, these metrics must not grow during
+		// a test.
+		allMetrics := sets.NewString(
+			"volume_manager_selinux_container_errors_total",
+			"volume_manager_selinux_container_warnings_total",
+			"volume_manager_selinux_pod_context_mismatch_errors_total",
+			"volume_manager_selinux_pod_context_mismatch_warnings_total",
+			"volume_manager_selinux_volume_context_mismatch_errors_total",
+			"volume_manager_selinux_volume_context_mismatch_warnings_total",
+			"volume_manager_selinux_volumes_admitted_total",
+		)
+
+		// Make sure all options are set, so system-specific defaults are not used.
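+		// The two contexts below differ only in the MCS level ("s0:c0,c1" vs. "s0:c98,c99");
+		// that difference alone is what the mismatch tests below rely on.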
+		seLinuxOpts1 := v1.SELinuxOptions{
+			User:  "system_u",
+			Role:  "object_r",
+			Type:  "container_file_t",
+			Level: "s0:c0,c1",
+		}
+		seLinuxOpts2 := v1.SELinuxOptions{
+			User:  "system_u",
+			Role:  "object_r",
+			Type:  "container_file_t",
+			Level: "s0:c98,c99",
+		}
+
+		tests := []struct {
+			name                    string
+			csiDriverSELinuxEnabled bool
+			firstPodSELinuxOpts     *v1.SELinuxOptions
+			secondPodSELinuxOpts    *v1.SELinuxOptions
+			accessMode              v1.PersistentVolumeAccessMode
+			waitForSecondPodStart   bool
+			secondPodFailureEvent   string
+			expectIncreases         sets.String
+		}{
+			{
+				name:                    "warning is not bumped on two Pods with the same context on RWO volume",
+				csiDriverSELinuxEnabled: true,
+				firstPodSELinuxOpts:     &seLinuxOpts1,
+				secondPodSELinuxOpts:    &seLinuxOpts1,
+				accessMode:              v1.ReadWriteOnce,
+				waitForSecondPodStart:   true,
+				expectIncreases:         sets.NewString( /* no metric is increased, admitted_total was already increased when the first pod started */ ),
+			},
+			{
+				name:                    "warning is bumped on two Pods with a different context on RWO volume",
+				csiDriverSELinuxEnabled: true,
+				firstPodSELinuxOpts:     &seLinuxOpts1,
+				secondPodSELinuxOpts:    &seLinuxOpts2,
+				accessMode:              v1.ReadWriteOnce,
+				waitForSecondPodStart:   true,
+				expectIncreases:         sets.NewString("volume_manager_selinux_volume_context_mismatch_warnings_total"),
+			},
+			{
+				name:                    "error is bumped on two Pods with a different context on RWOP volume",
+				csiDriverSELinuxEnabled: true,
+				firstPodSELinuxOpts:     &seLinuxOpts1,
+				secondPodSELinuxOpts:    &seLinuxOpts2,
+				secondPodFailureEvent:   "conflicting SELinux labels of volume",
+				accessMode:              v1.ReadWriteOncePod,
+				waitForSecondPodStart:   false,
+				expectIncreases:         sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"),
+			},
+		}
+		for _, t := range tests {
+			t := t
+			ginkgo.It(t.name, func(ctx context.Context) {
+				if framework.NodeOSDistroIs("windows") {
+					e2eskipper.Skipf("SELinuxMount is only applied on Linux nodes -- skipping")
+				}
+				grabber, err := e2emetrics.NewMetricsGrabber(ctx, f.ClientSet, nil, f.ClientConfig(), true, false, false, false, false, false)
+				framework.ExpectNoError(err, "creating the metrics grabber")
+
+				var nodeStageMountOpts, nodePublishMountOpts []string
+				var unstageCalls, stageCalls, unpublishCalls, publishCalls atomic.Int32
+				m.init(ctx, testParameters{
+					disableAttach:      true,
+					registerDriver:     true,
+					enableSELinuxMount: &t.csiDriverSELinuxEnabled,
+					hooks:              createSELinuxMountPreHook(&nodeStageMountOpts, &nodePublishMountOpts, &stageCalls, &unstageCalls, &publishCalls, &unpublishCalls),
+				})
+				ginkgo.DeferCleanup(m.cleanup)
+
+				ginkgo.By("Starting the first pod")
+				accessModes := []v1.PersistentVolumeAccessMode{t.accessMode}
+				_, claim, pod := m.createPodWithSELinux(ctx, accessModes, []string{}, t.firstPodSELinuxOpts)
+				err = e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod.Name, pod.Namespace)
+				framework.ExpectNoError(err, "starting the initial pod")
+
+				ginkgo.By("Grabbing initial metrics")
+				pod, err = m.cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err, "getting the initial pod")
+				metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics)
+				framework.ExpectNoError(err, "collecting the initial metrics")
+				dumpMetrics(metrics)
+
+				// Act
+				ginkgo.By("Starting the second pod")
+				// Skip the scheduler; it would block the second pod on the in-use ReadWriteOncePod PV.
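+				// Pinning the second pod to the first pod's node lets kubelet's volume manager
+				// detect the label conflict; that is the component exporting the metrics under test.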
+				nodeSelection := e2epod.NodeSelection{Name: pod.Spec.NodeName}
+				pod2, err := startPausePodWithSELinuxOptions(f.ClientSet, claim, nodeSelection, f.Namespace.Name, t.secondPodSELinuxOpts)
+				framework.ExpectNoError(err, "creating second pod with SELinux context %s", t.secondPodSELinuxOpts)
+				m.pods = append(m.pods, pod2)
+
+				if t.waitForSecondPodStart {
+					err := e2epod.WaitForPodNameRunningInNamespace(ctx, m.cs, pod2.Name, pod2.Namespace)
+					framework.ExpectNoError(err, "starting the second pod")
+				} else {
+					ginkgo.By("Waiting for the second pod to fail to start")
+					eventSelector := fields.Set{
+						"involvedObject.kind":      "Pod",
+						"involvedObject.name":      pod2.Name,
+						"involvedObject.namespace": pod2.Namespace,
+						"reason":                   events.FailedMountVolume,
+					}.AsSelector().String()
+					err = e2eevents.WaitTimeoutForEvent(ctx, m.cs, pod2.Namespace, eventSelector, t.secondPodFailureEvent, f.Timeouts.PodStart)
+					framework.ExpectNoError(err, "waiting for event %q in the second test pod", t.secondPodFailureEvent)
+				}
+
+				// Assert: count the metrics
+				ginkgo.By("Waiting for expected metric changes")
+				err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, allMetrics, t.expectIncreases, metrics, framework.PodStartShortTimeout)
+				framework.ExpectNoError(err, "waiting for metrics %s to increase", t.expectIncreases)
+			})
+		}
+	})
+})
+
+func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String) (map[string]float64, error) {
+	response, err := grabber.GrabFromKubelet(ctx, nodeName)
+	if err != nil {
+		// Return the error instead of failing the test here: this function runs inside a poll loop.
+		return nil, fmt.Errorf("grabbing kubelet metrics from node %s: %w", nodeName, err)
+	}
+
+	metrics := map[string]float64{}
+	for method, samples := range response {
+		if metricNames.Has(method) {
+			if len(samples) == 0 {
+				return nil, fmt.Errorf("metric %s has no samples", method)
+			}
+			lastSample := samples[len(samples)-1]
+			metrics[method] = float64(lastSample.Value)
+		}
+	}
+
+	// Ensure all metrics were provided
+	for name := range metricNames {
+		if _, found := metrics[name]; !found {
+			return nil, fmt.Errorf("metric %s not found", name)
+		}
+	}
+
+	return metrics, nil
+}
+
+func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error {
+	var noIncreaseMetrics sets.String
+	var metrics map[string]float64
+
+	err := wait.Poll(time.Second, timeout, func() (bool, error) {
+		var err error
+		metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames)
+		if err != nil {
+			return false, err
+		}
+
+		noIncreaseMetrics = sets.NewString()
+		// Always evaluate all SELinux metrics to check that the other metrics are not unexpectedly increased.
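+		// Metrics listed in expectedIncreaseNames must rise strictly above their initial
+		// value; every other SELinux metric must stay exactly at its initial value.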
+		for name := range allMetricNames {
+			if expectedIncreaseNames.Has(name) {
+				if metrics[name] <= initialValues[name] {
+					noIncreaseMetrics.Insert(name)
+				}
+			} else {
+				if initialValues[name] != metrics[name] {
+					return false, fmt.Errorf("metric %s unexpectedly increased to %v", name, metrics[name])
+				}
+			}
+		}
+		return noIncreaseMetrics.Len() == 0, nil
+	})
+
+	ginkgo.By("Dumping final metrics")
+	dumpMetrics(metrics)
+
+	// wait.Poll reports a timeout as wait.ErrWaitTimeout, not context.DeadlineExceeded.
+	if err == wait.ErrWaitTimeout {
+		return fmt.Errorf("timed out waiting for metrics %v", noIncreaseMetrics.List())
+	}
+	return err
+}
+
+func dumpMetrics(metrics map[string]float64) {
+	// Print the metrics sorted by metric name for better readability
+	keys := make([]string, 0, len(metrics))
+	for key := range metrics {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	for _, key := range keys {
+		framework.Logf("Metric %s: %v", key, metrics[key])
+	}
+}