From fffbb32e0d725e525e77507efc9235f52cbfc7c0 Mon Sep 17 00:00:00 2001 From: danfengl Date: Wed, 3 Apr 2024 05:49:31 +0000 Subject: [PATCH] Add checkpoint for FS backup deletion test As per PR #7281, when the repository count is more than 1, snapshot deletion is performed via a fast path, so the test should have more than 1 FS backup repository per backup. Signed-off-by: danfengl --- test/e2e/backups/deletion.go | 132 +++++++++++++++--------- test/e2e/privilegesmgmt/ssr.go | 10 +- test/util/common/common.go | 4 +- test/util/velero/velero_utils.go | 171 ++++++++++++++++++++++++++++--- 4 files changed, 252 insertions(+), 65 deletions(-) diff --git a/test/e2e/backups/deletion.go b/test/e2e/backups/deletion.go index 10a9e09c05..488283784c 100644 --- a/test/e2e/backups/deletion.go +++ b/test/e2e/backups/deletion.go @@ -19,6 +19,7 @@ import ( "context" "flag" "fmt" + "strings" "time" "github.com/google/uuid" @@ -33,8 +34,6 @@ import ( . "github.com/vmware-tanzu/velero/test/util/velero" ) -const deletionTest = "deletion-workload" - // Test backup and restore of Kibishi using restic func BackupDeletionWithSnapshots() { @@ -45,11 +44,7 @@ func BackupDeletionWithRestic() { backup_deletion_test(false) } func backup_deletion_test(useVolumeSnapshots bool) { - var ( - backupName string - veleroCfg VeleroConfig - ) - veleroCfg = VeleroCfg + veleroCfg := VeleroCfg veleroCfg.UseVolumeSnapshots = useVolumeSnapshots veleroCfg.UseNodeAgent = !useVolumeSnapshots @@ -76,16 +71,23 @@ func backup_deletion_test(useVolumeSnapshots bool) { When("kibishii is the sample workload", func() { It("Deleted backups are deleted from object storage and backups deleted from object storage can be deleted locally", func() { - backupName = "backup-" + UUIDgen.String() - Expect(runBackupDeletionTests(*veleroCfg.ClientToInstallVelero, veleroCfg, backupName, "", useVolumeSnapshots, veleroCfg.KibishiiDirectory)).To(Succeed(), + Expect(runBackupDeletionTests(*veleroCfg.ClientToInstallVelero, veleroCfg, "", 
useVolumeSnapshots, veleroCfg.KibishiiDirectory)).To(Succeed(), "Failed to run backup deletion test") }) }) } -// runBackupDeletionTests runs upgrade test on the provider by kibishii. -func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupName, backupLocation string, +// runBackupDeletionTests runs the backup deletion test on the provider using the kibishii workload. +func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLocation string, useVolumeSnapshots bool, kibishiiDirectory string) error { + var err error + var snapshotCheckPoint SnapshotCheckPoint + backupName := "backup-" + UUIDgen.String() + + workloadNamespaceList := []string{"backup-deletion-1-" + UUIDgen.String(), "backup-deletion-2-" + UUIDgen.String()} + nsCount := len(workloadNamespaceList) + workloadNamespaces := strings.Join(workloadNamespaceList[:], ",") + if useVolumeSnapshots && veleroCfg.CloudProvider == "kind" { Skip("Volume snapshots not supported on kind") } @@ -98,29 +100,30 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam bslPrefix := veleroCfg.BSLPrefix bslConfig := veleroCfg.BSLConfig veleroFeatures := veleroCfg.Features - - if err := CreateNamespace(oneHourTimeout, client, deletionTest); err != nil { - return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", deletionTest) - } - if !veleroCfg.Debug { - defer func() { - if err := DeleteNamespace(context.Background(), client, deletionTest, true); err != nil { - fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", deletionTest)) - } - }() + for _, ns := range workloadNamespaceList { + if err := CreateNamespace(oneHourTimeout, client, ns); err != nil { + return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", ns) + } + if !veleroCfg.Debug { + defer func() { + if err := DeleteNamespace(context.Background(), client, ns, true); err != nil { + fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", ns)) + } + }() + 
} + if err := KibishiiPrepareBeforeBackup(oneHourTimeout, client, providerName, ns, + registryCredentialFile, veleroFeatures, kibishiiDirectory, useVolumeSnapshots, DefaultKibishiiData); err != nil { + return errors.Wrapf(err, "Failed to install and prepare data for kibishii %s", ns) + } + err := ObjectsShouldNotBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLPrefix, veleroCfg.BSLConfig, backupName, BackupObjectsPrefix, 1) + if err != nil { + return err + } } - if err := KibishiiPrepareBeforeBackup(oneHourTimeout, client, providerName, deletionTest, - registryCredentialFile, veleroFeatures, kibishiiDirectory, useVolumeSnapshots, DefaultKibishiiData); err != nil { - return errors.Wrapf(err, "Failed to install and prepare data for kibishii %s", deletionTest) - } - err := ObjectsShouldNotBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLPrefix, veleroCfg.BSLConfig, backupName, BackupObjectsPrefix, 1) - if err != nil { - return err - } var BackupCfg BackupConfig BackupCfg.BackupName = backupName - BackupCfg.Namespace = deletionTest + BackupCfg.Namespace = workloadNamespaces BackupCfg.BackupLocation = backupLocation BackupCfg.UseVolumeSnapshots = useVolumeSnapshots BackupCfg.DefaultVolumesToFsBackup = !useVolumeSnapshots @@ -133,34 +136,71 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam return "Fail to backup workload" }) }) - - if providerName == Vsphere && useVolumeSnapshots { - // Wait for uploads started by the Velero Plugin for vSphere to complete - // TODO - remove after upload progress monitoring is implemented - fmt.Println("Waiting for vSphere uploads to complete") - if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, deletionTest, 2); err != nil { - return errors.Wrapf(err, "Error waiting for uploads to complete") + for _, ns := range workloadNamespaceList { + if providerName == Vsphere && 
useVolumeSnapshots { + // Wait for uploads started by the Velero Plugin for vSphere to complete + // TODO - remove after upload progress monitoring is implemented + fmt.Println("Waiting for vSphere uploads to complete") + if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, ns, DefaultKibishiiWorkerCounts); err != nil { + return errors.Wrapf(err, "Error waiting for uploads to complete") + } } } err = ObjectsShouldBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslPrefix, bslConfig, backupName, BackupObjectsPrefix) if err != nil { return err } - var snapshotCheckPoint SnapshotCheckPoint + if useVolumeSnapshots { - snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, 2, deletionTest, backupName, KibishiiPVCNameList) - Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") - err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider, - veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig, - backupName, snapshotCheckPoint) + // Check that the snapshots exist + if veleroCfg.CloudProvider == Vsphere { + // For vSphere, snapshots are checked per namespace and backup name + for _, ns := range workloadNamespaceList { + snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList) + Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") + err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider, + veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig, + backupName, snapshotCheckPoint) + if err != nil { + return errors.Wrap(err, "exceed waiting for snapshot created in cloud") + } + } + } else { + // For public clouds, VolumeSnapshotContents are indexed by backup name, so their count must include the PVs in all namespaces, + // i.e. the VolumeSnapshotContents count should equal "namespace count" * "Kibishii worker count per 
namespace". + snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList) + Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint") + + // Get all snapshots based on backup name, regardless of namespace + err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider, + veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig, + backupName, snapshotCheckPoint) + if err != nil { + return errors.Wrap(err, "exceed waiting for snapshot created in cloud") + } + } + + } else { + // List and print the BackupRepository and PodVolumeBackup resources + var brList, pvbList []string + brList, err = KubectlGetBackupRepository(oneHourTimeout, "kopia", veleroCfg.VeleroNamespace) if err != nil { - return errors.Wrap(err, "exceed waiting for snapshot created in cloud") + return err + } + pvbList, err = KubectlGetPodVolumeBackup(oneHourTimeout, BackupCfg.BackupName, veleroCfg.VeleroNamespace) + + fmt.Println(brList) + fmt.Println(pvbList) + if err != nil { + return err } } - err = DeleteBackupResource(context.Background(), backupName, &veleroCfg) + + err = DeleteBackup(context.Background(), backupName, &veleroCfg) if err != nil { return err } + if useVolumeSnapshots { err = SnapshotsShouldNotExistInCloud(veleroCfg.CloudProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLConfig, @@ -207,7 +247,7 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam return err } - err = DeleteBackupResource(context.Background(), backupName, &veleroCfg) + err = DeleteBackup(context.Background(), backupName, &veleroCfg) if err != nil { return errors.Wrapf(err, "|| UNEXPECTED || - Failed to delete backup %q", backupName) } else { diff --git a/test/e2e/privilegesmgmt/ssr.go b/test/e2e/privilegesmgmt/ssr.go index c5a47da707..d6e955bf54 100644 --- a/test/e2e/privilegesmgmt/ssr.go +++ b/test/e2e/privilegesmgmt/ssr.go @@ -25,6 +25,7 @@ import ( . 
"github.com/onsi/ginkgo" . "github.com/onsi/gomega" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" waitutil "k8s.io/apimachinery/pkg/util/wait" kbclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -66,8 +67,8 @@ func SSRTest() { }) ssrListResp := new(v1.ServerStatusRequestList) By(fmt.Sprintf("Check ssr object in %s namespace", veleroCfg.VeleroNamespace)) - err = waitutil.PollImmediate(5*time.Second, time.Minute, - func() (bool, error) { + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, time.Minute, true, + func(context.Context) (bool, error) { if err = veleroCfg.ClientToInstallVelero.Kubebuilder.List(ctx, ssrListResp, &kbclient.ListOptions{Namespace: veleroCfg.VeleroNamespace}); err != nil { return false, fmt.Errorf("failed to list ssr object in %s namespace with err %v", veleroCfg.VeleroNamespace, err) } @@ -85,9 +86,8 @@ func SSRTest() { } return true, nil }) - if err == waitutil.ErrWaitTimeout { - fmt.Printf("exceed test case deadline and failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace) - } + fmt.Printf("exceed test case deadline and failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace) + Expect(err).To(Succeed(), fmt.Sprintf("Failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace)) By(fmt.Sprintf("Check ssr object in %s namespace", testNS)) diff --git a/test/util/common/common.go b/test/util/common/common.go index d014d2a09f..3b130f209a 100644 --- a/test/util/common/common.go +++ b/test/util/common/common.go @@ -20,11 +20,12 @@ func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string var buf bytes.Buffer var err error var cmds []*exec.Cmd + for _, cmdline := range cmdlines { cmd := exec.Command(cmdline.Cmd, cmdline.Args...) 
cmds = append(cmds, cmd) - fmt.Println(cmd) } + fmt.Println(cmds) for i := 0; i < len(cmds); i++ { if i == len(cmds)-1 { break @@ -55,7 +56,6 @@ func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string if err := scanner.Err(); err != nil { return nil, err } - return ret, nil } diff --git a/test/util/velero/velero_utils.go b/test/util/velero/velero_utils.go index ac55a7c776..d90ddab79c 100644 --- a/test/util/velero/velero_utils.go +++ b/test/util/velero/velero_utils.go @@ -929,7 +929,7 @@ func getVeleroCliTarball(cliTarballUrl string) (*os.File, error) { return tmpfile, nil } -func DeleteBackupResource(ctx context.Context, backupName string, velerocfg *VeleroConfig) error { +func DeleteBackup(ctx context.Context, backupName string, velerocfg *VeleroConfig) error { veleroCLI := velerocfg.VeleroCLI args := []string{"--namespace", velerocfg.VeleroNamespace, "backup", "delete", backupName, "--confirm"} @@ -945,20 +945,53 @@ func DeleteBackupResource(ctx context.Context, backupName string, velerocfg *Vel args = []string{"--namespace", velerocfg.VeleroNamespace, "backup", "get", backupName} - retryTimes := 5 - for i := 1; i < retryTimes+1; i++ { - cmd = exec.CommandContext(ctx, veleroCLI, args...) - fmt.Printf("Try %d times to delete backup %s \n", i, cmd.String()) - stdout, stderr, err = veleroexec.RunCommand(cmd) - if err != nil { - if strings.Contains(stderr, "not found") { - fmt.Printf("|| EXPECTED || - Backup %s was deleted successfully according to message %s\n", backupName, stderr) - return nil + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, time.Minute, true, + func(context.Context) (bool, error) { + cmd = exec.CommandContext(ctx, veleroCLI, args...) 
+ fmt.Printf("Try to get backup with cmd: %s \n", cmd.String()) + stdout, stderr, err = veleroexec.RunCommand(cmd) + if err != nil { + if strings.Contains(stderr, "not found") { + fmt.Printf("|| EXPECTED || - Backup %s was deleted successfully according to message %s\n", backupName, stderr) + return true, nil + } + return false, errors.Wrapf(err, "Fail to perform get backup, stdout=%s, stderr=%s", stdout, stderr) + } + + var status string + var drList []string + drList, err = KubectlGetAllDeleteBackupRequest(context.Background(), backupName, velerocfg.VeleroNamespace) + if len(drList) > 1 { + return false, errors.New(fmt.Sprintf("Count of DeleteBackupRequest %d is not expected", len(drList))) + } + + // Record DeleteBackupRequest status for debugging + for _, dr := range drList { + status, err = KubectlGetDeleteBackupRequestStatus(context.Background(), dr, velerocfg.VeleroNamespace) + fmt.Printf("DeleteBackupRequest status: %s\n", status) } - return errors.Wrapf(err, "Fail to perform get backup, stdout=%s, stderr=%s", stdout, stderr) + + return true, nil + }) + + // Waiting for completion of handling deleteBackupRequest CR + time.Sleep(1 * time.Minute) + + // Verify deleteBackupRequest are all gone because they are handled successfully + var drList []string + drList, err = KubectlGetAllDeleteBackupRequest(context.Background(), backupName, velerocfg.VeleroNamespace) + if len(drList) > 1 { + // Log deleteBackupRequest details for debug + for _, dr := range drList { + details, err := KubectlGetDeleteBackupRequestDetails(context.Background(), dr, velerocfg.VeleroNamespace) + if err != nil { + return errors.Wrapf(err, "fail to get DeleteBackupRequest %s details", dr) + } + fmt.Printf("Failed DeleteBackupRequest details: %s", details) } - time.Sleep(1 * time.Minute) + return errors.New(fmt.Sprintf("Count of DeleteBackupRequest %d is not expected", len(drList))) } + return nil } @@ -1635,3 +1668,117 @@ func CleanAllRetainedPV(ctx context.Context, client TestClient) { } } 
} + +func KubectlGetBackupRepository(ctx context.Context, uploaderType, veleroNamespace string) ([]string, error) { + args1 := []string{"get", "backuprepository", "-n", veleroNamespace} + + cmds := []*common.OsCommandLine{} + + cmd := &common.OsCommandLine{ + Cmd: "kubectl", + Args: args1, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "grep", + Args: []string{uploaderType}, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "awk", + Args: []string{"{print $1}"}, + } + cmds = append(cmds, cmd) + + return common.GetListByCmdPipes(ctx, cmds) +} + +func KubectlGetPodVolumeBackup(ctx context.Context, backupName, veleroNamespace string) ([]string, error) { + args1 := []string{"get", "podvolumebackup", "-n", veleroNamespace} + + cmds := []*common.OsCommandLine{} + + cmd := &common.OsCommandLine{ + Cmd: "kubectl", + Args: args1, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "grep", + Args: []string{backupName}, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "awk", + Args: []string{"{print $1}"}, + } + cmds = append(cmds, cmd) + + return common.GetListByCmdPipes(ctx, cmds) +} + +func KubectlGetDeleteBackupRequestDetails(ctx context.Context, deleteBackupRequest, veleroNamespace string) (string, error) { + cmd := exec.CommandContext(ctx, "kubectl", "get", "deletebackuprequests", "-n", veleroNamespace, deleteBackupRequest, "-o", "json") + fmt.Printf("Get DeleteBackupRequest details cmd =%v\n", cmd) + stdout, stderr, err := veleroexec.RunCommand(cmd) + if err != nil { + fmt.Print(stdout) + fmt.Print(stderr) + return "", errors.Wrap(err, fmt.Sprintf("failed to run command %s", cmd)) + } + return stdout, err +} +func KubectlGetDeleteBackupRequestStatus(ctx context.Context, deleteBackupRequest, veleroNamespace string) (string, error) { + args1 := []string{"get", "deletebackuprequests", "-n", veleroNamespace, deleteBackupRequest, "-o", "json"} + + cmds := []*common.OsCommandLine{} + + cmd 
:= &common.OsCommandLine{ + Cmd: "kubectl", + Args: args1, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "jq", + Args: []string{"-r", ".status.phase"}, + } + cmds = append(cmds, cmd) + + ret, err := common.GetListByCmdPipes(ctx, cmds) + + if len(ret) != 1 { + return "", errors.New(fmt.Sprintf("fail to get status of deletebackuprequests %s", deleteBackupRequest)) + } + return ret[0], err +} + +func KubectlGetAllDeleteBackupRequest(ctx context.Context, backupName, veleroNamespace string) ([]string, error) { + args1 := []string{"get", "deletebackuprequests", "-n", veleroNamespace} + + cmds := []*common.OsCommandLine{} + + cmd := &common.OsCommandLine{ + Cmd: "kubectl", + Args: args1, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "grep", + Args: []string{backupName}, + } + cmds = append(cmds, cmd) + + cmd = &common.OsCommandLine{ + Cmd: "awk", + Args: []string{"{print $1}"}, + } + cmds = append(cmds, cmd) + + return common.GetListByCmdPipes(ctx, cmds) +}