Skip to content

Commit

Permalink
fix(purge): retry harder on delete 503 errors
Browse files Browse the repository at this point in the history
Signed-off-by: Frederic BIDON <[email protected]>
  • Loading branch information
fredbi committed Jan 27, 2023
1 parent e2194c3 commit 1e13ea1
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 25 deletions.
2 changes: 1 addition & 1 deletion k8s/purge/build-index/values.default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.7
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/build-index/values.dev.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.7
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/build-index/values.prod.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.7
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/build-index/values.staging.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.7
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/delete-repo/values.default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.5
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/delete-repo/values.dev.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/delete-repo/values.prod.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.7
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/delete-repo/values.staging.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
4 changes: 2 additions & 2 deletions k8s/purge/delete-unused/values.default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.8
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand All @@ -18,7 +18,7 @@ config: |
command: delete-unused
force: false
dryrun: false
concurrency: 300
concurrency: 150 # we have to tame concurrency, or delete operations fail with "googleapi: Error 503: We encountered an internal error. Please try again., backendError"

# the size of the volume claim to attach as download staging
stagingSize: 36Gi
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/squash/values.default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/squash/values.dev.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/squash/values.prod.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
2 changes: 1 addition & 1 deletion k8s/purge/squash/values.staging.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: gcr.io/onec-co/datamon
tag: v2.6.6
tag: v2.6.9
pullPolicy: Always

serviceAccountName: flood
Expand Down
43 changes: 32 additions & 11 deletions pkg/core/purge.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,15 @@ func defaultBackoff() backoff.BackOff {
return withRetry
}

// insistantBackoff returns an exponential backoff policy that keeps retrying
// for up to 10 minutes in total.
//
// MaxElapsedTime bounds the overall time spent retrying, not the delay between
// two attempts: the per-attempt interval settings are left at whatever
// backoff.NewExponentialBackOff provides.
//
// It is meant for operations that are worth insisting on, such as blob store
// calls that fail transiently under load.
//
// NOTE(review): the identifier is spelled "insistant" (sic); it is kept as is
// because callers refer to it by that name.
func insistantBackoff() backoff.BackOff {
	policy := backoff.NewExponentialBackOff()
	policy.MaxElapsedTime = 10 * time.Minute

	// Reset after changing the settings, so the policy starts from a clean state.
	policy.Reset()

	return policy
}

func bundleKeys(ctx context.Context, b *Bundle, size uint32, db kvStore, logger *zap.Logger) ([]string, error) {
if err := backoff.Retry(func() error {
return unpackBundleFileList(ctx, b, false, defaultBundleEntriesPerFile)
Expand Down Expand Up @@ -754,12 +763,20 @@ func checkAndDeleteKey(ctx context.Context,
return err
}

// key not found
attrs, err := blob.GetAttr(ctx, key)
if err != nil {
logger.Error("retrieving blob attributes", zap.Error(err))
// key not found in index
var attrs storage.Attributes
if err = backoff.Retry(func() error {
var e error
attrs, e = blob.GetAttr(ctx, key)
if !errors.Is(e, status.ErrNotExists) {
return err
}

return err
return nil
},
backoff.WithContext(insistantBackoff(), ctx),
); err != nil {
logger.Error("retrieving blob attributes", zap.Error(err))
}

// the blob has been created after the index: skip
Expand All @@ -780,17 +797,21 @@ func checkAndDeleteKey(ctx context.Context,
}

// proceed with deletion from the blob store
return backoff.Retry(func() error {
if err := blob.Delete(ctx, key); err != nil {
logger.Error("deleting blob", zap.Error(err))

if err = backoff.Retry(func() error {
e := blob.Delete(ctx, key)
if !errors.Is(e, status.ErrNotExists) {
return err
}
// under high pressure, google API often fails with: "googleapi: Error 503: We encountered an internal error. Please try again., backendError"

return nil
},
backoff.WithContext(defaultBackoff(), ctx),
)
backoff.WithContext(insistantBackoff(), ctx),
); err != nil {
logger.Error("deleting blob", zap.Error(err))
}

return nil
}

// copyIndexChunks iterates over all index chunks and loads the keys in the local KV store.
Expand Down

0 comments on commit 1e13ea1

Please sign in to comment.