Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix complete MPU with missing parts #5691

Merged
merged 9 commits into from
Nov 18, 2024
55 changes: 45 additions & 10 deletions lib/api/apiUtils/object/abortMultipartUpload.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const locationConstraintCheck = require('../object/locationConstraintCheck');
const { standardMetadataValidateBucketAndObj } =
require('../../../metadata/metadataUtils');
const services = require('../../../services');
const metadata = require('../../../metadata/wrapper');

function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
callback, request) {
Expand All @@ -17,6 +18,8 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
preciseRequestType: request.apiMethods || 'multipartDelete',
request,
};

log.debug('processing request', { method: 'abortMultipartUpload' });
// For validating the request at the destinationBucket level
// params are the same as validating at the MPU level
// but the requestType is the more general 'objectDelete'
Expand All @@ -27,8 +30,9 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
async.waterfall([
function checkDestBucketVal(next) {
standardMetadataValidateBucketAndObj(metadataValParams, authzIdentityResult, log,
(err, destinationBucket) => {
(err, destinationBucket, objectMD) => {
if (err) {
log.error('error validating request', { error: err });
return next(err, destinationBucket);
}
if (destinationBucket.policies) {
Expand All @@ -41,20 +45,21 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
metadataValMPUparams.requestType =
'bucketPolicyGoAhead';
}
return next(null, destinationBucket);
return next(null, destinationBucket, objectMD);
});
},
function checkMPUval(destBucket, next) {
function checkMPUval(destBucket, objectMD, next) {
metadataValParams.log = log;
services.metadataValidateMultipart(metadataValParams,
(err, mpuBucket, mpuOverviewObj) => {
if (err) {
log.error('error validating multipart', { error: err });
return next(err, destBucket);
}
return next(err, mpuBucket, mpuOverviewObj, destBucket);
return next(err, mpuBucket, mpuOverviewObj, destBucket, objectMD);
});
},
function abortExternalMpu(mpuBucket, mpuOverviewObj, destBucket,
function abortExternalMpu(mpuBucket, mpuOverviewObj, destBucket, objectMD,
next) {
const location = mpuOverviewObj.controllingLocationConstraint;
const originalIdentityAuthzResults = request.actionImplicitDenies;
Expand All @@ -66,26 +71,49 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
// eslint-disable-next-line no-param-reassign
request.actionImplicitDenies = originalIdentityAuthzResults;
if (err) {
log.error('error aborting MPU', { error: err });
return next(err, destBucket);
}
// for Azure and GCP we do not need to delete data
// for all other backends, skipDataDelete will be set to false
return next(null, mpuBucket, destBucket, skipDataDelete);
return next(null, mpuBucket, destBucket, objectMD, skipDataDelete);
});
},
function getPartLocations(mpuBucket, destBucket, skipDataDelete,
function getPartLocations(mpuBucket, destBucket, objectMD, skipDataDelete,
next) {
services.getMPUparts(mpuBucket.getName(), uploadId, log,
(err, result) => {
if (err) {
log.error('error getting parts', { error: err });
return next(err, destBucket);
}
const storedParts = result.Contents;
return next(null, mpuBucket, storedParts, destBucket,
return next(null, mpuBucket, storedParts, destBucket, objectMD,
skipDataDelete);
});
},
function deleteData(mpuBucket, storedParts, destBucket,
function deleteObjectMetadata(mpuBucket, storedParts, destBucket, objectMD, skipDataDelete, next) {
if (!objectMD || metadataValMPUparams.uploadId !== objectMD.uploadId) {
francoisferrand marked this conversation as resolved.
Show resolved Hide resolved
return next(null, mpuBucket, storedParts, destBucket, objectMD, skipDataDelete);
}
// In case there has been an error during cleanup after a complete MPU
// (e.g. failure to delete MPU MD in shadow bucket),
// we need to ensure that the MPU metadata is deleted.
log.info('Object has existing metadata, deleting them', {
KillianG marked this conversation as resolved.
Show resolved Hide resolved
method: 'abortMultipartUpload',
bucketName,
objectKey,
KillianG marked this conversation as resolved.
Show resolved Hide resolved
uploadId,
versionId: objectMD.versionId
KillianG marked this conversation as resolved.
Show resolved Hide resolved
});
return metadata.deleteObjectMD(bucketName, objectKey, { versionId: objectMD.versionId }, log, err => {
if (err) {
log.error('error deleting object metadata', { error: err });
}
return next(err, mpuBucket, storedParts, destBucket, objectMD, skipDataDelete);
});
},
function deleteData(mpuBucket, storedParts, destBucket, objectMD,
skipDataDelete, next) {
if (skipDataDelete) {
return next(null, mpuBucket, storedParts, destBucket);
Expand All @@ -98,6 +126,13 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
}
// flatten the array
locations = [].concat(...locations);

if (objectMD?.location) {
const objectLocationLeft = objectMD.location.filter(loc =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: "left" looks like the directoin, maybe best to rename to something more intuitive, maybe remainingObjectLocations

more importantly, this can be quite CPU intensive for large objects (n^2 complexity, with n up to 10000): need to make this more efficient...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a more optimized approach for this would be to use a map ?

if (objectMD?.location) {
    const locationMap = new Map(locations.map(loc => [loc.key, true]));
    const objectLocationLeft = objectMD.location.filter(loc => !locationMap.has(loc.key));
    locations = locations.concat(objectLocationLeft);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we are building a list of locations, maybe we could directly use a map instead (and pass it to async.eachLimit directly, to process each "pair"?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed a more optimized version here
e9ae9c2

FOr locations being a Set directly, I can do it, but as we'll need to map over it anyway for getting keys, we'll end up needing an array (as map function does not exist for Sets)

!locations.some(existingLoc => existingLoc.key === loc.key));
locations = locations.concat(objectLocationLeft);
}

return async.eachLimit(locations, 5, (loc, cb) => {
data.delete(loc, log, err => {
if (err) {
Expand All @@ -107,7 +142,7 @@ function abortMultipartUpload(authInfo, bucketName, objectKey, uploadId, log,
});
}, () => next(null, mpuBucket, storedParts, destBucket));
},
function deleteMetadata(mpuBucket, storedParts, destBucket, next) {
function deleteShadowObjectMetadata(mpuBucket, storedParts, destBucket, next) {
let splitter = constants.splitter;
// BACKWARD: Remove to remove the old splitter
if (mpuBucket.getMdBucketModelVersion() < 2) {
Expand Down
11 changes: 11 additions & 0 deletions lib/api/completeMultipartUpload.js
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ function completeMultipartUpload(authInfo, request, log, callback) {
return services.metadataValidateMultipart(metadataValParams,
(err, mpuBucket, mpuOverview, storedMetadata) => {
if (err) {
log.error('error validating request', { error: err });
return next(err, destBucket);
}
return next(null, destBucket, objMD, mpuBucket,
Expand All @@ -172,6 +173,7 @@ function completeMultipartUpload(authInfo, request, log, callback) {
if (request.post) {
return parseXml(request.post, (err, jsonList) => {
if (err) {
log.error('error parsing XML', { error: err });
return next(err, destBucket);
}
return next(null, destBucket, objMD, mpuBucket,
Expand All @@ -190,6 +192,12 @@ function completeMultipartUpload(authInfo, request, log, callback) {
storedMetadata,
}, log, err => {
if (err) {
log.error('error marking MPU object for completion', {
bucketName: mpuBucket.getName(),
objectKey,
uploadId,
error: err,
});
return next(err);
}
return next(null, destBucket, objMD, mpuBucket,
Expand All @@ -201,6 +209,7 @@ function completeMultipartUpload(authInfo, request, log, callback) {
return services.getMPUparts(mpuBucket.getName(), uploadId, log,
(err, result) => {
if (err) {
log.error('error getting parts', { error: err });
return next(err, destBucket);
}
const storedParts = result.Contents;
Expand All @@ -222,6 +231,7 @@ function completeMultipartUpload(authInfo, request, log, callback) {
// eslint-disable-next-line no-param-reassign
request.actionImplicitDenies = originalIdentityImpDenies;
if (err) {
log.error('error completing MPU externally', { error: err });
return next(err, destBucket);
}
// if mpu not handled externally, completeObjData will be null
Expand Down Expand Up @@ -452,6 +462,7 @@ function completeMultipartUpload(authInfo, request, log, callback) {
dataLocations, pseudoCipherBundle, metaStoreParams,
(err, res) => {
if (err) {
log.error('error storing object metadata', { error: err });
return next(err, destinationBucket);
}

Expand Down
Loading
Loading