-
Notifications
You must be signed in to change notification settings - Fork 30.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
lib: fix fs.readdir recursive async #56041
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1369,6 +1369,102 @@ function mkdirSync(path, options) { | |
} | ||
} | ||
|
||
/**
 * A recursive algorithm for reading the entire contents of the `basePath`
 * directory. This function does not validate `basePath` as a directory. It is
 * passed directly to `binding.readdir`.
 * @param {string} basePath
 * @param {{ encoding: string, withFileTypes: boolean }} options
 * @param {(
 *   err?: Error,
 *   files?: string[] | Buffer[] | Dirent[]
 *   ) => any} callback
 * @returns {void}
 */
function readdirRecursive(basePath, options, callback) {
  // Shared traversal state, threaded through processReaddirResult and the
  // per-type handlers.
  const context = {
    withFileTypes: Boolean(options.withFileTypes),
    encoding: options.encoding,
    basePath,
    readdirResults: [],
    pathsQueue: [basePath],
  };

  let queueIndex = 0;

  function visit(path) {
    const req = new FSReqCallback();
    req.oncomplete = (err, result) => {
      if (err) {
        callback(err);
        return;
      }

      // An undefined result means nothing could be read for this path;
      // report what has been accumulated so far.
      if (result === undefined) {
        callback(null, context.readdirResults);
        return;
      }

      processReaddirResult({
        result,
        currentPath: path,
        context,
      });

      // Reads are strictly sequential: start the next queued path, or finish
      // once the queue has been drained.
      if (queueIndex < context.pathsQueue.length) {
        visit(context.pathsQueue[queueIndex++]);
      } else {
        callback(null, context.readdirResults);
      }
    };

    binding.readdir(path, context.encoding, context.withFileTypes, req);
  }

  visit(context.pathsQueue[queueIndex++]);
}
|
||
// Dispatches one raw `binding.readdir` result to the handler matching the
// traversal mode. `args` is `{ result, currentPath, context }`, so the
// `withFileTypes` flag must be read from `args.context` — not from `args`
// itself. (Reading `args.withFileTypes` would always be `undefined`, sending
// every result — including the `[names, types]` pair produced when
// `withFileTypes=true` — down the plain file-path branch.)
const processReaddirResult = (args) =>
  (args.context.withFileTypes ? handleDirents(args) : handleFilePaths(args));
|
||
// Handles a `readdir` result produced with `withFileTypes=true`. Such a result
// is a pair of parallel arrays — names and types — guaranteed to have the same
// length. Each entry is recorded as a Dirent, and directories (including
// symlinks that resolve to directories) are queued for further traversal.
function handleDirents({ result, currentPath, context }) {
  const { 0: names, 1: types } = result;

  for (let idx = 0; idx < names.length; idx++) {
    const name = names[idx];
    const fullPath = pathModule.join(currentPath, name);
    const dirent = getDirent(currentPath, name, types[idx]);

    ArrayPrototypePush(context.readdirResults, dirent);

    // Avoid excluding symlinks, as they are not directories themselves:
    // internalModuleStat returns 1 when the resolved target is a directory.
    // Refs: https://github.com/nodejs/node/issues/52663
    if (dirent.isDirectory() || binding.internalModuleStat(binding, fullPath) === 1) {
      ArrayPrototypePush(context.pathsQueue, fullPath);
    }
  }
}
|
||
// Handles a `readdir` result produced with `withFileTypes=false` (an array of
// names). Each entry is recorded as a path relative to `context.basePath`, and
// directories are queued for further traversal.
function handleFilePaths({ result, currentPath, context }) {
  for (let idx = 0; idx < result.length; idx++) {
    const entryPath = pathModule.join(currentPath, result[idx]);
    const stat = binding.internalModuleStat(binding, entryPath);

    ArrayPrototypePush(
      context.readdirResults,
      pathModule.relative(context.basePath, entryPath),
    );

    // internalModuleStat returns 1 for directories.
    if (stat === 1) {
      ArrayPrototypePush(context.pathsQueue, entryPath);
    }
  }
}
|
||
/** | ||
* An iterative algorithm for reading the entire contents of the `basePath` directory. | ||
* This function does not validate `basePath` as a directory. It is passed directly to | ||
|
@@ -1378,58 +1474,37 @@ function mkdirSync(path, options) { | |
* @returns {string[] | Dirent[]} | ||
*/ | ||
function readdirSyncRecursive(basePath, options) {
  // Shared traversal state, consumed by processReaddirResult and the
  // per-type handlers.
  const context = {
    withFileTypes: Boolean(options.withFileTypes),
    encoding: options.encoding,
    basePath,
    readdirResults: [],
    pathsQueue: [basePath],
  };

  function read(path) {
    const readdirResult = binding.readdir(
      path,
      context.encoding,
      context.withFileTypes,
    );

    // An undefined result means nothing could be read for this path;
    // skip it and keep draining the queue.
    if (readdirResult === undefined) {
      return;
    }

    processReaddirResult({
      result: readdirResult,
      currentPath: path,
      context,
    });
  }

  // `pathsQueue` grows while iterating as subdirectories are discovered, so
  // re-check the length on every pass until the queue is drained.
  // TODO(review): consider bounding the number of reads by the available file
  // descriptors to reduce memory use and improve performance (see PR
  // discussion on #56041).
  for (let i = 0; i < context.pathsQueue.length; i++) {
    read(context.pathsQueue[i]);
  }

  return context.readdirResults;
}
|
||
/** | ||
|
@@ -1455,7 +1530,7 @@ function readdir(path, options, callback) { | |
} | ||
|
||
if (options.recursive) { | ||
callback(null, readdirSyncRecursive(path, options)); | ||
readdirRecursive(path, options, callback); | ||
return; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you mind adding jsdoc to all newly/updated functions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't added JSDoc to these ones because they are not public; they are only used inside
readdirRecursive
(which has JSDoc). That seems to be the pattern in this file.