Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the bug caused by fast api changes in v22.5.0 and have a post-mortem #53934

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ function close(fd, callback = defaultCloseCallback) {
* @returns {void}
*/
function closeSync(fd) {
binding.close(fd);
binding.closeSync(fd);
}

/**
Expand Down
13 changes: 8 additions & 5 deletions lib/internal/fs/read/context.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ const {

const { Buffer } = require('buffer');

const { FSReqCallback, close, read } = internalBinding('fs');
// It is important to not destructure the binding object, as it will
// cause the creation context to be undefined, and cause fast API calls
// to fail.
anonrig marked this conversation as resolved.
Show resolved Hide resolved
const binding = internalBinding('fs');

const {
AbortError,
Expand Down Expand Up @@ -102,11 +105,11 @@ class ReadFileContext {
length = MathMin(kReadFileBufferLength, this.size - this.pos);
}

const req = new FSReqCallback();
const req = new binding.FSReqCallback();
req.oncomplete = readFileAfterRead;
req.context = this;

read(this.fd, buffer, offset, length, -1, req);
binding.read(this.fd, buffer, offset, length, -1, req);
}

close(err) {
Expand All @@ -117,12 +120,12 @@ class ReadFileContext {
return;
}

const req = new FSReqCallback();
const req = new binding.FSReqCallback();
req.oncomplete = readFileAfterClose;
req.context = this;
this.err = err;

close(this.fd, req);
binding.close(this.fd, req);
}
}

Expand Down
23 changes: 20 additions & 3 deletions src/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1841,12 +1841,29 @@ void Environment::AddUnmanagedFd(int fd) {
}
}

void Environment::RemoveUnmanagedFd(int fd) {
void Environment::RemoveUnmanagedFd(int fd,
ProcessEmitScheduleType schedule_type) {
if (!tracks_unmanaged_fds()) return;
size_t removed_count = unmanaged_fds_.erase(fd);
if (removed_count == 0) {
ProcessEmitWarning(
this, "File descriptor %d closed but not opened in unmanaged mode", fd);
switch (schedule_type) {
case kSync:
ProcessEmitWarning(
this,
"File descriptor %d closed but not opened in unmanaged mode",
fd);
break;
case kSetImmediate:
SetImmediateThreadsafe([&](Environment* env) {
ProcessEmitWarning(
env,
"File descriptor %d closed but not opened in unmanaged mode",
fd);
});
break;
default:
UNREACHABLE();
}
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -1026,8 +1026,13 @@ class Environment : public MemoryRetainer {
uv_buf_t allocate_managed_buffer(const size_t suggested_size);
std::unique_ptr<v8::BackingStore> release_managed_buffer(const uv_buf_t& buf);

enum ProcessEmitScheduleType {
kSync,
kSetImmediate,
};

void AddUnmanagedFd(int fd);
void RemoveUnmanagedFd(int fd);
void RemoveUnmanagedFd(int fd, ProcessEmitScheduleType schedule_type = kSync);

template <typename T>
void ForEachRealm(T&& iterator) const;
Expand Down
4 changes: 4 additions & 0 deletions src/node_external_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ using CFunctionWithInt64Fallback = void (*)(v8::Local<v8::Value>,
const int64_t,
v8::FastApiCallbackOptions&);
using CFunctionWithBool = void (*)(v8::Local<v8::Value>, bool);
using CFunctionWithUint32AndOptions = void (*)(v8::Local<v8::Object>,
const uint32_t,
v8::FastApiCallbackOptions&);

// This class manages the external references from the V8 heap
// to the C++ addresses in Node.js.
Expand All @@ -79,6 +82,7 @@ class ExternalReferenceRegistry {
V(CFunctionWithDoubleReturnDouble) \
V(CFunctionWithInt64Fallback) \
V(CFunctionWithBool) \
V(CFunctionWithUint32AndOptions) \
V(const v8::CFunctionInfo*) \
V(v8::FunctionCallback) \
V(v8::AccessorNameGetterCallback) \
Expand Down
68 changes: 54 additions & 14 deletions src/node_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ namespace fs {

using v8::Array;
using v8::BigInt;
using v8::CFunction;
using v8::Context;
using v8::EscapableHandleScope;
using v8::FastApiCallbackOptions;
Expand Down Expand Up @@ -971,32 +972,67 @@ void Access(const FunctionCallbackInfo<Value>& args) {
}
}

void Close(const FunctionCallbackInfo<Value>& args) {
static void Close(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

const int argc = args.Length();
CHECK_GE(argc, 1);
CHECK_EQ(args.Length(), 2); // fd, req

int fd;
if (!GetValidatedFd(env, args[0]).To(&fd)) {
return;
}
env->RemoveUnmanagedFd(fd);

if (argc > 1) { // close(fd, req)
FSReqBase* req_wrap_async = GetReqWrap(args, 1);
CHECK_NOT_NULL(req_wrap_async);
FS_ASYNC_TRACE_BEGIN0(UV_FS_CLOSE, req_wrap_async)
AsyncCall(env, req_wrap_async, args, "close", UTF8, AfterNoArgs,
uv_fs_close, fd);
} else { // close(fd)
FSReqWrapSync req_wrap_sync("close");
FS_SYNC_TRACE_BEGIN(close);
SyncCallAndThrowOnError(env, &req_wrap_sync, uv_fs_close, fd);
FS_SYNC_TRACE_END(close);
FSReqBase* req_wrap_async = GetReqWrap(args, 1);
CHECK_NOT_NULL(req_wrap_async);
FS_ASYNC_TRACE_BEGIN0(UV_FS_CLOSE, req_wrap_async)
AsyncCall(
env, req_wrap_async, args, "close", UTF8, AfterNoArgs, uv_fs_close, fd);
}

// Separate implementations are required to provide fast API for closeSync.
// If both close and closeSync are implemented using the same function, and
// if a fast API implementation is added for closeSync, close(fd, req) will
// also trigger the fast API implementation and cause an incident.
// Ref: https://github.com/nodejs/node/issues/53902
static void CloseSync(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_EQ(args.Length(), 1);

int fd;
if (!GetValidatedFd(env, args[0]).To(&fd)) {
return;
}
env->RemoveUnmanagedFd(fd);
FSReqWrapSync req_wrap_sync("close");
FS_SYNC_TRACE_BEGIN(close);
SyncCallAndThrowOnError(env, &req_wrap_sync, uv_fs_close, fd);
FS_SYNC_TRACE_END(close);
}

static void FastCloseSync(Local<Object> recv,
const uint32_t fd,
// NOLINTNEXTLINE(runtime/references) This is V8 api.
v8::FastApiCallbackOptions& options) {
Environment* env = Environment::GetCurrent(recv->GetCreationContextChecked());
Copy link
Member

@devsnek devsnek Jul 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not 100% sure but i think you may need to declare a HandleScope using options.isolate prior to opening this context handle, otherwise its leaking into whatever ambient HandleScope is present up the stack. (also, Env is reused across all contexts in an Isolate right? if that's the case, maybe we can just store an env pointer on the isolate instead of reaching through the context? if i'm misremembering and this doesn't work please ignore me)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use the latest V8 version to leverage options.isolate. Node 22 uses 12.4.254.21-node.15. Without options.isolate change landed on v8, I don't think there's much we can do.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Env is reused across all contexts in an Isolate right? if that's the case, maybe we can just store an env pointer on the isolate instead of reaching through the context?

That is not strictly maintained, though we try to keep it that way (e.g. see discussions in #47855).

I think we discussed using Isolate::SetData() somewhere else, though the issue is that there are only 4 slots, and it may be a problem for Node.js embedders that also are already using those slots.

Copy link
Member

@joyeecheung joyeecheung Jul 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re. handle scopes, you can already get the isolate from the context and create a handle scope from that. I am not sure if that's really an issue, though - we almost never create handle scopes in our callbacks, it seems fine to assume that fast APIs work in the same way? Maybe it's still good to do for the sake of code hygine..

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we discussed using Isolate::SetData() somewhere else, though the issue is that there are only 4 slots, and it may be a problem for Node.js embedders that also are already using those slots.

We used to store IsolateData* there in the past: c3cd453

That's not quite Environment* (which, yeah, technically doesn't always need to be the same for a single Isolate* – if we ever wanted to productionize something like synchronous-worker, we could).

I agree that that's probably not ideal for embedders, and we should avoid re-entangling these bits if we can. If we need to pass something like an Environment* through, would something like a v8::External* or BaseObject* work?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I am hitting the handle scope issue in #54384 so yes it is required - I used a fix with v8::Isolate::TryGetCurrent there since v22 is stuck with 12.4, on main which has 12.8 this can use the isolate from fallback options to create the handle scope.

anonrig marked this conversation as resolved.
Show resolved Hide resolved

uv_fs_t req;
FS_SYNC_TRACE_BEGIN(close);
int err = uv_fs_close(nullptr, &req, fd, nullptr);
FS_SYNC_TRACE_END(close);

if (is_uv_error(err)) {
options.fallback = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means retrying, correct? In that case, quoting https://man7.org/linux/man-pages/man2/close.2.html: (emphasis mine)

Dealing with error returns from close()
A careful programmer will check the return value of close(), since it is quite possible that errors on a previous write(2) operation are reported only on the final close() that releases the open file description. Failing to check the return value when closing a file may lead to silent loss of data. This can especially be observed with NFS and with disk quota.

Note, however, that a failure return should be used only for diagnostic purposes (i.e., a warning to the application that there may still be I/O pending or there may have been failed I/O) or remedial purposes (e.g., writing the file once more or creating a backup).

Retrying the close() after a failure return is the wrong thing to do, since this may cause a reused file descriptor from another thread to be closed. This can occur because the Linux kernel always releases the file descriptor early in the close operation, freeing it for reuse; the steps that may return an error, such as flushing data to the filesystem or device, occur only later in the close operation.

Many other implementations similarly always close the file descriptor (except in the case of EBADF, meaning that the file descriptor was invalid) even if they subsequently report an error on return from close(). POSIX.1 is currently silent on this point, but there are plans to mandate this behavior in the next major release of the standard.

A careful programmer who wants to know about I/O errors may precede close() with a call to fsync(2).

The EINTR error is a somewhat special case. Regarding the EINTR error, POSIX.1-2008 says:

If close() is interrupted by a signal that is to be caught, it shall return -1 with errno set to EINTR and the state of fildes is unspecified.

This permits the behavior that occurs on Linux and many other implementations, where, as with other errors that may be reported by close(), the file descriptor is guaranteed to be closed. However, it also permits another possibility: that the implementation returns an EINTR error and keeps the file descriptor open. (According to its documentation, HP-UX's close() does this.) The caller must then once more use close() to close the file descriptor, to avoid file descriptor leaks. This divergence in implementation behaviors provides a difficult hurdle for portable applications, since on many implementations, close() must not be called again after an EINTR error, and on at least one, close() must be called again. There are plans to address this conundrum for the next major release of the POSIX.1 standard.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, options.fallback = true will retry on the slow path.

Retrying the close() after a failure return is the wrong thing to do, since this may cause a reused file descriptor from another thread to be closed. This can occur because the Linux kernel always releases the file descriptor early in the close operation, freeing it for reuse; the steps that may return an error, such as flushing data to the filesystem or device, occur only later in the close operation.

Does this still apply since this is a IO blocking operation? If it does apply, does this make this pull-request invalid?

A careful programmer who wants to know about I/O errors may precede close() with a call to fsync(2).

This might be a breaking change isn't it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, options.fallback = true will retry on the slow path.

Ok – we can't really do that.

Retrying the close() after a failure return is the wrong thing to do, since this may cause a reused file descriptor from another thread to be closed. This can occur because the Linux kernel always releases the file descriptor early in the close operation, freeing it for reuse; the steps that may return an error, such as flushing data to the filesystem or device, occur only later in the close operation.

Does this still apply since this is a IO blocking operation? If it does apply, does this make this pull-request invalid?

Not really – it just means we can't use a fallback path here.

A careful programmer who wants to know about I/O errors may precede close() with a call to fsync(2).

This might be a breaking change isn't it?

It's not a suggestion that I'd implement inside of our fs.close() calls. If a user wants this, they can call our own fsync() wrapper.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when isolate is available on FastApiCallbackOptions, you'll be able to raise an exception directly from the fast function. Maybe this should wait for that?

} else {
// Note: Only remove unmanaged fds if the close was successful.
// RemoveUnmanagedFd() can call ProcessEmitWarning() which calls back into
// JS process.emitWarning() and violates the fast API protocol.
env->RemoveUnmanagedFd(fd, Environment::kSetImmediate);
anonrig marked this conversation as resolved.
Show resolved Hide resolved
}
}

CFunction fast_close_sync_(CFunction::Make(FastCloseSync));

static void ExistsSync(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
Expand Down Expand Up @@ -3547,6 +3583,7 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
GetFormatOfExtensionlessFile);
SetMethod(isolate, target, "access", Access);
SetMethod(isolate, target, "close", Close);
SetFastMethod(isolate, target, "closeSync", CloseSync, &fast_close_sync_);
SetMethod(isolate, target, "existsSync", ExistsSync);
SetMethod(isolate, target, "open", Open);
SetMethod(isolate, target, "openFileHandle", OpenFileHandle);
Expand Down Expand Up @@ -3674,6 +3711,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {

registry->Register(GetFormatOfExtensionlessFile);
registry->Register(Close);
registry->Register(CloseSync);
registry->Register(FastCloseSync);
registry->Register(fast_close_sync_.GetTypeInfo());
registry->Register(ExistsSync);
registry->Register(Open);
registry->Register(OpenFileHandle);
Expand Down
31 changes: 31 additions & 0 deletions test/parallel/test-fs-close-fast-api.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
'use strict';

const common = require('../common');
const fs = require('fs');

// A large number is selected to ensure that the fast path is called.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it ensured that 100_000 is enough?
Any chance to really verify that fast API is actually used?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can try something similar to https://chromium.googlesource.com/v8/v8/+/e3e8ea5d657b886201d4e1982589a72f77909ba4/test/mjsunit/compiler/fast-api-helpers.js#7 though it needs to be paired with specific V8 optimizer flags and can be subject to changes as V8 changes their optimizing strategy.

Copy link
Member

@devsnek devsnek Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mjsunit.js exists in the node tree, you should be able to load it into this test with --allow-natives-syntax and then you can use stuff like assertOptimized and it will continue to work as v8 is updated. You could also add counters or use the v8 cpu profiler to verify that the fast function was called.

// Ref: https://github.com/nodejs/node/issues/53902
for (let i = 0; i < 100_000; i++) {
try {
fs.closeSync(100);
} catch (e) {
// Ignore all EBADF errors.
// EBADF stands for "Bad file descriptor".
if (e.code !== 'EBADF') {
throw e;
}
}
}

// This test is to ensure that fs.close() does not crash under pressure.
for (let i = 0; i < 100_000; i++) {
fs.close(100, common.mustCall());
}

// This test is to ensure that fs.readFile() does not crash.
// readFile() calls fs.close() internally if the input is not a file descriptor.
// Large number is selected to ensure that the fast path is called.
// Ref: https://github.com/nodejs/node/issues/53902
for (let i = 0; i < 100_000; i++) {
fs.readFile(__filename, common.mustCall());
}
3 changes: 2 additions & 1 deletion typings/internalBinding/fs.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ declare namespace InternalFSBinding {
function chown(path: string, uid: number, gid: number): void;

function close(fd: number, req: FSReqCallback): void;
function close(fd: number): void;
function closeSync(fd: number): void;

function copyFile(src: StringOrBuffer, dest: StringOrBuffer, mode: number, req: FSReqCallback): void;
function copyFile(src: StringOrBuffer, dest: StringOrBuffer, mode: number, req: undefined, ctx: FSSyncContext): void;
Expand Down Expand Up @@ -258,6 +258,7 @@ export interface FsBinding {
chmod: typeof InternalFSBinding.chmod;
chown: typeof InternalFSBinding.chown;
close: typeof InternalFSBinding.close;
closeSync: typeof InternalFSBinding.closeSync;
copyFile: typeof InternalFSBinding.copyFile;
cpSyncCheckPaths: typeof InternalFSBinding.cpSyncCheckPaths;
fchmod: typeof InternalFSBinding.fchmod;
Expand Down
Loading