From 8ea835419efe104ca8eb7c4d493daaa05b2a480c Mon Sep 17 00:00:00 2001 From: Darshan Sen Date: Sat, 18 Feb 2023 08:19:18 +0530 Subject: [PATCH] src: add initial support for single executable applications Compile a JavaScript file into a single executable application: ```console $ echo 'console.log(`Hello, ${process.argv[2]}!`);' > hello.js $ cp $(command -v node) hello $ npx postject hello NODE_JS_CODE hello.js \ --sentinel-fuse NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2 $ npx postject hello NODE_JS_CODE hello.js \ --sentinel-fuse NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2 \ --macho-segment-name NODE_JS $ ./hello world Hello, world! ``` Signed-off-by: Darshan Sen PR-URL: https://github.com/nodejs/node/pull/45038 Backport-PR-URL: https://github.com/nodejs/node/pull/47495 Reviewed-By: Anna Henningsen Reviewed-By: Michael Dawson Reviewed-By: Joyee Cheung Reviewed-By: Matteo Collina Reviewed-By: Colin Ihrig --- configure.py | 10 ++ doc/api/index.md | 1 + doc/api/single-executable-applications.md | 140 ++++++++++++++++++ ...g-single-executable-application-support.md | 81 ++++++++++ .../main/single_executable_application.js | 55 +++++++ node.gyp | 7 +- src/node.cc | 18 +++ src/node_binding.cc | 1 + src/node_external_reference.h | 1 + src/node_options.cc | 5 + src/node_sea.cc | 130 ++++++++++++++++ src/node_sea.h | 23 +++ test/fixtures/sea.js | 35 +++++ 13 files changed, 506 insertions(+), 1 deletion(-) create mode 100644 doc/api/single-executable-applications.md create mode 100644 doc/contributing/maintaining-single-executable-application-support.md create mode 100644 lib/internal/main/single_executable_application.js create mode 100644 src/node_sea.cc create mode 100644 src/node_sea.h create mode 100644 test/fixtures/sea.js diff --git a/configure.py b/configure.py index 92877f262af5ca..40e0395ebd2c3c 100755 --- a/configure.py +++ b/configure.py @@ -146,6 +146,12 @@ default=None, help='use on deprecated SunOS systems that do not support ifaddrs.h') 
+parser.add_argument('--disable-single-executable-application', + action='store_true', + dest='disable_single_executable_application', + default=None, + help='Disable Single Executable Application support.') + parser.add_argument("--fully-static", action="store_true", dest="fully_static", @@ -1402,6 +1408,10 @@ def configure_node(o): if options.no_ifaddrs: o['defines'] += ['SUNOS_NO_IFADDRS'] + o['variables']['single_executable_application'] = b(not options.disable_single_executable_application) + if options.disable_single_executable_application: + o['defines'] += ['DISABLE_SINGLE_EXECUTABLE_APPLICATION'] + # By default, enable ETW on Windows. if flavor == 'win': o['variables']['node_use_etw'] = b(not options.without_etw) diff --git a/doc/api/index.md b/doc/api/index.md index 9c35550f5daf81..81ef77491b1f1b 100644 --- a/doc/api/index.md +++ b/doc/api/index.md @@ -52,6 +52,7 @@ * [Readline](readline.md) * [REPL](repl.md) * [Report](report.md) +* [Single executable applications](single-executable-applications.md) * [Stream](stream.md) * [String decoder](string_decoder.md) * [Test runner](test.md) diff --git a/doc/api/single-executable-applications.md b/doc/api/single-executable-applications.md new file mode 100644 index 00000000000000..ef0604ce618f3e --- /dev/null +++ b/doc/api/single-executable-applications.md @@ -0,0 +1,140 @@ +# Single executable applications + + + +> Stability: 1 - Experimental: This feature is being designed and will change. + + + +This feature allows the distribution of a Node.js application conveniently to a +system that does not have Node.js installed. + +Node.js supports the creation of [single executable applications][] by allowing +the injection of a JavaScript file into the `node` binary. During start up, the +program checks if anything has been injected. If the script is found, it +executes its contents. Otherwise Node.js operates as it normally does. 
+ +The single executable application feature only supports running a single +embedded [CommonJS][] file. + +A bundled JavaScript file can be turned into a single executable application +with any tool which can inject resources into the `node` binary. + +Here are the steps for creating a single executable application using one such +tool, [postject][]: + +1. Create a JavaScript file: + ```console + $ echo 'console.log(`Hello, ${process.argv[2]}!`);' > hello.js + ``` + +2. Create a copy of the `node` executable and name it according to your needs: + ```console + $ cp $(command -v node) hello + ``` + +3. Inject the JavaScript file into the copied binary by running `postject` with + the following options: + + * `hello` - The name of the copy of the `node` executable created in step 2. + * `NODE_JS_CODE` - The name of the resource / note / section in the binary + where the contents of the JavaScript file will be stored. + * `hello.js` - The name of the JavaScript file created in step 1. + * `--sentinel-fuse NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2` - The + [fuse][] used by the Node.js project to detect if a file has been injected. + * `--macho-segment-name NODE_JS` (only needed on macOS) - The name of the + segment in the binary where the contents of the JavaScript file will be + stored. + + To summarize, here is the required command for each platform: + + * On systems other than macOS: + ```console + $ npx postject hello NODE_JS_CODE hello.js \ + --sentinel-fuse NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2 + ``` + + * On macOS: + ```console + $ npx postject hello NODE_JS_CODE hello.js \ + --sentinel-fuse NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2 \ + --macho-segment-name NODE_JS + ``` + +4. Run the binary: + ```console + $ ./hello world + Hello, world! + ``` + +## Notes + +### `require(id)` in the injected module is not file based + +`require()` in the injected module is not the same as the [`require()`][] +available to modules that are not injected. 
It also does not have any of the +properties that non-injected [`require()`][] has except [`require.main`][]. It +can only be used to load built-in modules. Attempting to load a module that can +only be found in the file system will throw an error. + +Instead of relying on a file based `require()`, users can bundle their +application into a standalone JavaScript file to inject into the executable. +This also ensures a more deterministic dependency graph. + +However, if a file based `require()` is still needed, that can also be achieved: + +```js +const { createRequire } = require('node:module'); +require = createRequire(__filename); +``` + +### `__filename` and `module.filename` in the injected module + +The values of `__filename` and `module.filename` in the injected module are +equal to [`process.execPath`][]. + +### `__dirname` in the injected module + +The value of `__dirname` in the injected module is equal to the directory name +of [`process.execPath`][]. + +### Single executable application creation process + +A tool aiming to create a single executable Node.js application must +inject the contents of a JavaScript file into: + +* a resource named `NODE_JS_CODE` if the `node` binary is a [PE][] file +* a section named `NODE_JS_CODE` in the `NODE_JS` segment if the `node` binary + is a [Mach-O][] file +* a note named `NODE_JS_CODE` if the `node` binary is an [ELF][] file + +Search the binary for the +`NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2:0` [fuse][] string and flip the +last character to `1` to indicate that a resource has been injected. + +### Platform support + +Single-executable support is tested regularly on CI only on the following +platforms: + +* Windows +* macOS +* Linux (AMD64 only) + +This is due to a lack of better tools to generate single-executables that can be +used to test this feature on other platforms. + +Suggestions for other resource injection tools/workflows are welcomed. Please +start a discussion at <https://github.com/nodejs/single-executable/discussions> +to help us document them. 
+ +[CommonJS]: modules.md#modules-commonjs-modules +[ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format +[Mach-O]: https://en.wikipedia.org/wiki/Mach-O +[PE]: https://en.wikipedia.org/wiki/Portable_Executable +[`process.execPath`]: process.md#processexecpath +[`require()`]: modules.md#requireid +[`require.main`]: modules.md#accessing-the-main-module +[fuse]: https://www.electronjs.org/docs/latest/tutorial/fuses +[postject]: https://github.com/nodejs/postject +[single executable applications]: https://github.com/nodejs/single-executable diff --git a/doc/contributing/maintaining-single-executable-application-support.md b/doc/contributing/maintaining-single-executable-application-support.md new file mode 100644 index 00000000000000..e3957230f3001e --- /dev/null +++ b/doc/contributing/maintaining-single-executable-application-support.md @@ -0,0 +1,81 @@ +# Maintaining Single Executable Applications support + +Support for [single executable applications][] is one of the key technical +priorities identified for the success of Node.js. + +## High level strategy + +From the [Next-10 discussions][] there are 2 approaches the project believes are +important to support: + +### Compile with Node.js into executable + +This is the approach followed by [boxednode][]. + +No additional code within the Node.js project is needed to support the +option of compiling a bundled application along with Node.js into a single +executable application. + +### Bundle into existing Node.js executable + +This is the approach followed by [pkg][]. + +The project does not plan to provide the complete solution but instead the key +elements which are required in the Node.js executable in order to enable +bundling with the pre-built Node.js binaries. This includes: + +* Looking for a segment within the executable that holds bundled code. +* Running the bundled code when such a segment is found. + +It is left up to external tools/solutions to: + +* Bundle code into a single script. 
+* Generate a command line with appropriate options. +* Add a segment to an existing Node.js executable which contains + the command line and appropriate headers. +* Re-generate or remove signatures on the resulting executable. +* Provide a virtual file system, and hook it in if needed to + support native modules or reading file contents. + +However, the project also maintains a separate tool, [postject][], for injecting +arbitrary read-only resources into the binary such as those needed for bundling +the application into the runtime. + +## Planning + +Planning for this feature takes place in the [single-executable repository][]. + +## Upcoming features + +Currently, only running a single embedded CommonJS file is supported but support +for the following features is in the list of work we'd like to get to: + +* Running an embedded ESM file. +* Running an archive of multiple files. +* Embedding [Node.js CLI options][] into the binary. +* [XCOFF][] executable format. +* Run tests on Linux architectures/distributions other than AMD64 Ubuntu. + +## Disabling single executable application support + +To disable single executable application support, build Node.js with the +`--disable-single-executable-application` configuration option. + +## Implementation + +When built with single executable application support, the Node.js process uses +[`postject-api.h`][] to check if the `NODE_JS_CODE` section exists in the +binary. If it is found, it passes the buffer to +[`single_executable_application.js`][], which executes the contents of the +embedded script. 
+ +[Next-10 discussions]: https://github.com/nodejs/next-10/blob/main/meetings/summit-nov-2021.md#single-executable-applications +[Node.js CLI options]: https://nodejs.org/api/cli.html +[XCOFF]: https://www.ibm.com/docs/en/aix/7.2?topic=formats-xcoff-object-file-format +[`postject-api.h`]: https://github.com/nodejs/node/blob/71951a0e86da9253d7c422fa2520ee9143e557fa/test/fixtures/postject-copy/node_modules/postject/dist/postject-api.h +[`single_executable_application.js`]: https://github.com/nodejs/node/blob/main/lib/internal/main/single_executable_application.js +[boxednode]: https://github.com/mongodb-js/boxednode +[pkg]: https://github.com/vercel/pkg +[postject]: https://github.com/nodejs/postject +[single executable applications]: https://github.com/nodejs/node/blob/main/doc/contributing/technical-priorities.md#single-executable-applications +[single-executable repository]: https://github.com/nodejs/single-executable diff --git a/lib/internal/main/single_executable_application.js b/lib/internal/main/single_executable_application.js new file mode 100644 index 00000000000000..d9604cff720d2f --- /dev/null +++ b/lib/internal/main/single_executable_application.js @@ -0,0 +1,55 @@ +'use strict'; +const { + prepareMainThreadExecution, + markBootstrapComplete, +} = require('internal/process/pre_execution'); +const { getSingleExecutableCode } = internalBinding('sea'); +const { emitExperimentalWarning } = require('internal/util'); +const { Module, wrapSafe } = require('internal/modules/cjs/loader'); +const { codes: { ERR_UNKNOWN_BUILTIN_MODULE } } = require('internal/errors'); + +prepareMainThreadExecution(false, true); +markBootstrapComplete(); + +emitExperimentalWarning('Single executable application'); + +// This is roughly the same as: +// +// const mod = new Module(filename); +// mod._compile(contents, filename); +// +// but the code has been duplicated because currently there is no way to set the +// value of require.main to module. 
+// +// TODO(RaisinTen): Find a way to deduplicate this. + +const filename = process.execPath; +const contents = getSingleExecutableCode(); +const compiledWrapper = wrapSafe(filename, contents); + +const customModule = new Module(filename, null); +customModule.filename = filename; +customModule.paths = Module._nodeModulePaths(customModule.path); + +const customExports = customModule.exports; + +function customRequire(path) { + if (!Module.isBuiltin(path)) { + throw new ERR_UNKNOWN_BUILTIN_MODULE(path); + } + + return require(path); +} + +customRequire.main = customModule; + +const customFilename = customModule.filename; + +const customDirname = customModule.path; + +compiledWrapper( + customExports, + customRequire, + customModule, + customFilename, + customDirname); diff --git a/node.gyp b/node.gyp index 621b0f2bd34780..e7b0d968e9d4bf 100644 --- a/node.gyp +++ b/node.gyp @@ -153,7 +153,8 @@ 'include_dirs': [ 'src', - 'deps/v8/include' + 'deps/v8/include', + 'deps/postject' ], 'sources': [ @@ -458,6 +459,7 @@ 'include_dirs': [ 'src', + 'deps/postject', '<(SHARED_INTERMEDIATE_DIR)' # for node_natives.h ], 'dependencies': [ @@ -531,6 +533,7 @@ 'src/node_report.cc', 'src/node_report_module.cc', 'src/node_report_utils.cc', + 'src/node_sea.cc', 'src/node_serdes.cc', 'src/node_shadow_realm.cc', 'src/node_snapshotable.cc', @@ -641,6 +644,7 @@ 'src/node_report.h', 'src/node_revert.h', 'src/node_root_certs.h', + 'src/node_sea.h', 'src/node_shadow_realm.h', 'src/node_snapshotable.h', 'src/node_snapshot_builder.h', @@ -683,6 +687,7 @@ 'src/util-inl.h', # Dependency headers 'deps/v8/include/v8.h', + 'deps/postject/postject-api.h', # javascript files to make for an even more pleasant IDE experience '<@(library_files)', '<@(deps_files)', diff --git a/src/node.cc b/src/node.cc index e7724ba9c4155e..909de94b5d24b3 100644 --- a/src/node.cc +++ b/src/node.cc @@ -39,6 +39,7 @@ #include "node_realm-inl.h" #include "node_report.h" #include "node_revert.h" +#include "node_sea.h" #include 
"node_snapshot_builder.h" #include "node_v8_platform-inl.h" #include "node_version.h" @@ -126,6 +127,7 @@ #include <cstring> #include <string> +#include <tuple> #include <vector> namespace node { @@ -321,6 +323,18 @@ MaybeLocal<Value> StartExecution(Environment* env, StartExecutionCallback cb) { first_argv = env->argv()[1]; } +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) { + // TODO(addaleax): Find a way to reuse: + // + // LoadEnvironment(Environment*, const char*) + // + // instead and not add yet another main entry point here because this + // already duplicates existing code. + return StartExecution(env, "internal/main/single_executable_application"); + } +#endif + if (first_argv == "inspect") { return StartExecution(env, "internal/main/inspect"); } @@ -1187,6 +1201,10 @@ int LoadSnapshotDataAndRun(const SnapshotData** snapshot_data_ptr, } int Start(int argc, char** argv) { +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + std::tie(argc, argv) = sea::FixupArgsForSEA(argc, argv); +#endif + CHECK_GT(argc, 0); // Hack around with the argv pointer. Used for process.title = "blah". 
diff --git a/src/node_binding.cc b/src/node_binding.cc index c7ae1c26fe2bba..60b5eea61cf053 100644 --- a/src/node_binding.cc +++ b/src/node_binding.cc @@ -68,6 +68,7 @@ V(process_wrap) \ V(process_methods) \ V(report) \ + V(sea) \ V(serdes) \ V(signal_wrap) \ V(spawn_sync) \ diff --git a/src/node_external_reference.h b/src/node_external_reference.h index c12fa8f0e2924b..954bb233e941f8 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -86,6 +86,7 @@ class ExternalReferenceRegistry { V(url) \ V(util) \ V(pipe_wrap) \ + V(sea) \ V(serdes) \ V(string_decoder) \ V(stream_wrap) \ diff --git a/src/node_options.cc b/src/node_options.cc index 6eabf78f1b67c8..6e156fa1ba448f 100644 --- a/src/node_options.cc +++ b/src/node_options.cc @@ -5,6 +5,7 @@ #include "node_binding.h" #include "node_external_reference.h" #include "node_internals.h" +#include "node_sea.h" #if HAVE_OPENSSL #include "openssl/opensslv.h" #endif @@ -308,6 +309,10 @@ void Parse( // TODO(addaleax): Make that unnecessary. DebugOptionsParser::DebugOptionsParser() { +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) return; +#endif + AddOption("--inspect-port", "set host:port for inspector", &DebugOptions::host_port, diff --git a/src/node_sea.cc b/src/node_sea.cc new file mode 100644 index 00000000000000..18b661ce4ff31d --- /dev/null +++ b/src/node_sea.cc @@ -0,0 +1,130 @@ +#include "node_sea.h" + +#include "env-inl.h" +#include "node_external_reference.h" +#include "node_internals.h" +#include "node_union_bytes.h" +#include "simdutf.h" +#include "v8.h" + +// The POSTJECT_SENTINEL_FUSE macro is a string of random characters selected by +// the Node.js project that is present only once in the entire binary. It is +// used by the postject_has_resource() function to efficiently detect if a +// resource has been injected. See +// https://github.com/nodejs/postject/blob/35343439cac8c488f2596d7c4c1dddfec1fddcae/postject-api.h#L42-L45. 
+#define POSTJECT_SENTINEL_FUSE "NODE_JS_FUSE_fce680ab2cc467b6e072b8b5df1996b2" +#include "postject-api.h" +#undef POSTJECT_SENTINEL_FUSE + +#include <memory> +#include <string_view> +#include <tuple> +#include <vector> + +#if !defined(DISABLE_SINGLE_EXECUTABLE_APPLICATION) + +using v8::Context; +using v8::FunctionCallbackInfo; +using v8::Local; +using v8::Object; +using v8::Value; + +namespace { + +const std::string_view FindSingleExecutableCode() { + static const std::string_view sea_code = []() -> std::string_view { + size_t size; +#ifdef __APPLE__ + postject_options options; + postject_options_init(&options); + options.macho_segment_name = "NODE_JS"; + const char* code = static_cast<const char*>( + postject_find_resource("NODE_JS_CODE", &size, &options)); +#else + const char* code = static_cast<const char*>( + postject_find_resource("NODE_JS_CODE", &size, nullptr)); +#endif + return {code, size}; + }(); + return sea_code; +} + +void GetSingleExecutableCode(const FunctionCallbackInfo<Value>& args) { + node::Environment* env = node::Environment::GetCurrent(args); + + static const std::string_view sea_code = FindSingleExecutableCode(); + + if (sea_code.empty()) { + return; + } + + // TODO(joyeecheung): Use one-byte strings for ASCII-only source to save + // memory/binary size - using UTF16 by default results in twice of the size + // than necessary. 
+ static const node::UnionBytes sea_code_union_bytes = + []() -> node::UnionBytes { + size_t expected_u16_length = + simdutf::utf16_length_from_utf8(sea_code.data(), sea_code.size()); + auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length); + size_t u16_length = simdutf::convert_utf8_to_utf16( + sea_code.data(), + sea_code.size(), + reinterpret_cast<char16_t*>(out->data())); + out->resize(u16_length); + return node::UnionBytes{out}; + }(); + + args.GetReturnValue().Set( + sea_code_union_bytes.ToStringChecked(env->isolate())); +} + +} // namespace + +namespace node { +namespace sea { + +bool IsSingleExecutable() { + return postject_has_resource(); +} + +std::tuple<int, char**> FixupArgsForSEA(int argc, char** argv) { + // Repeats argv[0] at position 1 on argv as a replacement for the missing + // entry point file path. + if (IsSingleExecutable()) { + char** new_argv = new char*[argc + 2]; + int new_argc = 0; + new_argv[new_argc++] = argv[0]; + new_argv[new_argc++] = argv[0]; + + for (int i = 1; i < argc; ++i) { + new_argv[new_argc++] = argv[i]; + } + + new_argv[new_argc] = nullptr; + + argc = new_argc; + argv = new_argv; + } + + return {argc, argv}; +} + +void Initialize(Local<Object> target, + Local<Value> unused, + Local<Context> context, + void* priv) { + SetMethod( + context, target, "getSingleExecutableCode", GetSingleExecutableCode); +} + +void RegisterExternalReferences(ExternalReferenceRegistry* registry) { + registry->Register(GetSingleExecutableCode); +} + +} // namespace sea +} // namespace node + +NODE_BINDING_CONTEXT_AWARE_INTERNAL(sea, node::sea::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE(sea, node::sea::RegisterExternalReferences) + +#endif // !defined(DISABLE_SINGLE_EXECUTABLE_APPLICATION) diff --git a/src/node_sea.h b/src/node_sea.h new file mode 100644 index 00000000000000..97bf0115e0f0d4 --- /dev/null +++ b/src/node_sea.h @@ -0,0 +1,23 @@ +#ifndef SRC_NODE_SEA_H_ +#define SRC_NODE_SEA_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#if 
!defined(DISABLE_SINGLE_EXECUTABLE_APPLICATION) + +#include <tuple> + +namespace node { +namespace sea { + +bool IsSingleExecutable(); +std::tuple<int, char**> FixupArgsForSEA(int argc, char** argv); + +} // namespace sea +} // namespace node + +#endif // !defined(DISABLE_SINGLE_EXECUTABLE_APPLICATION) + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_SEA_H_ diff --git a/test/fixtures/sea.js b/test/fixtures/sea.js new file mode 100644 index 00000000000000..efdc32708b9898 --- /dev/null +++ b/test/fixtures/sea.js @@ -0,0 +1,35 @@ +const { Module: { createRequire } } = require('module'); +const createdRequire = createRequire(__filename); + +// Although, require('../common') works locally, that couldn't be used here +// because we set NODE_TEST_DIR=/Users/iojs/node-tmp on Jenkins CI. +const { expectWarning } = createdRequire(process.env.COMMON_DIRECTORY); + +expectWarning('ExperimentalWarning', + 'Single executable application is an experimental feature and ' + + 'might change at any time'); + +const { deepStrictEqual, strictEqual, throws } = require('assert'); +const { dirname } = require('path'); + +deepStrictEqual(process.argv, [process.execPath, process.execPath, '-a', '--b=c', 'd']); + +strictEqual(require.cache, undefined); +strictEqual(require.extensions, undefined); +strictEqual(require.main, module); +strictEqual(require.resolve, undefined); + +strictEqual(__filename, process.execPath); +strictEqual(__dirname, dirname(process.execPath)); +strictEqual(module.exports, exports); + +throws(() => require('./requirable.js'), { + code: 'ERR_UNKNOWN_BUILTIN_MODULE', +}); + +const requirable = createdRequire('./requirable.js'); +deepStrictEqual(requirable, { + hello: 'world', +}); + +console.log('Hello, world! 😊');