From 43ddf7631e8660b8a672eca5ee13484d86507812 Mon Sep 17 00:00:00 2001
From: Joyee Cheung
Date: Sat, 11 Feb 2023 17:48:17 +0100
Subject: [PATCH 1/2] benchmark: split `Buffer.byteLength` benchmark

PR-URL: https://github.com/nodejs/node/pull/46616
Reviewed-By: Anna Henningsen
Reviewed-By: Robert Nagy
Reviewed-By: Yagiz Nizipli
Reviewed-By: James M Snell
---
 benchmark/buffers/buffer-bytelength-buffer.js | 22 +++++++++
 benchmark/buffers/buffer-bytelength-string.js | 40 +++++++++++++++
 benchmark/buffers/buffer-bytelength.js        | 49 -------------------
 3 files changed, 62 insertions(+), 49 deletions(-)
 create mode 100644 benchmark/buffers/buffer-bytelength-buffer.js
 create mode 100644 benchmark/buffers/buffer-bytelength-string.js
 delete mode 100644 benchmark/buffers/buffer-bytelength.js

diff --git a/benchmark/buffers/buffer-bytelength-buffer.js b/benchmark/buffers/buffer-bytelength-buffer.js
new file mode 100644
index 00000000000000..756a6b8db73b42
--- /dev/null
+++ b/benchmark/buffers/buffer-bytelength-buffer.js
@@ -0,0 +1,22 @@
+'use strict';
+const common = require('../common');
+
+const bench = common.createBenchmark(main, {
+  len: [2, 16, 256], // x16
+  n: [4e6],
+});
+
+function main({ n, len }) {
+  const data = Buffer.alloc(len * 16, 'a');
+  const expected = Buffer.byteLength(data, 'buffer');
+  let changed = false;
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    const actual = Buffer.byteLength(data, 'buffer');
+    if (expected !== actual) { changed = true; }
+  }
+  bench.end(n);
+  if (changed) {
+    throw new Error('Result changed during iteration');
+  }
+}
diff --git a/benchmark/buffers/buffer-bytelength-string.js b/benchmark/buffers/buffer-bytelength-string.js
new file mode 100644
index 00000000000000..fc0c005e7f9e6a
--- /dev/null
+++ b/benchmark/buffers/buffer-bytelength-string.js
@@ -0,0 +1,40 @@
+'use strict';
+const common = require('../common');
+
+const bench = common.createBenchmark(main, {
+  type: ['one_byte', 'two_bytes', 'three_bytes', 'four_bytes'],
+  encoding: ['utf8', 'base64'],
+  repeat: [1, 2, 16, 256], // x16
+  n: [4e6],
+});
+
+// 16 chars each
+const chars = {
+  one_byte: 'hello brendan!!!',
+  two_bytes: 'ΰαβγδεζηθικλμνξο',
+  three_bytes: '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿',
+  four_bytes: '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢',
+};
+
+function getInput(type, repeat, encoding) {
+  const original = (repeat === 1) ? chars[type] : chars[type].repeat(repeat);
+  if (encoding === 'base64') {  // Measure base64 text, not the raw string.
+    return Buffer.from(original, 'utf8').toString('base64');
+  }
+  return original;
+}
+
+function main({ n, repeat, encoding, type }) {
+  const data = getInput(type, repeat, encoding);
+  const expected = Buffer.byteLength(data, encoding);
+  let changed = false;
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    const actual = Buffer.byteLength(data, encoding);
+    if (expected !== actual) { changed = true; }
+  }
+  bench.end(n);
+  if (changed) {
+    throw new Error('Result changed during iteration');
+  }
+}
diff --git a/benchmark/buffers/buffer-bytelength.js b/benchmark/buffers/buffer-bytelength.js
deleted file mode 100644
index 0f24df0111342f..00000000000000
--- a/benchmark/buffers/buffer-bytelength.js
+++ /dev/null
@@ -1,49 +0,0 @@
-'use strict';
-const common = require('../common');
-
-const bench = common.createBenchmark(main, {
-  encoding: ['utf8', 'base64', 'buffer'],
-  len: [2, 16, 256], // x16
-  n: [4e6],
-});
-
-// 16 chars each
-const chars = [
-  'hello brendan!!!', // 1 byte
-  'ΰαβγδεζηθικλμνξο', // 2 bytes
-  '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿', // 3 bytes
-  '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢', // 4 bytes
-];
-
-function main({ n, len, encoding }) {
-  let strings = [];
-  let results = [len * 16];
-  if (encoding === 'buffer') {
-    strings = [Buffer.alloc(len * 16, 'a')];
-  } else {
-    for (const string of chars) {
-      // Strings must be built differently, depending on encoding
-      const data = string.repeat(len);
-      if (encoding === 'utf8') {
-        strings.push(data);
-      } else if (encoding === 'base64') {
-        // Base64 strings will be much longer than their UTF8 counterparts
-        strings.push(Buffer.from(data, 'utf8').toString('base64'));
-      }
-    }
-
-    // Check the result to ensure it is *properly* optimized
-    results = strings.map((val) => Buffer.byteLength(val, encoding));
-  }
-
-  bench.start();
-  for (let i = 0; i < n; i++) {
-    const index = n % strings.length;
-    // Go!
-    const r = Buffer.byteLength(strings[index], encoding);
-
-    if (r !== results[index])
-      throw new Error('incorrect return value');
-  }
-  bench.end(n);
-}

From ee1ce1872ff38fc5a2fd3b2e3a97600e5d6b2e14 Mon Sep 17 00:00:00 2001
From: Joyee Cheung
Date: Sat, 11 Feb 2023 17:49:32 +0100
Subject: [PATCH 2/2] buffer: use v8 fast API calls for `Buffer.byteLength` implementation

Use v8 fast API calls for Buffer.byteLength with sequential one-byte
strings.

PR-URL: https://github.com/nodejs/node/pull/46616
Reviewed-By: Anna Henningsen
Reviewed-By: Robert Nagy
Reviewed-By: Yagiz Nizipli
Reviewed-By: James M Snell
---
 src/node_buffer.cc            | 28 +++++++++++++++++++++++++---
 src/node_external_reference.h |  3 +++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index b144b95ab41a25..fec1c96634aaec 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -32,6 +32,7 @@
 #include "string_bytes.h"
 #include "string_search.h"
 #include "util-inl.h"
+#include "v8-fast-api-calls.h"
 #include "v8.h"
 
 #include <cstring>
@@ -786,7 +787,7 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
   args.GetReturnValue().Set(written);
 }
 
-void ByteLengthUtf8(const FunctionCallbackInfo<Value> &args) {
+void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
   Environment* env = Environment::GetCurrent(args);
   CHECK(args[0]->IsString());
 
@@ -794,6 +795,21 @@ void ByteLengthUtf8(const FunctionCallbackInfo<Value> &args) {
   args.GetReturnValue().Set(args[0].As<String>()->Utf8Length(env->isolate()));
 }
 
+uint32_t FastByteLengthUtf8(Local<Value> receiver,
+                            const v8::FastOneByteString& source) {
+  uint32_t result = 0;
+  uint32_t length = source.length;
+  const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
+  for (uint32_t i = 0; i < length; ++i) {
+    result += (data[i] >> 7);  // High bit set => 2 UTF-8 bytes.
+  }
+  result += length;
+  return result;
+}
+
+static v8::CFunction fast_byte_length_utf8(
+    v8::CFunction::Make(FastByteLengthUtf8));
+
 // Normalize val to be an integer in the range of [1, -1] since
 // implementations of memcmp() can vary by platform.
 static int normalizeCompareVal(int val, size_t a_length, size_t b_length) {
@@ -1368,7 +1384,11 @@ void Initialize(Local<Object> target,
   SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
   SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
 
-  SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
+  SetFastMethodNoSideEffect(context,
+                            target,
+                            "byteLengthUtf8",
+                            SlowByteLengthUtf8,
+                            &fast_byte_length_utf8);
   SetMethod(context, target, "copy", Copy);
   SetMethodNoSideEffect(context, target, "compare", Compare);
   SetMethodNoSideEffect(context, target, "compareOffset", CompareOffset);
@@ -1429,7 +1449,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(CreateFromString);
   registry->Register(DecodeUTF8);
 
-  registry->Register(ByteLengthUtf8);
+  registry->Register(SlowByteLengthUtf8);
+  registry->Register(fast_byte_length_utf8.GetTypeInfo());
+  registry->Register(FastByteLengthUtf8);
   registry->Register(Copy);
   registry->Register(Compare);
   registry->Register(CompareOffset);
diff --git a/src/node_external_reference.h b/src/node_external_reference.h
index 789770c956aa39..2eb3e3bf3cd458 100644
--- a/src/node_external_reference.h
+++ b/src/node_external_reference.h
@@ -10,6 +10,8 @@
 
 namespace node {
 
+using CFunctionCallbackWithOneByteString =
+    uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
 using CFunctionCallback = void (*)(v8::Local<v8::Value> receiver);
 
 // This class manages the external references from the V8 heap
@@ -20,6 +22,7 @@ class ExternalReferenceRegistry {
 
 #define ALLOWED_EXTERNAL_REFERENCE_TYPES(V)                                    \
   V(CFunctionCallback)                                                         \
+  V(CFunctionCallbackWithOneByteString)                                        \
   V(const v8::CFunctionInfo*)                                                  \
   V(v8::FunctionCallback)                                                      \
   V(v8::AccessorGetterCallback)                                                \