Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

encoding: use AliasedUint32Array for encodeInto results #46658

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 22 additions & 17 deletions benchmark/util/text-encoder.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
'use strict';

const common = require('../common.js');
const assert = require('assert');

const bench = common.createBenchmark(main, {
len: [16, 32, 256, 1024, 1024 * 32],
n: [1e4],
len: [32, 256, 1024, 1024 * 8],
n: [1e6],
type: ['one-byte-string', 'two-byte-string', 'ascii'],
op: ['encode', 'encodeInto'],
});
Expand All @@ -26,20 +27,24 @@ function main({ n, op, len, type }) {
}

const input = base.repeat(len);
const subarray = new Uint8Array(len);

bench.start();
switch (op) {
case 'encode': {
for (let i = 0; i < n; i++)
encoder.encode(input);
break;
}
case 'encodeInto': {
for (let i = 0; i < n; i++)
encoder.encodeInto(input, subarray);
break;
}
if (op === 'encode') {
const expected = encoder.encode(input);
let result;
bench.start();
for (let i = 0; i < n; i++)
result = encoder.encode(input);
bench.end(n);
assert.deepStrictEqual(result, expected);
} else {
const expected = new Uint8Array(len);
const subarray = new Uint8Array(len);
const expectedStats = encoder.encodeInto(input, expected);
let result;
bench.start();
for (let i = 0; i < n; i++)
result = encoder.encodeInto(input, subarray);
bench.end(n);
assert.deepStrictEqual(subarray, expected);
assert.deepStrictEqual(result, expectedStats);
}
bench.end(n);
}
15 changes: 8 additions & 7 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ const {
StringPrototypeSlice,
Symbol,
SymbolToStringTag,
Uint32Array,
Uint8Array,
} = primordials;

Expand Down Expand Up @@ -49,12 +48,12 @@ const {
validateString,
validateObject,
} = require('internal/validators');

const binding = internalBinding('encoding_binding');
const {
encodeInto,
encodeUtf8String,
decodeUTF8,
} = internalBinding('buffer');
} = binding;

const { Buffer } = require('buffer');

Expand Down Expand Up @@ -318,8 +317,6 @@ function getEncodingFromLabel(label) {
return encodings.get(trimAsciiWhitespace(label.toLowerCase()));
}

const encodeIntoResults = new Uint32Array(2);
anonrig marked this conversation as resolved.
Show resolved Hide resolved

class TextEncoder {
constructor() {
this[kEncoder] = true;
Expand All @@ -340,8 +337,12 @@ class TextEncoder {
validateString(src, 'src');
if (!dest || !isUint8Array(dest))
throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest);
encodeInto(src, dest, encodeIntoResults);
return { read: encodeIntoResults[0], written: encodeIntoResults[1] };

encodeInto(src, dest);
// We need to read from the binding here since the buffer gets refreshed
// from the snapshot.
const { 0: read, 1: written } = binding.encodeIntoResults;
return { read, written };
}

[inspect](depth, opts) {
Expand Down
2 changes: 2 additions & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@
'src/connection_wrap.cc',
'src/dataqueue/queue.cc',
'src/debug_utils.cc',
'src/encoding_binding.cc',
'src/env.cc',
'src/fs_event_wrap.cc',
'src/handle_wrap.cc',
Expand Down Expand Up @@ -589,6 +590,7 @@
'src/dataqueue/queue.h',
'src/debug_utils.h',
'src/debug_utils-inl.h',
'src/encoding_binding.h',
'src/env_properties.h',
'src/env.h',
'src/env-inl.h',
Expand Down
1 change: 1 addition & 0 deletions src/base_object_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace node {
// what the class passes to SET_BINDING_ID(), the second argument should match
// the C++ class name.
#define SERIALIZABLE_BINDING_TYPES(V) \
V(encoding_binding_data, encoding_binding::BindingData) \
V(fs_binding_data, fs::BindingData) \
V(v8_binding_data, v8_utils::BindingData) \
V(blob_binding_data, BlobBindingData) \
Expand Down
213 changes: 213 additions & 0 deletions src/encoding_binding.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
#include "encoding_binding.h"
#include "env-inl.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "simdutf.h"
#include "string_bytes.h"
#include "v8.h"

#include <cstdint>

namespace node {
namespace encoding_binding {

using v8::ArrayBuffer;
using v8::BackingStore;
using v8::Context;
using v8::Function;
using v8::FunctionCallbackInfo;
using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::Object;
using v8::String;
using v8::Uint32Array;
using v8::Uint8Array;
using v8::Value;

void BindingData::MemoryInfo(MemoryTracker* tracker) const {
tracker->TrackField("encode_into_results_buffer",
encode_into_results_buffer_);
}

BindingData::BindingData(Realm* realm, v8::Local<v8::Object> object)
: SnapshotableObject(realm, object, type_int),
encode_into_results_buffer_(realm->isolate(), kEncodeIntoResultsLength) {
object
->Set(realm->context(),
FIXED_ONE_BYTE_STRING(realm->isolate(), "encodeIntoResults"),
encode_into_results_buffer_.GetJSArray())
.Check();
}

bool BindingData::PrepareForSerialization(Local<Context> context,
v8::SnapshotCreator* creator) {
// We'll just re-initialize the buffers in the constructor since their
// contents can be thrown away once consumed in the previous call.
encode_into_results_buffer_.Release();
// Return true because we need to maintain the reference to the binding from
// JS land.
return true;
}

InternalFieldInfoBase* BindingData::Serialize(int index) {
DCHECK_EQ(index, BaseObject::kEmbedderType);
InternalFieldInfo* info =
InternalFieldInfoBase::New<InternalFieldInfo>(type());
return info;
}

void BindingData::Deserialize(Local<Context> context,
Local<Object> holder,
int index,
InternalFieldInfoBase* info) {
DCHECK_EQ(index, BaseObject::kEmbedderType);
v8::HandleScope scope(context->GetIsolate());
Realm* realm = Realm::GetCurrent(context);
// Recreate the buffer in the constructor.
BindingData* binding = realm->AddBindingData<BindingData>(context, holder);
CHECK_NOT_NULL(binding);
}

void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity I exploring places where fast API could be applied, came across this function and PR, I experimented a bit it seems v8::FastApiTypedArray<uint8_t>& is indeed supported and it does get caught in fast call, but I am not sure how one could extract ->Buffer() from v8::FastApiTypedArray<uint8_t>&, is that even possible? just wondering do let me know your thoughts cc @anonrig @joyeecheung

Thank You!

Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
CHECK_GE(args.Length(), 2);
CHECK(args[0]->IsString());
CHECK(args[1]->IsUint8Array());
BindingData* binding_data = Realm::GetBindingData<BindingData>(args);

Local<String> source = args[0].As<String>();

Local<Uint8Array> dest = args[1].As<Uint8Array>();
Local<ArrayBuffer> buf = dest->Buffer();
char* write_result = static_cast<char*>(buf->Data()) + dest->ByteOffset();
size_t dest_length = dest->ByteLength();

int nchars;
int written = source->WriteUtf8(
isolate,
write_result,
dest_length,
&nchars,
String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);

binding_data->encode_into_results_buffer_[0] = nchars;
binding_data->encode_into_results_buffer_[1] = written;
}

// Encode a single string to a UTF-8 Uint8Array (not Buffer).
// Used in TextEncoder.prototype.encode.
void BindingData::EncodeUtf8String(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsString());

Local<String> str = args[0].As<String>();
size_t length = str->Utf8Length(isolate);

Local<ArrayBuffer> ab;
{
NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data());
std::unique_ptr<BackingStore> bs =
ArrayBuffer::NewBackingStore(isolate, length);

CHECK(bs);

str->WriteUtf8(isolate,
static_cast<char*>(bs->Data()),
-1, // We are certain that `data` is sufficiently large
nullptr,
String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);

ab = ArrayBuffer::New(isolate, std::move(bs));
}

auto array = Uint8Array::New(ab, 0, length);
args.GetReturnValue().Set(array);
}

// Convert the input into an encoded string
void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args); // list, flags

CHECK_GE(args.Length(), 1);

if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
args[0]->IsArrayBufferView())) {
return node::THROW_ERR_INVALID_ARG_TYPE(
env->isolate(),
"The \"list\" argument must be an instance of SharedArrayBuffer, "
"ArrayBuffer or ArrayBufferView.");
}

ArrayBufferViewContents<char> buffer(args[0]);

bool ignore_bom = args[1]->IsTrue();
bool has_fatal = args[2]->IsTrue();

const char* data = buffer.data();
size_t length = buffer.length();

if (has_fatal) {
auto result = simdutf::validate_utf8_with_errors(data, length);

if (result.error) {
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
env->isolate(), "The encoded data was not valid for encoding utf-8");
}
}

if (!ignore_bom && length >= 3) {
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
data += 3;
length -= 3;
}
}

if (length == 0) return args.GetReturnValue().SetEmptyString();

Local<Value> error;
MaybeLocal<Value> maybe_ret =
StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
Local<Value> ret;

if (!maybe_ret.ToLocal(&ret)) {
CHECK(!error.IsEmpty());
env->isolate()->ThrowException(error);
return;
}

args.GetReturnValue().Set(ret);
}

void BindingData::Initialize(Local<Object> target,
Local<Value> unused,
Local<Context> context,
void* priv) {
Realm* realm = Realm::GetCurrent(context);
BindingData* const binding_data =
realm->AddBindingData<BindingData>(context, target);
if (binding_data == nullptr) return;

SetMethod(context, target, "encodeInto", EncodeInto);
SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
}

void BindingData::RegisterTimerExternalReferences(
ExternalReferenceRegistry* registry) {
registry->Register(EncodeInto);
registry->Register(EncodeUtf8String);
registry->Register(DecodeUTF8);
}

} // namespace encoding_binding
} // namespace node

NODE_BINDING_CONTEXT_AWARE_INTERNAL(
encoding_binding, node::encoding_binding::BindingData::Initialize)
NODE_BINDING_EXTERNAL_REFERENCE(
encoding_binding,
node::encoding_binding::BindingData::RegisterTimerExternalReferences)
50 changes: 50 additions & 0 deletions src/encoding_binding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#ifndef SRC_ENCODING_BINDING_H_
#define SRC_ENCODING_BINDING_H_

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <cinttypes>
#include "aliased_buffer.h"
#include "node_snapshotable.h"
#include "v8-fast-api-calls.h"

namespace node {
class ExternalReferenceRegistry;

namespace encoding_binding {
class BindingData : public SnapshotableObject {
public:
BindingData(Realm* realm, v8::Local<v8::Object> obj);

using InternalFieldInfo = InternalFieldInfoBase;

SERIALIZABLE_OBJECT_METHODS()
SET_BINDING_ID(encoding_binding_data)

void MemoryInfo(MemoryTracker* tracker) const override;
SET_SELF_SIZE(BindingData)
SET_MEMORY_INFO_NAME(BindingData)

static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);

static void Initialize(v8::Local<v8::Object> target,
v8::Local<v8::Value> unused,
v8::Local<v8::Context> context,
void* priv);
static void RegisterTimerExternalReferences(
ExternalReferenceRegistry* registry);

private:
static constexpr size_t kEncodeIntoResultsLength = 2;
AliasedUint32Array encode_into_results_buffer_;
};

} // namespace encoding_binding

} // namespace node

#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif // SRC_ENCODING_BINDING_H_
1 change: 1 addition & 0 deletions src/node_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
V(config) \
V(contextify) \
V(credentials) \
V(encoding_binding) \
V(errors) \
V(fs) \
V(fs_dir) \
Expand Down
Loading