From ae842a40b586fcdc53e5c9d747b1fb48267cda22 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Thu, 15 Dec 2022 18:11:07 -0500
Subject: [PATCH] util: add fast path for text-decoder fatal flag

PR-URL: https://github.com/nodejs/node/pull/45803
Reviewed-By: Robert Nagy
Reviewed-By: Matteo Collina
Reviewed-By: Anna Henningsen
Reviewed-By: Michael Dawson
---
 benchmark/util/text-decoder.js | 11 ++++++++---
 lib/internal/encoding.js       | 12 ++++++------
 src/node_buffer.cc             | 11 +++++++++++
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js
index 3d1ccc34bb8995..a6695028603f8a 100644
--- a/benchmark/util/text-decoder.js
+++ b/benchmark/util/text-decoder.js
@@ -5,13 +5,14 @@ const common = require('../common.js');
 const bench = common.createBenchmark(main, {
   encoding: ['utf-8', 'latin1', 'iso-8859-3'],
   ignoreBOM: [0, 1],
+  fatal: [0, 1],
   len: [256, 1024 * 16, 1024 * 512],
   n: [1e2],
   type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
 });
 
-function main({ encoding, len, n, ignoreBOM, type }) {
-  const decoder = new TextDecoder(encoding, { ignoreBOM });
+function main({ encoding, len, n, ignoreBOM, type, fatal }) {
+  const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
   let buf;
 
   switch (type) {
@@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {
 
   bench.start();
   for (let i = 0; i < n; i++) {
-    decoder.decode(buf);
+    try {
+      decoder.decode(buf);
+    } catch {
+      // eslint-disable no-empty
+    }
   }
   bench.end(n);
 }
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 5cf20ea04c98ff..e14a8c74918623 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
 const kEncoding = Symbol('encoding');
 const kDecoder = Symbol('decoder');
 const kEncoder = Symbol('encoder');
+const kFatal = Symbol('kFatal');
 const kUTF8FastPath = Symbol('kUTF8FastPath');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
@@ -396,17 +397,16 @@ function makeTextDecoderICU() {
         flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
       }
 
-      // Only support fast path for UTF-8 without FATAL flag
-      const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
-
       this[kDecoder] = true;
       this[kFlags] = flags;
       this[kEncoding] = enc;
       this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
-      this[kUTF8FastPath] = fastPathAvailable;
+      this[kFatal] = Boolean(options?.fatal);
+      // Only support fast path for UTF-8.
+      this[kUTF8FastPath] = enc === 'utf-8';
       this[kHandle] = undefined;
 
-      if (!fastPathAvailable) {
+      if (!this[kUTF8FastPath]) {
         this.#prepareConverter();
       }
     }
@@ -425,7 +425,7 @@ function makeTextDecoderICU() {
       this[kUTF8FastPath] &&= !(options?.stream);
 
       if (this[kUTF8FastPath]) {
-        return decodeUTF8(input, this[kIgnoreBOM]);
+        return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
       }
 
       this.#prepareConverter();
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 9179d21befd5de..dcafb9703b1022 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -28,6 +28,7 @@
 #include "node_internals.h"
 
 #include "env-inl.h"
+#include "simdutf.h"
 #include "string_bytes.h"
 #include "string_search.h"
 #include "util-inl.h"
@@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
   ArrayBufferViewContents<char> buffer(args[0]);
 
   bool ignore_bom = args[1]->IsTrue();
+  bool has_fatal = args[2]->IsTrue();
 
   const char* data = buffer.data();
   size_t length = buffer.length();
 
+  if (has_fatal) {
+    auto result = simdutf::validate_utf8_with_errors(data, length);
+
+    if (result.error) {
+      return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
+          env->isolate(), "The encoded data was not valid for encoding utf-8");
+    }
+  }
+
   if (!ignore_bom && length >= 3) {
     if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
       data += 3;