Skip to content

Commit

Permalink
win,src: revert simdutf to icu for win arm64
Browse files Browse the repository at this point in the history
Many tests started failing on ARM64 Windows after migrating from ICU to
simdutf. This change restores the ICU-based conversions on that platform only.

Refs: nodejs#46471
Refs: nodejs#46472
Refs: nodejs#46548
Refs: simdutf/simdutf#216
  • Loading branch information
StefanStojanovic committed Feb 22, 2023
1 parent 7a5b9d0 commit b7fbc9b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
14 changes: 14 additions & 0 deletions src/inspector/main_thread_interface.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
#include "main_thread_interface.h"

#include "env-inl.h"
#if !defined(_WIN32) || !defined(_M_ARM64)
#include "simdutf.h"
#endif
#include "v8-inspector.h"
#if defined(_WIN32) && defined(_M_ARM64)
#include "util-inl.h"

#include <unicode/unistr.h>
#endif

#include <functional>
#include <memory>
Expand Down Expand Up @@ -286,12 +293,19 @@ Deletable* MainThreadInterface::GetObjectIfExists(int id) {
}

std::unique_ptr<StringBuffer> Utf8ToStringView(const std::string_view message) {
#if defined(_WIN32) && defined(_M_ARM64)
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
icu::StringPiece(message.data(), message.length()));
StringView view(reinterpret_cast<const uint16_t*>(utf16.getBuffer()),
utf16.length());
#else
size_t expected_u16_length =
simdutf::utf16_length_from_utf8(message.data(), message.length());
MaybeStackBuffer<char16_t> buffer(expected_u16_length);
size_t utf16_length = simdutf::convert_utf8_to_utf16(
message.data(), message.length(), buffer.out());
StringView view(reinterpret_cast<uint16_t*>(buffer.out()), utf16_length);
#endif
return StringBuffer::create(view);
}

Expand Down
55 changes: 55 additions & 0 deletions src/inspector/node_string.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#include "node_string.h"
#include "node/inspector/protocol/Protocol.h"
#include "node_util.h"
#if !defined(_WIN32) || !defined(_M_ARM64)
#include "simdutf.h"
#endif

#if defined(_WIN32) && defined(_M_ARM64)
#include <unicode/unistr.h>
#endif

namespace node {
namespace inspector {
Expand All @@ -15,6 +21,13 @@ void builderAppendQuotedString(StringBuilder& builder,
const std::string_view string) {
builder.put('"');
if (!string.empty()) {
#if defined(_WIN32) && defined(_M_ARM64)
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
icu::StringPiece(string.data(), string.length()));
escapeWideStringForJSON(
reinterpret_cast<const uint16_t*>(utf16.getBuffer()), utf16.length(),
&builder);
#else
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
Expand All @@ -24,13 +37,21 @@ void builderAppendQuotedString(StringBuilder& builder,
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
#endif
}
builder.put('"');
}

std::unique_ptr<Value> parseJSON(const std::string_view string) {
if (string.empty())
return nullptr;
#if defined(_WIN32) && defined(_M_ARM64)
icu::UnicodeString utf16 =
icu::UnicodeString::fromUTF8(icu::StringPiece(string.data(),
string.length()));
return parseJSONCharacters(
reinterpret_cast<const uint16_t*>(utf16.getBuffer()), utf16.length());
#else
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
Expand All @@ -39,6 +60,7 @@ std::unique_ptr<Value> parseJSON(const std::string_view string) {
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length);
#endif
}

std::unique_ptr<Value> parseJSON(v8_inspector::StringView string) {
Expand All @@ -56,6 +78,26 @@ String StringViewToUtf8(v8_inspector::StringView view) {
return std::string(reinterpret_cast<const char*>(view.characters8()),
view.length());
}
#if defined(_WIN32) && defined(_M_ARM64)
const uint16_t* source = view.characters16();
const UChar* unicodeSource = reinterpret_cast<const UChar*>(source);
static_assert(sizeof(*source) == sizeof(*unicodeSource),
"sizeof(*source) == sizeof(*unicodeSource)");

size_t result_length = view.length() * sizeof(*source);
std::string result(result_length, '\0');
icu::UnicodeString utf16(unicodeSource, view.length());
// ICU components for std::string compatibility are not enabled in build...
bool done = false;
while (!done) {
icu::CheckedArrayByteSink sink(&result[0], result_length);
utf16.toUTF8(sink);
result_length = sink.NumberOfBytesAppended();
result.resize(result_length);
done = !sink.Overflowed();
}
return result;
#else
const char16_t* source =
reinterpret_cast<const char16_t*>(view.characters16());
size_t expected_utf8_length =
Expand All @@ -65,6 +107,7 @@ String StringViewToUtf8(v8_inspector::StringView view) {
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
#endif
}

String fromDouble(double d) {
Expand Down Expand Up @@ -107,6 +150,11 @@ String fromUTF8(const uint8_t* data, size_t length) {
}

String fromUTF16(const uint16_t* data, size_t length) {
#if defined(_WIN32) && defined(_M_ARM64)
icu::UnicodeString utf16(reinterpret_cast<const char16_t*>(data), length);
std::string result;
return utf16.toUTF8String(result);
#else
auto casted_data = reinterpret_cast<const char16_t*>(data);
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(casted_data, length);
Expand All @@ -115,15 +163,22 @@ String fromUTF16(const uint16_t* data, size_t length) {
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
#endif
}

// Returns the bytes backing `s` viewed as unsigned 8-bit values. No copy is
// made: the returned pointer is `s.data()` reinterpreted.
const uint8_t* CharactersUTF8(const std::string_view s) {
  const char* const raw_bytes = s.data();
  return reinterpret_cast<const uint8_t*>(raw_bytes);
}

// Reports how many characters the UTF-8 string `s` contains, as computed by
// simdutf's UTF-32 length calculation or, on Windows on ARM64, by ICU's
// countChar32() on the string converted from UTF-8.
size_t CharacterCount(const std::string_view s) {
#if !defined(_WIN32) || !defined(_M_ARM64)
  // TODO(@anonrig): Test to make sure CharacterCount returns correctly.
  const size_t code_points =
      simdutf::utf32_length_from_utf8(s.data(), s.length());
  return code_points;
#else
  const icu::StringPiece utf8_piece(s.data(), s.length());
  const icu::UnicodeString decoded = icu::UnicodeString::fromUTF8(utf8_piece);
  return decoded.countChar32();
#endif
}

} // namespace StringUtil
Expand Down

0 comments on commit b7fbc9b

Please sign in to comment.