Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: remove icu usage from node_string.cc #46548

Merged
merged 1 commit into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 42 additions & 39 deletions src/inspector/node_string.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#include "node_string.h"
#include "node/inspector/protocol/Protocol.h"
#include "node_util.h"
#include "simdutf.h"

#include <unicode/unistr.h>

namespace node {
namespace inspector {
namespace protocol {
Expand All @@ -12,27 +11,34 @@ namespace StringUtil {
size_t kNotFound = std::string::npos;

// Appends `string` to `builder` wrapped in double quotes: writes '"', then
// (only when `string` is non-empty) converts the text from UTF-8 to UTF-16
// and hands the UTF-16 buffer to escapeWideStringForJSON(), then writes the
// closing '"'.
//
// NOTE(review): this span is a rendered diff without +/- markers. It appears
// to interleave the removed ICU-based lines (`const String&` signature,
// icu::UnicodeString conversion) with the added simdutf-based lines
// (`std::string_view` signature, MaybeStackBuffer conversion) -- confirm
// against the actual commit before treating it as compilable code.
// NOLINTNEXTLINE(runtime/references) V8 API requirement
void builderAppendQuotedString(StringBuilder& builder, const String& string) {
void builderAppendQuotedString(StringBuilder& builder,
const std::string_view string) {
builder.put('"');
if (!string.empty()) {
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
icu::StringPiece(string.data(), string.length()));
escapeWideStringForJSON(
reinterpret_cast<const uint16_t*>(utf16.getBuffer()), utf16.length(),
&builder);
// Ask simdutf for the UTF-16 length first so the destination buffer can be
// sized before the conversion writes into it.
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
anonrig marked this conversation as resolved.
Show resolved Hide resolved
// NOTE(review): CHECK_EQ presumably aborts on mismatch. If
// convert_utf8_to_utf16 signals malformed UTF-8 by returning 0 (verify in
// the simdutf docs), invalid input would trip this check rather than being
// handled gracefully -- confirm callers only pass valid UTF-8.
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
}
builder.put('"');
}

// Parses `string` (treated as UTF-8) as JSON. Returns nullptr for empty
// input; otherwise converts the text to UTF-16 and returns the result of
// parseJSONCharacters() on that buffer.
//
// NOTE(review): this span is a rendered diff without +/- markers. It appears
// to show the removed ICU-based signature/body followed by the added
// simdutf-based signature/body -- confirm against the actual commit.
std::unique_ptr<Value> parseJSON(const String& string) {
std::unique_ptr<Value> parseJSON(const std::string_view string) {
if (string.empty())
return nullptr;

icu::UnicodeString utf16 =
icu::UnicodeString::fromUTF8(icu::StringPiece(string.data(),
string.length()));
return parseJSONCharacters(
reinterpret_cast<const uint16_t*>(utf16.getBuffer()), utf16.length());
// Pre-compute the UTF-16 length so the buffer is sized before converting.
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
// NOTE(review): if convert_utf8_to_utf16 reports malformed UTF-8 by
// returning 0 (verify in the simdutf docs), this CHECK_EQ would presumably
// abort on invalid input instead of returning nullptr -- confirm intent.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length);
}

std::unique_ptr<Value> parseJSON(v8_inspector::StringView string) {
Expand All @@ -50,24 +56,15 @@ String StringViewToUtf8(v8_inspector::StringView view) {
return std::string(reinterpret_cast<const char*>(view.characters8()),
view.length());
}
const uint16_t* source = view.characters16();
const UChar* unicodeSource = reinterpret_cast<const UChar*>(source);
static_assert(sizeof(*source) == sizeof(*unicodeSource),
"sizeof(*source) == sizeof(*unicodeSource)");

size_t result_length = view.length() * sizeof(*source);
std::string result(result_length, '\0');
icu::UnicodeString utf16(unicodeSource, view.length());
// ICU components for std::string compatibility are not enabled in build...
bool done = false;
while (!done) {
icu::CheckedArrayByteSink sink(&result[0], result_length);
utf16.toUTF8(sink);
result_length = sink.NumberOfBytesAppended();
result.resize(result_length);
done = !sink.Overflowed();
}
return result;
const char16_t* source =
reinterpret_cast<const char16_t*>(view.characters16());
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(source, view.length());
MaybeStackBuffer<char> buffer(expected_utf8_length);
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
}

String fromDouble(double d) {
Expand All @@ -86,7 +83,8 @@ double toDouble(const char* buffer, size_t length, bool* ok) {
return d;
}

std::unique_ptr<Value> parseMessage(const std::string& message, bool binary) {
std::unique_ptr<Value> parseMessage(const std::string_view message,
bool binary) {
if (binary) {
return Value::parseBinary(
reinterpret_cast<const uint8_t*>(message.data()),
Expand All @@ -109,16 +107,21 @@ String fromUTF8(const uint8_t* data, size_t length) {
}

// Converts `length` UTF-16 code units starting at `data` into a UTF-8 String.
//
// NOTE(review): this span is a rendered diff without +/- markers. The three
// icu::UnicodeString lines appear to be the removed implementation and the
// simdutf lines after them the replacement -- confirm against the commit.
String fromUTF16(const uint16_t* data, size_t length) {
icu::UnicodeString utf16(reinterpret_cast<const char16_t*>(data), length);
std::string result;
return utf16.toUTF8String(result);
// simdutf takes char16_t*, so the uint16_t* input is reinterpreted.
auto casted_data = reinterpret_cast<const char16_t*>(data);
// Pre-compute the UTF-8 byte length so the buffer is sized before converting.
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(casted_data, length);
MaybeStackBuffer<char> buffer(expected_utf8_length);
size_t utf8_length =
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
// NOTE(review): presumably aborts if the converter wrote a different number
// of bytes than predicted (e.g. on malformed UTF-16) -- verify in simdutf docs.
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
}

// Returns s.data() reinterpreted as a pointer to uint8_t. No copy is made, so
// the pointer refers to storage owned by whatever backs `s`.
//
// NOTE(review): rendered diff without +/- markers; the `const String&` line
// appears to be the removed signature and the `std::string_view` line the
// added one -- confirm against the commit.
const uint8_t* CharactersUTF8(const String& s) {
const uint8_t* CharactersUTF8(const std::string_view s) {
return reinterpret_cast<const uint8_t*>(s.data());
}

// Returns simdutf::utf32_length_from_utf8() for the bytes of `s` --
// presumably the number of code points the UTF-8 text decodes to; see the
// TODO below, the result has not been verified by a test.
//
// NOTE(review): rendered diff without +/- markers; the `const String&` line
// appears to be the removed signature and the `std::string_view` line the
// added one -- confirm against the commit.
size_t CharacterCount(const String& s) {
size_t CharacterCount(const std::string_view s) {
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
return simdutf::utf32_length_from_utf8(s.data(), s.length());
}
Expand Down
21 changes: 13 additions & 8 deletions src/inspector/node_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,26 @@ double toDouble(const char* buffer, size_t length, bool* ok);
String StringViewToUtf8(v8_inspector::StringView view);

// NOLINTNEXTLINE(runtime/references)
void builderAppendQuotedString(StringBuilder& builder, const String&);
std::unique_ptr<Value> parseJSON(const String&);
void builderAppendQuotedString(StringBuilder& builder, const std::string_view);
std::unique_ptr<Value> parseJSON(const std::string_view);
std::unique_ptr<Value> parseJSON(v8_inspector::StringView view);

std::unique_ptr<Value> parseMessage(const std::string& message, bool binary);
std::unique_ptr<Value> parseMessage(const std::string_view message,
bool binary);
ProtocolMessage jsonToMessage(String message);
ProtocolMessage binaryToMessage(std::vector<uint8_t> message);
String fromUTF8(const uint8_t* data, size_t length);
String fromUTF16(const uint16_t* data, size_t length);
const uint8_t* CharactersUTF8(const String& s);
size_t CharacterCount(const String& s);
const uint8_t* CharactersUTF8(const std::string_view s);
size_t CharacterCount(const std::string_view s);

// Unimplemented. The generated code will fall back to CharactersUTF8().
// Both stubs ignore `s` and always return nullptr.
//
// NOTE(review): rendered diff without +/- markers; the two one-line
// `const String&` definitions appear to be the removed forms and the
// multi-line `std::string_view` definitions the added ones -- confirm
// against the commit.
inline uint8_t* CharactersLatin1(const String& s) { return nullptr; }
inline const uint16_t* CharactersUTF16(const String& s) { return nullptr; }
inline uint8_t* CharactersLatin1(const std::string_view s) {
return nullptr;
}
inline const uint16_t* CharactersUTF16(const std::string_view s) {
return nullptr;
}

extern size_t kNotFound;
} // namespace StringUtil
Expand All @@ -92,7 +97,7 @@ class Binary {
const uint8_t* data() const { UNREACHABLE(); }
size_t size() const { UNREACHABLE(); }
String toBase64() const { UNREACHABLE(); }
static Binary fromBase64(const String& base64, bool* success) {
static Binary fromBase64(const std::string_view base64, bool* success) {
UNREACHABLE();
}
static Binary fromSpan(const uint8_t* data, size_t size) { UNREACHABLE(); }
Expand Down