From ec7edfbdb7e3845e801499160a45cbb00e107075 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Sun, 19 Feb 2023 07:06:49 +0100 Subject: [PATCH] deps: update simdutf to 3.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/46621 Reviewed-By: Yagiz Nizipli Reviewed-By: Luigi Pinca Reviewed-By: Mohammed Keyvanzadeh Reviewed-By: Colin Ihrig Reviewed-By: Tobias Nießen --- deps/simdutf/simdutf.cpp | 221 ++++++++++++++++++++++++++------------- deps/simdutf/simdutf.h | 10 +- 2 files changed, 156 insertions(+), 75 deletions(-) diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index c987f9378bec66..ea0d95f422facf 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-01-18 12:43:26 -0500. Do not edit! */ +/* auto-generated on 2023-02-10 14:42:58 -0500. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp /* begin file src/simdutf.cpp */ #include "simdutf.h" @@ -9947,7 +9947,8 @@ namespace simdutf { namespace scalar { namespace { namespace ascii { - +#if SIMDUTF_IMPLEMENTATION_FALLBACK +// Only used by the fallback kernel. inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { const uint8_t *data = reinterpret_cast(buf); uint64_t pos = 0; @@ -9966,6 +9967,7 @@ inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { } return true; } +#endif inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept { const uint8_t *data = reinterpret_cast(buf); @@ -10006,6 +10008,8 @@ namespace simdutf { namespace scalar { namespace { namespace utf8 { +#if SIMDUTF_IMPLEMENTATION_FALLBACK +// only used by the fallback kernel. // credit: based on code from Google Fuchsia (Apache Licensed) inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { const uint8_t *data = reinterpret_cast(buf); @@ -10071,6 +10075,7 @@ inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { } return true; } +#endif inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept { const uint8_t *data = reinterpret_cast(buf); @@ -10177,16 +10182,6 @@ inline size_t utf16_length_from_utf8(const char* buf, size_t len) { return counter; } -inline size_t utf32_length_from_utf8(const char* buf, size_t len) { - const int8_t * p = reinterpret_cast(buf); - size_t counter{0}; - for(size_t i = 0; i < len; i++) { - // -65 is 0b10111111, anything larger in two-complement's should start a new code point. - if(p[i] > -65) { counter++; } - } - return counter; -} - } // utf8 namespace } // unnamed namespace } // namespace scalar @@ -10393,6 +10388,8 @@ namespace scalar { namespace { namespace utf32_to_utf8 { +#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 +// only used by the fallback and POWER kernel inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output) { const uint32_t *data = reinterpret_cast(buf); size_t pos = 0; @@ -10439,6 +10436,7 @@ inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output) } return utf8_output - start; } +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 } // utf32_to_utf8 namespace } // unnamed namespace @@ -11284,20 +11282,49 @@ inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_o return result(error_code::SUCCESS, utf16_output - start); } +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and we have + * up to len input bytes left, and we encountered some error. It is possible that + * the error is at 'buf' exactly, but it could also be in the previous bytes (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the current memory section + * and can be safely accessed. We prior_bytes to access safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occured prior to 'buf', the count value contain in the result + * will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ template -inline result rewind_and_convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) { +inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) { size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for(int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { + // We potentially need to go back in time and find a leading byte. + size_t how_far_back = 3; // 3 bytes in the past + current position + if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for(size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-i]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if(found_leading_bytes) { + buf -= i; + extra_len = i; break; - } else { - buf--; - extra_len++; } } - + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described in C Standard as . + // C Standard Section 4.1.5 defines size_t as an unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if(!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is continuation] + // Or we possibly have a stream that does not start with a leading byte. + return result(error_code::TOO_LONG, -how_far_back); + } result res = convert_with_errors(buf, len + extra_len, utf16_output); if (res.error) { res.count -= extra_len; @@ -11534,18 +11561,48 @@ inline result convert_with_errors(const char* buf, size_t len, char32_t* utf32_o return result(error_code::SUCCESS, utf32_output - start); } -inline result rewind_and_convert_with_errors(const char* buf, size_t len, char32_t* utf32_output) { +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and we have + * up to len input bytes left, and we encountered some error. It is possible that + * the error is at 'buf' exactly, but it could also be in the previous bytes location (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the current memory section + * and can be safely accessed. We prior_bytes to access safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occured prior to 'buf', the count value contain in the result + * will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char32_t* utf32_output) { size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for(int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { + // We potentially need to go back in time and find a leading byte. + size_t how_far_back = 3; // 3 bytes in the past + current position + if(how_far_back > prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for(size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-i]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if(found_leading_bytes) { + buf -= i; + extra_len = i; break; - } else { - buf--; - extra_len++; } } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described in C Standard as . + // C Standard Section 4.1.5 defines size_t as an unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if(!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is continuation] + // Or we possibly have a stream that does not start with a leading byte. + return result(error_code::TOO_LONG, -how_far_back); + } result res = convert_with_errors(buf, len + extra_len, utf32_output); if (res.error) { @@ -14405,7 +14462,9 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } @@ -14439,12 +14498,16 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -14728,7 +14791,7 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } @@ -14762,12 +14825,12 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -15617,7 +15680,7 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_ } simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept { - return scalar::utf8::utf32_length_from_utf8(input, length); + return scalar::utf8::count_code_points(input, length); } } // namespace fallback @@ -16766,7 +16829,9 @@ simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, si } else { break; } } if(!result) { - simdutf::result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in, final_in - in, out); + // rewind_and_convert_with_errors will seek a potential error from in onward, + // with the ability to go back up to in - init_in bytes, and read final_in - in bytes forward. + simdutf::result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in - init_in, in, final_in - in, out); res.count += (in - init_in); return res; } else { @@ -17762,7 +17827,7 @@ std::pair avx512_convert_utf32_to_utf16_with_errors(const cha /* begin file src/icelake/icelake_ascii_validation.inl.cpp */ // file included directly -const char* validate_ascii(const char* buf, size_t len) { +bool validate_ascii(const char* buf, size_t len) { const char* end = buf + len; const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); __m512i running_or = _mm512_setzero_si512(); @@ -17770,11 +17835,11 @@ const char* validate_ascii(const char* buf, size_t len) { const __m512i utf8 = _mm512_loadu_si512((const __m512i*)buf); running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, 0xf8); // running_or | (utf8 & ascii) } - if (_mm512_test_epi8_mask(running_or, running_or) != 0) { - return nullptr; - } else { - return buf; + if(buf < end) { + const __m512i utf8 = _mm512_maskz_loadu_epi8((uint64_t(1) << (end-buf)) - 1,(const __m512i*)buf); + running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, 0xf8); // running_or | (utf8 & ascii) } + return (_mm512_test_epi8_mask(running_or, running_or) == 0); } /* end file src/icelake/icelake_ascii_validation.inl.cpp */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf32_validation.inl.cpp @@ -18197,12 +18262,7 @@ simdutf_warn_unused result implementation::validate_utf8_with_errors(const char } simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - const char* tail = icelake::validate_ascii(buf, len); - if (tail) { - return scalar::ascii::validate(tail, len - (tail - buf)); - } else { - return false; - } + return icelake::validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept { @@ -18576,7 +18636,10 @@ simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(con uint32_t * utf32_output = reinterpret_cast(utf32); auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks(buf, len, utf32_output); if (!std::get<2>(ret)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(std::get<0>(ret), len - (std::get<0>(ret) - buf), reinterpret_cast(std::get<1>(ret))); + auto new_buf = std::get<0>(ret); + // rewind_and_convert_with_errors will seek a potential error from new_buf onward, + // with the ability to go back up to new_buf - buf bytes, and read len - (new_buf - buf) bytes forward. + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(new_buf - buf, new_buf, len - (new_buf - buf), reinterpret_cast(std::get<1>(ret))); res.count += (std::get<0>(ret) - buf); return res; } @@ -21978,7 +22041,9 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } @@ -22012,12 +22077,16 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -22301,7 +22370,7 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } @@ -22335,12 +22404,12 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -22931,7 +23000,7 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_ } simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept { - return utf8::utf32_length_from_utf8(input, length); + return scalar::utf8::count_code_points(input, length); } } // namespace haswell @@ -23701,7 +23770,9 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } @@ -23735,12 +23806,16 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -24024,7 +24099,7 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } @@ -24058,12 +24133,12 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -24425,7 +24500,7 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_ } simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept { - return scalar::utf8::utf32_length_from_utf8(input, length); + return scalar::utf8::count_code_points(input, length); } } // namespace ppc64 @@ -27296,7 +27371,9 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } @@ -27330,12 +27407,16 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(in + pos, size - pos, utf16_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos onward, + // with the ability to go back up to pos bytes, and read size-pos bytes forward. + result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -27619,7 +27700,7 @@ using namespace simd; this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } @@ -27653,12 +27734,12 @@ using namespace simd; } } if(errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } if(pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(in + pos, size - pos, utf32_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; @@ -28253,7 +28334,7 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_ } simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept { - return utf8::utf32_length_from_utf8(input, length); + return scalar::utf8::count_code_points(input, length); } } // namespace westmere diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index cf236fe3d1c8a9..0a57a69f732ca6 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-01-18 12:43:26 -0500. Do not edit! */ +/* auto-generated on 2023-02-10 14:42:58 -0500. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H @@ -538,7 +538,7 @@ enum error_code { HEADER_BITS, // Any byte must have fewer than 5 header bits. TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length // This is also the error when the input is truncated. - TOO_LONG, // The leading byte must not be a continuation byte. + TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte. OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters, // and U+FFFF for four-byte characters. TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII. @@ -572,7 +572,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "3.1.0" +#define SIMDUTF_VERSION "3.2.0" namespace simdutf { enum { @@ -583,7 +583,7 @@ enum { /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 1, + SIMDUTF_VERSION_MINOR = 2, /** * The revision (major.minor.REVISION) of simdutf being used. */ @@ -2724,7 +2724,7 @@ class detect_best_supported_implementation_on_first_use; } // namespace internal /** - * The list of available implementations compiled into simdjson. + * The list of available implementations compiled into simdutf. */ extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();