From 0bc4c17e5705ea042cda392343d7301bb91873ed Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 27 Jan 2023 15:49:13 -0500 Subject: [PATCH] deps: add ada as a dependency PR-URL: https://github.com/nodejs/node/pull/46410 Backport-PR-URL: https://github.com/nodejs/node/pull/47435 Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum Reviewed-By: Tiancheng "Timothy" Gu Reviewed-By: Matteo Collina Reviewed-By: Rafael Gonzaga Reviewed-By: Robert Nagy --- .github/workflows/tools.yml | 10 + LICENSE | 22 + Makefile | 2 +- deps/ada/README.md | 12 + deps/ada/ada.cpp | 2504 +++++++++++++++++ deps/ada/ada.gyp | 32 + deps/ada/ada.h | 4388 ++++++++++++++++++++++++++++++ node.gyp | 4 + tools/dep_updaters/update-ada.sh | 52 + tools/license-builder.sh | 2 + 10 files changed, 7027 insertions(+), 1 deletion(-) create mode 100644 deps/ada/README.md create mode 100644 deps/ada/ada.cpp create mode 100644 deps/ada/ada.gyp create mode 100644 deps/ada/ada.h create mode 100644 tools/dep_updaters/update-ada.sh diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml index f5091a872a6b7e..52dc634ee846a2 100644 --- a/.github/workflows/tools.yml +++ b/.github/workflows/tools.yml @@ -161,6 +161,16 @@ jobs: cat temp-output tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true rm temp-output + - id: ada + subsystem: deps + label: dependencies + run: | + NEW_VERSION=$(gh api repos/ada-url/ada/releases/latest -q '.tag_name|ltrimstr("v")') + CURRENT_VERSION=$(grep "#define ADA_VERSION" ./deps/ada/ada.h | sed -n "s/^.*VERSION \(.*\)/\1/p") + if [ "$NEW_VERSION" != "$CURRENT_VERSION" ]; then + echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_ENV + ./tools/dep_updaters/update-ada.sh "$NEW_VERSION" + fi steps: - uses: actions/checkout@v3 with: diff --git a/LICENSE b/LICENSE index f8fa687202dcb9..26221cb042bdd1 100644 --- a/LICENSE +++ b/LICENSE @@ -1338,6 +1338,28 @@ The externally maintained libraries used by Node.js are: CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS IN THE SOFTWARE. """ +- ada, located at deps/ada, is licensed as follows: + """ + Copyright 2023 Ada authors + + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal in + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ """ + - npm, located at deps/npm, is licensed as follows: """ The npm application diff --git a/Makefile b/Makefile index a6485b87f69767..0be0659d372d2d 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ with-code-cache test-code-cache: out/Makefile: config.gypi common.gypi node.gyp \ deps/uv/uv.gyp deps/llhttp/llhttp.gyp deps/zlib/zlib.gyp \ - deps/simdutf/simdutf.gyp \ + deps/simdutf/simdutf.gyp deps/ada/ada.gyp \ tools/v8_gypfiles/toolchain.gypi tools/v8_gypfiles/features.gypi \ tools/v8_gypfiles/inspector.gypi tools/v8_gypfiles/v8.gyp $(PYTHON) tools/gyp_node.py -f make diff --git a/deps/ada/README.md b/deps/ada/README.md new file mode 100644 index 00000000000000..018262c9519f0a --- /dev/null +++ b/deps/ada/README.md @@ -0,0 +1,12 @@ +# ada + +This project implements WHATWG URL specification in a performant way. + +The source is pulled from: https://github.com/ada-url/ada + +Active development occurs in the default branch (currently named `main`). + +## Updating + +See [tools/dep_updaters/README.md#ada](../../tools/dep_updaters/README.md#ada) +for instructions. diff --git a/deps/ada/ada.cpp b/deps/ada/ada.cpp new file mode 100644 index 00000000000000..e09f93bb225c60 --- /dev/null +++ b/deps/ada/ada.cpp @@ -0,0 +1,2504 @@ +/* auto-generated on 2023-02-06 08:25:59 -0500. Do not edit! */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=ada.cpp +/* begin file src/ada.cpp */ +#include "ada.h" +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=checkers.cpp +/* begin file src/checkers.cpp */ +#include + +namespace ada::checkers { + + ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { + size_t last_dot = view.rfind('.'); + if(last_dot == view.size() - 1) { + view.remove_suffix(1); + last_dot = view.rfind('.'); + } + std::string_view number = (last_dot == std::string_view::npos) ? 
view : view.substr(last_dot+1); + if(number.empty()) { return false; } + /** Optimization opportunity: we have basically identified the last number of the + ipv4 if we return true here. We might as well parse it and have at least one + number parsed when we get to parse_ipv4. */ + if(std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) { return true; } + return (checkers::has_hex_prefix(number) && std::all_of(number.begin()+2, number.end(), ada::unicode::is_lowercase_hex)); + } + + + // for use with path_signature + static constexpr uint8_t path_signature_table[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + ada_really_inline constexpr uint8_t path_signature(std::string_view input) noexcept { + size_t i = 0; + uint8_t accumulator{}; + for (; i + 7 < input.size(); i += 8) { + accumulator |= uint8_t(path_signature_table[uint8_t(input[i])] | + path_signature_table[uint8_t(input[i + 1])] | + path_signature_table[uint8_t(input[i + 2])] | + path_signature_table[uint8_t(input[i + 3])] | + path_signature_table[uint8_t(input[i + 4])] | + path_signature_table[uint8_t(input[i + 5])] | + path_signature_table[uint8_t(input[i + 6])] | + path_signature_table[uint8_t(input[i + 7])]); + } + for (; i < input.size(); i++) { + 
accumulator |= path_signature_table[uint8_t(input[i])]; + } + return accumulator; + } + + + ada_really_inline constexpr bool verify_dns_length(std::string_view input) noexcept { + if(input.back() == '.') { + if(input.size() > 254) return false; + } else if (input.size() > 253) return false; + + size_t start = 0; + while (start < input.size()) { + auto dot_location = input.find('.', start); + // If not found, it's likely the end of the domain + if(dot_location == std::string_view::npos) dot_location = input.size(); + + auto label_size = dot_location - start; + if (label_size > 63 || label_size == 0) return false; + + start = dot_location + 1; + } + + return true; + } +} // namespace ada::checkers +/* end file src/checkers.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=unicode.cpp +/* begin file src/unicode.cpp */ + +#include +#if ADA_HAS_ICU +// We are good. +#else + +#if defined(_WIN32) && ADA_WINDOWS_TO_ASCII_FALLBACK + +#ifndef __wtypes_h__ +#include +#endif // __wtypes_h__ + +#ifndef __WINDEF_ +#include +#endif // __WINDEF_ + +#include +#endif //defined(_WIN32) && ADA_WINDOWS_TO_ASCII_FALLBACK + +#endif // ADA_HAS_ICU + +namespace ada::unicode { + + constexpr bool to_lower_ascii(char * input, size_t length) noexcept { + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + uint64_t broadcast_80 = broadcast(0x80); + uint64_t broadcast_Ap = broadcast(128 - 'A'); + uint64_t broadcast_Zp = broadcast(128 - 'Z'); + uint64_t non_ascii = 0; + size_t i = 0; + + for (; i + 7 < length; i += 8) { + uint64_t word{}; + memcpy(&word, input + i, sizeof(word)); + non_ascii |= (word & broadcast_80); + word ^= (((word+broadcast_Ap)^(word+broadcast_Zp))&broadcast_80)>>2; + memcpy(input + i, &word, sizeof(word)); + } + if (i < length) { + uint64_t word{}; + memcpy(&word, input + i, length - i); + non_ascii |= (word & broadcast_80); + word ^= (((word+broadcast_Ap)^(word+broadcast_Zp))&broadcast_80)>>2; + memcpy(input 
+ i, &word, length - i); + } + return non_ascii == 0; + } + + ada_really_inline constexpr bool has_tabs_or_newline(std::string_view user_input) noexcept { + auto has_zero_byte = [](uint64_t v) { + return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); + }; + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + size_t i = 0; + uint64_t mask1 = broadcast('\r'); + uint64_t mask2 = broadcast('\n'); + uint64_t mask3 = broadcast('\t'); + uint64_t running{0}; + for (; i + 7 < user_input.size(); i += 8) { + uint64_t word{}; + memcpy(&word, user_input.data() + i, sizeof(word)); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); + } + if (i < user_input.size()) { + uint64_t word{}; + memcpy(&word, user_input.data() + i, user_input.size() - i); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); + } + return running; + } + + // A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, U+0020 SPACE, U+0023 (#), + // U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), + // U+005E (^), or U+007C (|). 
+  constexpr static bool is_forbidden_host_code_point_table[] = {  // lookup table indexed by byte value (0..255); 1 marks a forbidden host code point, 0 an allowed byte
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00-0x17: NUL, TAB, LF, CR
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,  // 0x18-0x2F: space, '#', '/'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  // 0x30-0x47: ':', '<', '>', '?', '@'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  // 0x48-0x5F: '[', backslash, ']', '^'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x60-0x77: none
+    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x78-0x8F: '|'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90-0xA7: none
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xA8-0xBF: none
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xC0-0xD7: none
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xD8-0xEF: none
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};  // 0xF0-0xFF: none
+  static_assert(sizeof(is_forbidden_host_code_point_table) == 256);  // sizeof counts bytes, so this only holds when the initializer supplies one single-byte entry per possible uint8_t index
+  // Returns true when the byte c is flagged in is_forbidden_host_code_point_table; a single table load, no branching.
+  ada_really_inline constexpr bool is_forbidden_host_code_point(const char c) noexcept {
+    return is_forbidden_host_code_point_table[uint8_t(c)];  // char may be signed: the uint8_t conversion maps it into 0..255 before it is used as an index
+  }
+  // Compile-time spot checks: each listed character must be reported as forbidden, otherwise the build fails.
+  static_assert(unicode::is_forbidden_host_code_point('\0'));
+  static_assert(unicode::is_forbidden_host_code_point('\t'));
+  static_assert(unicode::is_forbidden_host_code_point('\n'));
+  static_assert(unicode::is_forbidden_host_code_point('\r'));
+  static_assert(unicode::is_forbidden_host_code_point(' '));
+  static_assert(unicode::is_forbidden_host_code_point('#'));
+  static_assert(unicode::is_forbidden_host_code_point('/'));
+  static_assert(unicode::is_forbidden_host_code_point(':'));
+  static_assert(unicode::is_forbidden_host_code_point('?'));
+  static_assert(unicode::is_forbidden_host_code_point('@'));
+  static_assert(unicode::is_forbidden_host_code_point('['));
+  static_assert(unicode::is_forbidden_host_code_point('?'));  // NOTE(review): repeats the '?' check made three asserts earlier; redundant but harmless
+  static_assert(unicode::is_forbidden_host_code_point('<'));
+  static_assert(unicode::is_forbidden_host_code_point('>'));
+  static_assert(unicode::is_forbidden_host_code_point('\\'));
+
static_assert(unicode::is_forbidden_host_code_point(']'));  // last three compile-time spot checks for is_forbidden_host_code_point
+  static_assert(unicode::is_forbidden_host_code_point('^'));
+  static_assert(unicode::is_forbidden_host_code_point('|'));
+  // Lookup table indexed by byte value (0..255); 1 marks a forbidden domain code point. Stored as uint8_t so entries can be OR-ed together.
+constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x00-0x17: all set (control bytes)
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,  // 0x18-0x2F: 0x18-0x1F, space, '#', '%', '/'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  // 0x30-0x47: ':', '<', '>', '?', '@'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  // 0x48-0x5F: '[', backslash, ']', '^'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x60-0x77: none
+    0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x78-0x8F: '|', 0x7F (DEL), and every byte from 0x80 up
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x90-0xA7: all set
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xA8-0xBF: all set
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xC0-0xD7: all set
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xD8-0xEF: all set
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};  // 0xF0-0xFF: all set
+  // The table must hold exactly one uint8_t entry for every possible byte value used as an index below.
+  static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
+  // Returns true when the byte c is flagged in is_forbidden_domain_code_point_table.
+  ada_really_inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept {
+    return is_forbidden_domain_code_point_table[uint8_t(c)];  // uint8_t conversion keeps the index in 0..255 even where char is signed
+  }
+  // Returns true when at least one of the `length` bytes starting at `input` is flagged in the table; the buffer is only read, and every byte is visited (no early exit).
+  ada_really_inline constexpr bool contains_forbidden_domain_code_point(char * input, size_t length) noexcept {
+    size_t i = 0;
+    uint8_t accumulator{};  // OR of the table flags of all bytes seen so far; stays 0 while nothing forbidden was met
+    for(; i + 4 <= length; i+=4) {  // main loop: four bytes per iteration, flags merged with OR instead of branching per byte
+      accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i])];
+      accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+1])];
+      accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+2])];
+      accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+3])];
+    }
+    for(; i < length; i++) {  // tail: the 0 to 3 bytes left over after the four-byte loop
+      accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i])];
+    }
+    return accumulator;  // implicit uint8_t -> bool conversion: non-zero means a forbidden byte was found
+  }
+  // Compile-time spot checks: each listed character must be reported as a forbidden domain code point, otherwise the build fails.
+  static_assert(unicode::is_forbidden_domain_code_point('%'));
+
static_assert(unicode::is_forbidden_domain_code_point('\x7f')); + static_assert(unicode::is_forbidden_domain_code_point('\0')); + static_assert(unicode::is_forbidden_domain_code_point('\t')); + static_assert(unicode::is_forbidden_domain_code_point('\n')); + static_assert(unicode::is_forbidden_domain_code_point('\r')); + static_assert(unicode::is_forbidden_domain_code_point(' ')); + static_assert(unicode::is_forbidden_domain_code_point('#')); + static_assert(unicode::is_forbidden_domain_code_point('/')); + static_assert(unicode::is_forbidden_domain_code_point(':')); + static_assert(unicode::is_forbidden_domain_code_point('?')); + static_assert(unicode::is_forbidden_domain_code_point('@')); + static_assert(unicode::is_forbidden_domain_code_point('[')); + static_assert(unicode::is_forbidden_domain_code_point('?')); + static_assert(unicode::is_forbidden_domain_code_point('<')); + static_assert(unicode::is_forbidden_domain_code_point('>')); + static_assert(unicode::is_forbidden_domain_code_point('\\')); + static_assert(unicode::is_forbidden_domain_code_point(']')); + static_assert(unicode::is_forbidden_domain_code_point('^')); + static_assert(unicode::is_forbidden_domain_code_point('|')); + + constexpr static bool is_alnum_plus_table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + static_assert(sizeof(is_alnum_plus_table) == 256); + + ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { + return is_alnum_plus_table[uint8_t(c)]; + // A table is almost surely much faster than the + // following under most compilers: return + // return (std::isalnum(c) || c == '+' || c == '-' || c == '.'); + } + static_assert(unicode::is_alnum_plus('+')); + static_assert(unicode::is_alnum_plus('-')); + static_assert(unicode::is_alnum_plus('.')); + static_assert(unicode::is_alnum_plus('0')); + static_assert(unicode::is_alnum_plus('1')); + static_assert(unicode::is_alnum_plus('a')); + static_assert(unicode::is_alnum_plus('b')); + + ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c<= 'f'); + } + + ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept { + return (unsigned char) c <= ' '; + } + + ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept { + return c == '\t' || c == '\n' || c == '\r'; + } + + constexpr std::string_view table_is_double_dot_path_segment[] = {"..", "%2e.", ".%2e", "%2e%2e"}; + + ada_really_inline ada_constexpr bool is_double_dot_path_segment(std::string_view input) noexcept { + // This will catch most cases: + // The length must be 2,4 or 6. + // We divide by two and require + // that the result be between 1 and 3 inclusively. + uint64_t half_length = uint64_t(input.size())/2; + if(half_length - 1 > 2) { return false; } + // We have a string of length 2, 4 or 6. + // We now check the first character: + if((input[0] != '.') && (input[0] != '%')) { return false; } + // We are unlikely the get beyond this point. 
+ int hash_value = (input.size() + (unsigned)(input[0])) & 3; + const std::string_view target = table_is_double_dot_path_segment[hash_value]; + if(target.size() != input.size()) { return false; } + // We almost never get here. + // Optimizing the rest is relatively unimportant. + auto prefix_equal_unsafe = [](std::string_view a, std::string_view b) { + uint16_t A, B; + memcpy(&A,a.data(), sizeof(A)); + memcpy(&B,b.data(), sizeof(B)); + return A == B; + }; + if(!prefix_equal_unsafe(input,target)) { return false; } + for(size_t i = 2; i < input.size(); i++) { + char c = input[i]; + if((uint8_t((c|0x20) - 0x61) <= 25 ? (c|0x20) : c) != target[i]) { return false; } + } + return true; + // The above code might be a bit better than the code below. Compilers + // are not stupid and may use the fact that these strings have length 2,4 and 6 + // and other tricks. + //return input == ".." || + // input == ".%2e" || input == ".%2E" || + // input == "%2e." || input == "%2E." || + // input == "%2e%2e" || input == "%2E%2E" || input == "%2E%2e" || input == "%2e%2E"; + } + + ada_really_inline constexpr bool is_single_dot_path_segment(std::string_view input) noexcept { + return input == "." || input == "%2e" || input == "%2E"; + } + + ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { + return (c >= '0' && c <= '9') || (c >= 'a' && c<= 'f'); + } + + unsigned constexpr convert_hex_to_binary(const char c) noexcept { + // this code can be optimized. + if (c <= '9') { return c - '0'; } + char del = c >= 'a' ? 'a' : 'A'; + return 10 + (c - del); + } + + std::string percent_decode(const std::string_view input, size_t first_percent) { + // next line is for safety only, we expect users to avoid calling percent_decode + // when first_percent is outside the range. 
+ if(first_percent == std::string_view::npos) { return std::string(input); } + std::string dest(input.substr(0, first_percent)); + dest.reserve(input.length()); + const char* pointer = input.data() + first_percent; + const char* end = input.data() + input.size(); + // Optimization opportunity: if the following code gets + // called often, it can be optimized quite a bit. + while (pointer < end) { + const char ch = pointer[0]; + size_t remaining = end - pointer - 1; + if (ch != '%' || remaining < 2 || + (//ch == '%' && // It is unnecessary to check that ch == '%'. + (!is_ascii_hex_digit(pointer[1]) || + !is_ascii_hex_digit(pointer[2])))) { + dest += ch; + pointer++; + continue; + } else { + unsigned a = convert_hex_to_binary(pointer[1]); + unsigned b = convert_hex_to_binary(pointer[2]); + char c = static_cast(a * 16 + b); + dest += c; + pointer += 3; + } + } + return dest; + } + + std::string percent_encode(const std::string_view input, const uint8_t character_set[]) { + auto pointer = std::find_if(input.begin(), input.end(), [character_set](const char c) { + return character_sets::bit_at(character_set, c); + }); + // Optimization: Don't iterate if percent encode is not required + if (pointer == input.end()) { return std::string(input); } + + std::string result(input.substr(0,std::distance(input.begin(), pointer))); + result.reserve(input.length()); // in the worst case, percent encoding might produce 3 characters. 
+ + for (;pointer != input.end(); pointer++) { + if (character_sets::bit_at(character_set, *pointer)) { + result.append(character_sets::hex + uint8_t(*pointer) * 4, 3); + } else { + result += *pointer; + } + } + + return result; + } + + + bool percent_encode(const std::string_view input, const uint8_t character_set[], std::string &out) { + auto pointer = std::find_if(input.begin(), input.end(), [character_set](const char c) { + return character_sets::bit_at(character_set, c); + }); + // Optimization: Don't iterate if percent encode is not required + if (pointer == input.end()) { return false; } + out.clear(); + out.append(input.data(), std::distance(input.begin(), pointer)); + + for (;pointer != input.end(); pointer++) { + if (character_sets::bit_at(character_set, *pointer)) { + out.append(character_sets::hex + uint8_t(*pointer) * 4, 3); + } else { + out += *pointer; + } + } + return true; + } + + bool to_ascii(std::optional& out, const std::string_view plain, const bool be_strict, size_t first_percent) { + std::string percent_decoded_buffer; + std::string_view input = plain; + if(first_percent != std::string_view::npos) { + percent_decoded_buffer = unicode::percent_decode(plain, first_percent); + input = percent_decoded_buffer; + } +#if ADA_HAS_ICU + out = std::string(255, 0); + + UErrorCode status = U_ZERO_ERROR; + uint32_t options = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_NONTRANSITIONAL_TO_ASCII; + + if (be_strict) { + options |= UIDNA_USE_STD3_RULES; + } + + UIDNA* uidna = uidna_openUTS46(options, &status); + if (U_FAILURE(status)) { + return false; + } + + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + // RFC 1035 section 2.3.4. + // The domain name must be at most 255 octets. + // It cannot contain a label longer than 63 octets. + // Thus we should never need more than 255 octets, if we + // do the domain name is in error. 
+ int32_t length = uidna_nameToASCII_UTF8(uidna, + input.data(), + int32_t(input.length()), + out.value().data(), 255, + &info, + &status); + + if (status == U_BUFFER_OVERFLOW_ERROR) { + status = U_ZERO_ERROR; + out.value().resize(length); + // When be_strict is true, this should not be allowed! + length = uidna_nameToASCII_UTF8(uidna, + input.data(), + int32_t(input.length()), + out.value().data(), length, + &info, + &status); + } + + // A label contains hyphen-minus ('-') in the third and fourth positions. + info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; + // A label starts with a hyphen-minus ('-'). + info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; + // A label ends with a hyphen-minus ('-'). + info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; + + if (!be_strict) { // This seems to violate RFC 1035 section 2.3.4. + // A non-final domain name label (or the whole domain name) is empty. + info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + // A domain name label is longer than 63 bytes. + info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; + // A domain name is longer than 255 bytes in its storage form. + info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + + uidna_close(uidna); + + if (U_FAILURE(status) || info.errors != 0 || length == 0) { + out = std::nullopt; + return false; + } + out.value().resize(length); // we possibly want to call :shrink_to_fit otherwise we use 255 bytes. + out.value().shrink_to_fit(); +#elif defined(_WIN32) && ADA_WINDOWS_TO_ASCII_FALLBACK + (void)be_strict; // unused. + // Fallback on the system if ICU is not available. + // Windows function assumes UTF-16. 
+ std::unique_ptr buffer(new char16_t[input.size()]); + auto convert = [](const char* buf, size_t len, char16_t* utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t* start{utf16_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while(pos < final_pos) { + *utf16_output++ = char16_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if(pos + 1 >= len) { return 0; } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { return 0; } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. 
+ if(pos + 2 >= len) { return 0; } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if(pos + 3 >= len) { return 0; } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; } + + // range check + uint32_t code_point = + (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { return 0; } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + return 0; + } + } + return int(utf16_output - start); + }; + size_t codepoints = convert(input.data(), input.size(), buffer.get()); + if(codepoints == 0) { + out = std::nullopt; + return false; + } + int required_buffer_size = IdnToAscii(IDN_ALLOW_UNASSIGNED, (LPCWSTR)buffer.get(), codepoints, NULL, 0); + + if(required_buffer_size == 0) { + out = std::nullopt; + return false; + } + + out = std::string(required_buffer_size, 0); + std::unique_ptr ascii_buffer(new char16_t[required_buffer_size]); + + required_buffer_size = IdnToAscii(IDN_ALLOW_UNASSIGNED, (LPCWSTR)buffer.get(), codepoints, 
(LPWSTR)ascii_buffer.get(), required_buffer_size); + if(required_buffer_size == 0) { + out = std::nullopt; + return false; + } + // This will not validate the punycode, so let us work it in reverse. + int test_reverse = IdnToUnicode(IDN_ALLOW_UNASSIGNED, (LPCWSTR)ascii_buffer.get(), required_buffer_size, NULL, 0); + if(test_reverse == 0) { + out = std::nullopt; + return false; + } + out = std::string(required_buffer_size, 0); + for(size_t i = 0; i < required_buffer_size; i++) { (*out)[i] = char(ascii_buffer.get()[i]); } +#else + (void)be_strict; // unused. + out = input; // We cannot do much more for now. +#endif + return true; + } + +} // namespace ada::unicode +/* end file src/unicode.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=serializers.cpp +/* begin file src/serializers.cpp */ + +#include +#include + +namespace ada::serializers { + + void find_longest_sequence_of_ipv6_pieces(const std::array& address, size_t& compress, size_t& compress_length) noexcept { + for (size_t i = 0; i < 8; i++) { + if (address[i] == 0) { + size_t next = i + 1; + while (next != 8 && address[next] == 0) ++next; + const size_t count = next - i; + if (compress_length < count) { + compress_length = count; + compress = i; + if (next == 8) break; + i = next; + } + } + } + } + + std::string ipv6(const std::array& address) noexcept { + size_t compress_length = 0; + size_t compress = 0; + find_longest_sequence_of_ipv6_pieces(address, compress, compress_length); + + if (compress_length <= 1) { + // Optimization opportunity: Find a faster way then snprintf for imploding and return here. + compress = compress_length = 8; + } + + std::string output{}; + size_t piece_index = 0; + char buf[5]; + + while (true) { + if (piece_index == compress) { + output.append("::", piece_index == 0 ? 2 : 1); + if ((piece_index = piece_index + compress_length) == 8) break; + } + + // Optimization opportunity: Get rid of snprintf. 
+ snprintf(buf, 5, "%x", address[piece_index]); + output += buf; + if (++piece_index == 8) break; + output.push_back(':'); + } + + return "[" + output + "]"; + } + + std::string ipv4(const uint64_t address) noexcept { + std::string output(15, '\0'); + char *point = output.data(); + char *point_end = output.data() + output.size(); + point = std::to_chars(point, point_end, uint8_t(address >> 24)).ptr; + for (int i = 2; i >= 0; i--) { + *point++ = '.'; + point = std::to_chars(point, point_end, uint8_t(address >> (i * 8))).ptr; + } + output.resize(point - output.data()); + return output; + } + +} // namespace ada::serializers +/* end file src/serializers.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=implementation.cpp +/* begin file src/implementation.cpp */ +#include + + +namespace ada { + + ada_warn_unused tl::expected parse(std::string_view input, + const ada::url* base_url, + ada::encoding_type encoding) { + if(encoding != encoding_type::UTF8) { + // @todo Add support for non UTF8 input + } + ada::url u = ada::parser::parse_url(input, base_url, encoding); + if(!u.is_valid) { return tl::unexpected(errors::generic_error); } + return u; + } + + std::string href_from_file(std::string_view input) { + // This is going to be much faster than constructing a URL. 
+ std::string tmp_buffer; + std::string_view internal_input; + if(unicode::has_tabs_or_newline(input)) { + tmp_buffer = input; + helpers::remove_ascii_tab_or_newline(tmp_buffer); + internal_input = tmp_buffer; + } else { + internal_input = input; + } + std::string path; + if(internal_input.empty()) { + path = "/"; + } else if((internal_input[0] == '/') ||(internal_input[0] == '\\')){ + helpers::parse_prepared_path(internal_input.substr(1), ada::scheme::type::FILE, path); + } else { + helpers::parse_prepared_path(internal_input, ada::scheme::type::FILE, path); + } + return "file://" + path; + } + + ada_warn_unused std::string to_string(ada::encoding_type type) { + switch(type) { + case ada::encoding_type::UTF8 : return "UTF-8"; + case ada::encoding_type::UTF_16LE : return "UTF-16LE"; + case ada::encoding_type::UTF_16BE : return "UTF-16BE"; + default: unreachable(); + } + } + +} // namespace ada +/* end file src/implementation.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=helpers.cpp +/* begin file src/helpers.cpp */ + +#include +#include +#include +#include + +namespace ada::helpers { + + template + void encode_json(std::string_view view, out_iter out) { + // trivial implementation. could be faster. 
+ const char * hexvalues = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"; + for(uint8_t c : view) { + if(c == '\\') { + *out++ = '\\'; *out++ = '\\'; + } else if(c == '"') { + *out++ = '\\'; *out++ = '"'; + } else if(c <= 0x1f) { + *out++ = '\\'; *out++= 'u'; *out++= '0'; *out++= '0'; + *out++ = hexvalues[2*c]; + *out++ = hexvalues[2*c+1]; + } else { + *out++ = c; + } + } + } + + ada_unused std::string get_state(ada::state s) { + switch (s) { + case ada::state::AUTHORITY: return "Authority"; + case ada::state::SCHEME_START: return "Scheme Start"; + case ada::state::SCHEME: return "Scheme"; + case ada::state::HOST: return "Host"; + case ada::state::NO_SCHEME: return "No Scheme"; + case ada::state::FRAGMENT: return "Fragment"; + case ada::state::RELATIVE_SCHEME: return "Relative Scheme"; + case ada::state::RELATIVE_SLASH: return "Relative Slash"; + case ada::state::FILE: return "File"; + case ada::state::FILE_HOST: return "File Host"; + case ada::state::FILE_SLASH: return "File Slash"; + case ada::state::PATH_OR_AUTHORITY: return "Path or Authority"; + case ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES: return "Special Authority Ignore Slashes"; + case ada::state::SPECIAL_AUTHORITY_SLASHES: return "Special Authority Slashes"; + case ada::state::SPECIAL_RELATIVE_OR_AUTHORITY: return "Special Relative or Authority"; + case ada::state::QUERY: return "Query"; + case ada::state::PATH: return "Path"; + case ada::state::PATH_START: return "Path Start"; + case ada::state::OPAQUE_PATH: return "Opaque Path"; + case ada::state::PORT: return "Port"; + default: return "unknown state"; + } + } + + ada_really_inline std::optional prune_fragment(std::string_view& input) noexcept { + // compiles down to 20--30 instructions including a class to memchr (C function). + // this function should be quite fast. 
+ size_t location_of_first = input.find('#'); + if(location_of_first == std::string_view::npos) { return std::nullopt; } + std::string_view fragment = input; + fragment.remove_prefix(location_of_first+1); + input.remove_suffix(input.size() - location_of_first); + return fragment; + } + + ada_really_inline void shorten_path(std::string& path, ada::scheme::type type) noexcept { + size_t first_delimiter = path.find_first_of('/', 1); + + // Let path be url’s path. + // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. + if (type == ada::scheme::type::FILE && first_delimiter == std::string_view::npos) { + if (checkers::is_normalized_windows_drive_letter(std::string_view(path.data() + 1, first_delimiter - 1))) { + return; + } + } + + // Remove path’s last item, if any. + if (!path.empty()) { + path.erase(path.rfind('/')); + } + } + + ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept { + // if this ever becomes a performance issue, we could use an approach similar to has_tabs_or_newline + input.erase(std::remove_if(input.begin(), input.end(), [](char c) { + return ada::unicode::is_ascii_tab_or_newline(c); + }), input.end()); + } + + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept { + ada_log("substring(", input, " [", input.size() ,"bytes],", pos, ")"); + return pos > input.size() ? std::string_view() : input.substr(pos); + } + + ada_really_inline size_t get_host_delimiter_location(const ada::url& url, std::string_view& view, bool& inside_brackets) noexcept { + size_t location = url.is_special() ? view.find_first_of(":[/?\\") : view.find_first_of(":[/?"); + + // Next while loop is almost never taken! + while((location != std::string_view::npos) && (view[location] == '[')) { + location = view.find(']',location); + if(location == std::string_view::npos) { + inside_brackets = true; + /** + * TODO: Ok. 
So if we arrive here then view has an unclosed [, + * Is the URL valid??? + */ + } else { + location = url.is_special() ? view.find_first_of(":[/?\\#", location) : view.find_first_of(":[/?#", location); + } + } + + if (location != std::string_view::npos) { + view.remove_suffix(view.size() - location); + } + return location; + } + + ada_really_inline void trim_c0_whitespace(std::string_view& input) noexcept { + while(!input.empty() && ada::unicode::is_c0_control_or_space(input.front())) { input.remove_prefix(1); } + while(!input.empty() && ada::unicode::is_c0_control_or_space(input.back())) { input.remove_suffix(1); } + } + + + ada_really_inline bool parse_prepared_path(std::string_view input, ada::scheme::type type, std::string& path) { + ada_log("parse_path ", input); + uint8_t accumulator = checkers::path_signature(input); + // Let us first detect a trivial case. + // If it is special, we check that we have no dot, no %, no \ and no + // character needing percent encoding. Otherwise, we check that we have no %, + // no dot, and no character needing percent encoding. + bool special = type != ada::scheme::NOT_SPECIAL; + bool trivial_path = + (special ? (accumulator == 0) : ((accumulator & 0b11111101) == 0)) && + (type != ada::scheme::type::FILE); + if (trivial_path) { + ada_log("parse_path trivial"); + path += '/'; + path += input; + return true; + } + // We are going to need to look a bit at the path, but let us see if we can + // ignore percent encoding *and* backslashes *and* percent characters. + // Except for the trivial case, this is likely to capture 99% of paths out + // there. + bool fast_path = (special && (accumulator & 0b11111011) == 0) && + (type != ada::scheme::type::FILE); + if (fast_path) { + ada_log("parse_path fast"); + // Here we don't need to worry about \ or percent encoding. + // We also do not have a file protocol. 
We might have dots, however, + // but dots must as appear as '.', and they cannot be encoded because + // the symbol '%' is not present. + size_t previous_location = 0; // We start at 0. + do { + size_t new_location = input.find('/', previous_location); + //std::string_view path_view = input; + // We process the last segment separately: + if (new_location == std::string_view::npos) { + std::string_view path_view = input.substr(previous_location); + if (path_view == "..") { // The path ends with .. + // e.g., if you receive ".." with an empty path, you go to "/". + if(path.empty()) { path = '/'; return true; } + // Fast case where we have nothing to do: + if(path.back() == '/') { return true; } + // If you have the path "/joe/myfriend", + // then you delete 'myfriend'. + path.resize(path.rfind('/') + 1); + return true; + } + path += '/'; + if (path_view != ".") { + path.append(path_view); + } + return true; + } else { + // This is a non-final segment. + std::string_view path_view = input.substr(previous_location, new_location - previous_location); + previous_location = new_location + 1; + if (path_view == "..") { + if(!path.empty()) { path.erase(path.rfind('/')); } + } else if (path_view != ".") { + path += '/'; + path.append(path_view); + } + } + } while (true); + } else { + ada_log("parse_path slow"); + // we have reached the general case + bool needs_percent_encoding = (accumulator & 1); + std::string path_buffer_tmp; + do { + size_t location = (special && (accumulator & 2)) + ? input.find_first_of("/\\") + : input.find('/'); + std::string_view path_view = input; + if (location != std::string_view::npos) { + path_view.remove_suffix(path_view.size() - location); + input.remove_prefix(location + 1); + } + // path_buffer is either path_view or it might point at a percent encoded temporary file. + std::string_view path_buffer = + (needs_percent_encoding + && ada::unicode::percent_encode(path_view, character_sets::PATH_PERCENT_ENCODE, path_buffer_tmp)) ? 
+ path_buffer_tmp : + path_view; + if (unicode::is_double_dot_path_segment(path_buffer)) { + helpers::shorten_path(path, type); + if (location == std::string_view::npos) { + path += '/'; + } + } else if (unicode::is_single_dot_path_segment(path_buffer) && + (location == std::string_view::npos)) { + path += '/'; + } + // Otherwise, if path_buffer is not a single-dot path segment, then: + else if (!unicode::is_single_dot_path_segment(path_buffer)) { + // If url’s scheme is "file", url’s path is empty, and path_buffer is a + // Windows drive letter, then replace the second code point in + // path_buffer with U+003A (:). + if (type == ada::scheme::type::FILE && path.empty() && + checkers::is_windows_drive_letter(path_buffer)) { + path += '/'; + path += path_buffer[0]; + path += ':'; + path_buffer.remove_prefix(2); + path.append(path_buffer); + } else { + // Append path_buffer to url’s path. + path += '/'; + path.append(path_buffer); + } + } + if (location == std::string_view::npos) { + return true; + } + } while (true); + } + } +} // namespace ada::helpers + +namespace ada { + ada_warn_unused std::string to_string(ada::state state) { + return ada::helpers::get_state(state); + } +} +/* end file src/helpers.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url.cpp +/* begin file src/url.cpp */ + +#include +#include +#include + +namespace ada { + ada_really_inline bool url::parse_path(std::string_view input) { + ada_log("parse_path ", input); + std::string tmp_buffer; + std::string_view internal_input; + if(unicode::has_tabs_or_newline(input)) { + tmp_buffer = input; + // Optimization opportunity: Instead of copying and then pruning, we could just directly + // build the string from user_input. 
+ helpers::remove_ascii_tab_or_newline(tmp_buffer); + internal_input = tmp_buffer; + } else { + internal_input = input; + } + + // If url is special, then: + if (is_special()) { + if(internal_input.empty()) { + path = "/"; + } else if((internal_input[0] == '/') ||(internal_input[0] == '\\')){ + return helpers::parse_prepared_path(internal_input.substr(1), get_scheme_type(), path); + } else { + return helpers::parse_prepared_path(internal_input, get_scheme_type(), path); + } + } else if (!internal_input.empty()) { + if(internal_input[0] == '/') { + return helpers::parse_prepared_path(internal_input.substr(1), get_scheme_type(), path); + } else { + return helpers::parse_prepared_path(internal_input, get_scheme_type(), path); + } + } else { + if(!host.has_value()) { + path = "/"; + } + } + return true; + } + + bool url::parse_opaque_host(std::string_view input) { + ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]"); + if (std::any_of(input.begin(), input.end(), ada::unicode::is_forbidden_host_code_point)) { + return is_valid = false; + } + + // Return the result of running UTF-8 percent-encode on input using the C0 control percent-encode set. + host = ada::unicode::percent_encode(input, ada::character_sets::C0_CONTROL_PERCENT_ENCODE); + return true; + } + + bool url::parse_ipv4(std::string_view input) { + ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]"); + if(input.back()=='.') { + input.remove_suffix(1); + } + size_t digit_count{0}; + int pure_decimal_count = 0; // entries that are decimal + std::string_view original_input = input; // we might use this if pure_decimal_count == 4. + uint64_t ipv4{0}; + // we could unroll for better performance? + for(;(digit_count < 4) && !(input.empty()); digit_count++) { + uint32_t segment_result{}; // If any number exceeds 32 bits, we have an error. 
+ bool is_hex = checkers::has_hex_prefix(input); + if(is_hex && ((input.length() == 2)|| ((input.length() > 2) && (input[2]=='.')))) { + // special case + segment_result = 0; + input.remove_prefix(2); + } else { + std::from_chars_result r; + if(is_hex) { + r = std::from_chars(input.data() + 2, input.data() + input.size(), segment_result, 16); + } else if ((input.length() >= 2) && input[0] == '0' && checkers::is_digit(input[1])) { + r = std::from_chars(input.data() + 1, input.data() + input.size(), segment_result, 8); + } else { + pure_decimal_count++; + r = std::from_chars(input.data(), input.data() + input.size(), segment_result, 10); + } + if (r.ec != std::errc()) { return is_valid = false; } + input.remove_prefix(r.ptr-input.data()); + } + if(input.empty()) { + // We have the last value. + // At this stage, ipv4 contains digit_count*8 bits. + // So we have 32-digit_count*8 bits left. + if(segment_result > (uint64_t(1)<<(32-digit_count*8))) { return is_valid = false; } + ipv4 <<=(32-digit_count*8); + ipv4 |= segment_result; + goto final; + } else { + // There is more, so that the value must no be larger than 255 + // and we must have a '.'. + if ((segment_result>255) || (input[0]!='.')) { return is_valid = false; } + ipv4 <<=8; + ipv4 |= segment_result; + input.remove_prefix(1); // remove '.' + } + } + if((digit_count != 4) || (!input.empty())) {return is_valid = false; } + final: + // We could also check r.ptr to see where the parsing ended. + if(pure_decimal_count == 4) { + host = original_input; // The original input was already all decimal and we validated it. + } else { + host = ada::serializers::ipv4(ipv4); // We have to reserialize the address. + } + return true; + } + + bool url::parse_ipv6(std::string_view input) { + ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]"); + + if(input.empty()) { return is_valid = false; } + // Let address be a new IPv6 address whose IPv6 pieces are all 0. + std::array address{}; + + // Let pieceIndex be 0. 
+ int piece_index = 0; + + // Let compress be null. + std::optional compress{}; + + // Let pointer be a pointer for input. + std::string_view::iterator pointer = input.begin(); + + // If c is U+003A (:), then: + if (input[0] == ':') { + // If remaining does not start with U+003A (:), validation error, return failure. + if(input.size() == 1 || input[1] != ':') { + ada_log("parse_ipv6 starts with : but the rest does not start with :"); + return is_valid = false; + } + + // Increase pointer by 2. + pointer += 2; + + // Increase pieceIndex by 1 and then set compress to pieceIndex. + compress = ++piece_index; + } + + // While c is not the EOF code point: + while (pointer != input.end()) { + // If pieceIndex is 8, validation error, return failure. + if (piece_index == 8) { + ada_log("parse_ipv6 piece_index == 8"); + return is_valid = false; + } + + // If c is U+003A (:), then: + if (*pointer == ':') { + // If compress is non-null, validation error, return failure. + if (compress.has_value()) { + ada_log("parse_ipv6 compress is non-null"); + return is_valid = false; + } + + // Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue. + pointer++; + compress = ++piece_index; + continue; + } + + // Let value and length be 0. + uint16_t value = 0, length = 0; + + // While length is less than 4 and c is an ASCII hex digit, + // set value to value × 0x10 + c interpreted as hexadecimal number, and increase pointer and length by 1. + while (length < 4 && pointer != input.end() && unicode::is_ascii_hex_digit(*pointer)) { + // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int + value = uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*pointer)); + pointer++; + length++; + } + + // If c is U+002E (.), then: + if (pointer != input.end() && *pointer == '.') { + // If length is 0, validation error, return failure. 
+ if (length == 0) { + ada_log("parse_ipv6 length is 0"); + return is_valid = false; + } + + // Decrease pointer by length. + pointer -= length; + + // If pieceIndex is greater than 6, validation error, return failure. + if (piece_index > 6) { + ada_log("parse_ipv6 piece_index > 6"); + return is_valid = false; + } + + // Let numbersSeen be 0. + int numbers_seen = 0; + + // While c is not the EOF code point: + while (pointer != input.end()) { + // Let ipv4Piece be null. + std::optional ipv4_piece{}; + + // If numbersSeen is greater than 0, then: + if (numbers_seen > 0) { + // If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1. + if (*pointer == '.' && numbers_seen < 4) { + pointer++; + } + // Otherwise, validation error, return failure. + else { + ada_log("parse_ipv6 Otherwise, validation error, return failure"); + return is_valid = false; + } + } + + // If c is not an ASCII digit, validation error, return failure. + if (pointer == input.end() || !checkers::is_digit(*pointer)) { + ada_log("parse_ipv6 If c is not an ASCII digit, validation error, return failure"); + return is_valid = false; + } + + // While c is an ASCII digit: + while (pointer != input.end() && checkers::is_digit(*pointer)) { + // Let number be c interpreted as decimal number. + int number = *pointer - '0'; + + // If ipv4Piece is null, then set ipv4Piece to number. + if (!ipv4_piece.has_value()) { + ipv4_piece = number; + } + // Otherwise, if ipv4Piece is 0, validation error, return failure. + else if (ipv4_piece == 0) { + ada_log("parse_ipv6 if ipv4Piece is 0, validation error"); + return is_valid = false; + } + // Otherwise, set ipv4Piece to ipv4Piece × 10 + number. + else { + ipv4_piece = *ipv4_piece * 10 + number; + } + + // If ipv4Piece is greater than 255, validation error, return failure. + if (ipv4_piece > 255) { + ada_log("parse_ipv6 ipv4_piece > 255"); + return is_valid = false; + } + + // Increase pointer by 1. 
+ pointer++; + } + + // Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece. + // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int + address[piece_index] = uint16_t(address[piece_index] * 0x100 + *ipv4_piece); + + // Increase numbersSeen by 1. + numbers_seen++; + + // If numbersSeen is 2 or 4, then increase pieceIndex by 1. + if (numbers_seen == 2 || numbers_seen == 4) { + piece_index++; + } + } + + // If numbersSeen is not 4, validation error, return failure. + if (numbers_seen != 4) { + return is_valid = false; + } + + // Break. + break; + } + // Otherwise, if c is U+003A (:): + else if ((pointer != input.end()) && (*pointer == ':')) { + // Increase pointer by 1. + pointer++; + + // If c is the EOF code point, validation error, return failure. + if (pointer == input.end()) { + ada_log("parse_ipv6 If c is the EOF code point, validation error, return failure"); + return is_valid = false; + } + } + // Otherwise, if c is not the EOF code point, validation error, return failure. + else if (pointer != input.end()) { + ada_log("parse_ipv6 Otherwise, if c is not the EOF code point, validation error, return failure"); + return is_valid = false; + } + + // Set address[pieceIndex] to value. + address[piece_index] = value; + + // Increase pieceIndex by 1. + piece_index++; + } + + // If compress is non-null, then: + if (compress.has_value()) { + // Let swaps be pieceIndex − compress. + int swaps = piece_index - *compress; + + // Set pieceIndex to 7. + piece_index = 7; + + // While pieceIndex is not 0 and swaps is greater than 0, + // swap address[pieceIndex] with address[compress + swaps − 1], and then decrease both pieceIndex and swaps by 1. + while (piece_index != 0 && swaps > 0) { + std::swap(address[piece_index], address[*compress + swaps - 1]); + piece_index--; + swaps--; + } + } + // Otherwise, if compress is null and pieceIndex is not 8, validation error, return failure. 
+ else if (piece_index != 8) { + ada_log("parse_ipv6 if compress is null and pieceIndex is not 8, validation error, return failure"); + return is_valid = false; + } + host = ada::serializers::ipv6(address); + ada_log("parse_ipv6 ", *host); + return true; + } + + ada_really_inline bool url::parse_host(std::string_view input) { + ada_log("parse_host ", input, "[", input.size(), " bytes]"); + if(input.empty()) { return is_valid = false; } // technically unnecessary. + // If input starts with U+005B ([), then: + if (input[0] == '[') { + // If input does not end with U+005D (]), validation error, return failure. + if (input.back() != ']') { + return is_valid = false; + } + ada_log("parse_host ipv6"); + + // Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed. + input.remove_prefix(1); + input.remove_suffix(1); + return parse_ipv6(input); + } + + // If isNotSpecial is true, then return the result of opaque-host parsing input. + if (!is_special()) { + return parse_opaque_host(input); + } + // Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input. + // Let asciiDomain be the result of running domain to ASCII with domain and false. + // The most common case is an ASCII input, in which case we do not need to call the expensive 'to_ascii' + // if a few conditions are met: no '%' and no 'xn-' subsequence. + std::string buffer = std::string(input); + // This next function checks that the result is ascii, but we are going to + // to check anyhow with is_forbidden. 
+ // bool is_ascii = + unicode::to_lower_ascii(buffer.data(), buffer.size()); + bool is_forbidden = unicode::contains_forbidden_domain_code_point(buffer.data(), buffer.size()); + if (is_forbidden == 0 && buffer.find("xn-") == std::string_view::npos) { + // fast path + host = std::move(buffer); + if (checkers::is_ipv4(host.value())) { + ada_log("parse_host fast path ipv4"); + return parse_ipv4(host.value()); + } + ada_log("parse_host fast path ", *host); + return true; + } + ada_log("parse_host calling to_ascii"); + is_valid = ada::unicode::to_ascii(host, input, false, input.find('%')); + if (!is_valid) { + ada_log("parse_host to_ascii returns false"); + return is_valid = false; + } + + if(std::any_of(host.value().begin(), host.value().end(), ada::unicode::is_forbidden_domain_code_point)) { + host = std::nullopt; + return is_valid = false; + } + + // If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain. + if(checkers::is_ipv4(host.value())) { + ada_log("parse_host got ipv4", *host); + return parse_ipv4(host.value()); + } + + return true; + } + + template + ada_really_inline bool url::parse_scheme(const std::string_view input) { + auto parsed_type = ada::scheme::get_scheme_type(input); + bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL); + /** + * In the common case, we will immediately recognize a special scheme (e.g., http, https), + * in which case, we can go really fast. + **/ + if(is_input_special) { // fast path!!! + if (has_state_override) { + // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + if (is_special() != is_input_special) { + return true; + } + + // If url includes credentials or has a non-null port, and buffer is "file", then return. + if ((includes_credentials() || port.has_value()) && parsed_type == ada::scheme::type::FILE) { + return true; + } + + // If url’s scheme is "file" and its host is an empty host, then return. + // An empty host is the empty string. 
+ if (get_scheme_type() == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { + return true; + } + } + + type = parsed_type; + + if (has_state_override) { + // This is uncommon. + uint16_t urls_scheme_port = get_special_port(); + + if (urls_scheme_port) { + // If url’s port is url’s scheme’s default port, then set url’s port to null. + if (port.has_value() && *port == urls_scheme_port) { + port = std::nullopt; + } + } + } + } else { // slow path + std::string _buffer = std::string(input); + // Next function is only valid if the input is ASCII and returns false + // otherwise, but it seems that we always have ascii content so we do not need + // to check the return value. + //bool is_ascii = + unicode::to_lower_ascii(_buffer.data(), _buffer.size()); + + if (has_state_override) { + // If url’s scheme is a special scheme and buffer is not a special scheme, then return. + // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + if (is_special() != ada::scheme::is_special(_buffer)) { + return true; + } + + // If url includes credentials or has a non-null port, and buffer is "file", then return. + if ((includes_credentials() || port.has_value()) && _buffer == "file") { + return true; + } + + // If url’s scheme is "file" and its host is an empty host, then return. + // An empty host is the empty string. + if (get_scheme_type() == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { + return true; + } + } + + set_scheme(std::move(_buffer)); + + if (has_state_override) { + // This is uncommon. + uint16_t urls_scheme_port = get_special_port(); + + if (urls_scheme_port) { + // If url’s port is url’s scheme’s default port, then set url’s port to null. 
+ if (port.has_value() && *port == urls_scheme_port) { + port = std::nullopt; + } + } + } + } + + return true; + } + + std::string url::to_string() const { + if (!is_valid) { + return "null"; + } + std::string answer; + auto back = std::back_insert_iterator(answer); + answer.append("{\n"); + answer.append("\t\"scheme\":\""); + helpers::encode_json(get_scheme(), back); + answer.append("\",\n"); + if(includes_credentials()) { + answer.append("\t\"username\":\""); + helpers::encode_json(username, back); + answer.append("\",\n"); + answer.append("\t\"password\":\""); + helpers::encode_json(password, back); + answer.append("\",\n"); + } + if(host.has_value()) { + answer.append("\t\"host\":\""); + helpers::encode_json(host.value(), back); + answer.append("\",\n"); + } + if(port.has_value()) { + answer.append("\t\"port\":\""); + answer.append(std::to_string(port.value())); + answer.append("\",\n"); + } + answer.append("\t\"path\":\""); + helpers::encode_json(path, back); + answer.append("\",\n"); + answer.append("\t\"opaque path\":"); + answer.append((has_opaque_path ? 
"true" : "false")); + if(query.has_value()) { + answer.append(",\n"); + answer.append("\t\"query\":\""); + helpers::encode_json(query.value(), back); + answer.append("\""); + } + if(fragment.has_value()) { + answer.append(",\n"); + answer.append("\t\"fragment\":\""); + helpers::encode_json(fragment.value(), back); + answer.append("\""); + } + answer.append("\n}"); + return answer; + } + + [[nodiscard]] bool url::has_valid_domain() const noexcept { + if(!host.has_value()) { return false; } + return checkers::verify_dns_length(host.value()); + } +} // namespace ada +/* end file src/url.cpp */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url-getters.cpp +/* begin file src/url-getters.cpp */ +/** + * @file url-getters.cpp + * Includes all the getters of `ada::url` + */ + +#include +#include + +namespace ada { + + [[nodiscard]] std::string url::get_href() const noexcept { + std::string output = get_protocol(); + size_t url_delimiter_count = std::count(path.begin(), path.end(), '/'); + + if (host.has_value()) { + output += "//"; + if (includes_credentials()) { + output += get_username(); + if (!get_password().empty()) { + output += ":" + get_password(); + } + output += "@"; + } + + output += get_host(); + } else if (!has_opaque_path && url_delimiter_count > 1 && path.length() >= 2 && path[0] == '/' && path[1] == '/') { + // If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, + // and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output. + output += "/."; + } + + output += get_pathname() + // If query is non-null, then set this’s query object’s list to the result of parsing query. + + (query.has_value() ? "?" + query.value() : "") + // If url’s fragment is non-null, then append U+0023 (#), followed by url’s fragment, to output. + + (fragment.has_value() ? 
"#" + fragment.value() : ""); + return output; + } + + [[nodiscard]] std::string url::get_origin() const noexcept { + if (is_special()) { + // Return a new opaque origin. + if (get_scheme_type() == scheme::FILE) { return "null"; } + + return get_protocol() + "//" + get_host(); + } + + if (get_scheme() == "blob") { + if (path.length() > 0) { + url path_result = ada::parser::parse_url(get_pathname()); + if (path_result.is_valid) { + if (path_result.is_special()) { + return path_result.get_protocol() + "//" + path_result.get_host(); + } + } + } + } + + // Return a new opaque origin. + return "null"; + } + + [[nodiscard]] std::string url::get_protocol() const noexcept { + return std::string(get_scheme()) + ":"; + } + + [[nodiscard]] std::string url::get_host() const noexcept { + // If url’s host is null, then return the empty string. + // If url’s port is null, return url’s host, serialized. + // Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. + if (!host.has_value()) { return ""; } + return host.value() + (port.has_value() ? ":" + get_port() : ""); + } + + [[nodiscard]] std::string url::get_hostname() const noexcept { + return host.value_or(""); + } + + [[nodiscard]] std::string url::get_pathname() const noexcept { + return path; + } + + [[nodiscard]] std::string url::get_search() const noexcept { + // If this’s URL’s query is either null or the empty string, then return the empty string. + // Return U+003F (?), followed by this’s URL’s query. + return (!query.has_value() || (query.value().empty())) ? "" : "?" + query.value(); + } + + [[nodiscard]] std::string url::get_username() const noexcept { + return username; + } + + [[nodiscard]] std::string url::get_password() const noexcept { + return password; + } + + [[nodiscard]] std::string url::get_port() const noexcept { + return port.has_value() ? 
std::to_string(port.value()) : "";
+  }
+
+  [[nodiscard]] std::string url::get_hash() const noexcept {
+    // If this’s URL’s fragment is either null or the empty string, then return the empty string.
+    // Return U+0023 (#), followed by this’s URL’s fragment.
+    return (!fragment.has_value() || (fragment.value().empty())) ? "" : "#" + fragment.value();
+  }
+
+} // namespace ada
+/* end file src/url-getters.cpp */
+// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url-setters.cpp
+/* begin file src/url-setters.cpp */
+/**
+ * @file url-setters.cpp
+ * Includes all the setters of `ada::url`
+ */
+
+// NOTE(review): the header names on the #include lines of this file were lost in this copy of the
+// patch; they are restored from the std:: facilities the code below uses. Confirm against upstream.
+#include <optional>
+#include <string>
+
+namespace ada {
+
+  /**
+   * Replaces the username with the userinfo-percent-encoded form of `input`.
+   * @return false, leaving the URL untouched, when cannot_have_credentials_or_port() is true.
+   */
+  bool url::set_username(const std::string_view input) {
+    if (cannot_have_credentials_or_port()) { return false; }
+    username = ada::unicode::percent_encode(input, character_sets::USERINFO_PERCENT_ENCODE);
+    return true;
+  }
+
+  /**
+   * Replaces the password with the userinfo-percent-encoded form of `input`.
+   * @return false, leaving the URL untouched, when cannot_have_credentials_or_port() is true.
+   */
+  bool url::set_password(const std::string_view input) {
+    if (cannot_have_credentials_or_port()) { return false; }
+    password = ada::unicode::percent_encode(input, character_sets::USERINFO_PERCENT_ENCODE);
+    return true;
+  }
+
+  /**
+   * Sets the port from `input` after removing ASCII tabs and newlines.
+   * An input that is empty after that removal clears the port.
+   * @return false when the URL cannot have a port or the cleaned input starts with a C0 control
+   *         or space; otherwise the result of parse_port() converted to bool.
+   *         NOTE(review): parse_url() treats parse_port()'s result as a byte count; confirm that
+   *         "non-zero" is the intended success signal here.
+   */
+  bool url::set_port(const std::string_view input) {
+    if (cannot_have_credentials_or_port()) { return false; }
+    std::string trimmed(input);
+    helpers::remove_ascii_tab_or_newline(trimmed);
+    if (trimmed.empty()) { port = std::nullopt; return true; }
+    // Input should not start with control characters.
+    if (ada::unicode::is_c0_control_or_space(trimmed.front())) { return false; }
+    return parse_port(trimmed);
+  }
+
+  /**
+   * Sets the fragment. An empty `input` clears it; otherwise one leading '#' is dropped,
+   * ASCII tabs/newlines are removed and the rest is fragment-percent-encoded.
+   */
+  void url::set_hash(const std::string_view input) {
+    if (input.empty()) {
+      fragment = std::nullopt;
+      // TODO: Potentially strip trailing spaces from an opaque path with this.
+      return;
+    }
+
+    std::string new_value(input[0] == '#' ? input.substr(1) : input);
+    helpers::remove_ascii_tab_or_newline(new_value);
+    fragment = unicode::percent_encode(new_value, ada::character_sets::FRAGMENT_PERCENT_ENCODE);
+  }
+
+  /**
+   * Sets the query. An empty `input` clears it; otherwise one leading '?' is dropped,
+   * ASCII tabs/newlines are removed and the rest is percent-encoded with the query set
+   * that matches the URL (special or not).
+   */
+  void url::set_search(const std::string_view input) {
+    if (input.empty()) {
+      query = std::nullopt;
+      // Empty this’s query object’s list.
+      // @todo Implement this if/when we have URLSearchParams.
+      // Potentially strip trailing spaces from an opaque path with this.
+      return;
+    }
+
+    std::string new_value(input[0] == '?' ? input.substr(1) : input);
+    helpers::remove_ascii_tab_or_newline(new_value);
+
+    auto query_percent_encode_set = is_special() ?
+      ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE :
+      ada::character_sets::QUERY_PERCENT_ENCODE;
+
+    query = ada::unicode::percent_encode(std::string_view(new_value), query_percent_encode_set);
+
+    // Set this’s query object’s list to the result of parsing input.
+    // @todo Implement this if/when we have URLSearchParams.
+  }
+
+  /**
+   * Replaces the path by parsing `input`.
+   * @return false when the URL has an opaque path; otherwise the result of parse_path().
+   * NOTE(review): the previous path is discarded before parsing, so it is not restored if
+   * parse_path() reports failure. Confirm that this is intended.
+   */
+  bool url::set_pathname(const std::string_view input) {
+    if (has_opaque_path) { return false; }
+    path = "";
+    return parse_path(input);
+  }
+
+  /**
+   * Sets the host, and the port when `input` has the form "host:port".
+   * Everything from the first '#' on is ignored and ASCII tabs/newlines are removed first.
+   * @return false when the URL has an opaque path, when the host part is empty where that is
+   *         not allowed, or when host parsing fails (host and port are then restored).
+   */
+  bool url::set_host(const std::string_view input) {
+    if (has_opaque_path) { return false; }
+
+    // Snapshot what this setter may modify so that a failed host parse can be rolled back.
+    const auto previous_host = host;
+    const auto previous_port = port;
+
+    // substr(0, npos) keeps the whole input when there is no '#'.
+    std::string cleaned_input(input.substr(0, input.find('#')));
+    helpers::remove_ascii_tab_or_newline(cleaned_input);
+    std::string_view new_host(cleaned_input);
+
+    // If url's scheme is "file", then set state to file host state, instead of host state.
+    if (get_scheme_type() != ada::scheme::type::FILE) {
+      std::string_view host_view(cleaned_input.data(), cleaned_input.length());
+      bool inside_brackets{false};
+      const size_t location = helpers::get_host_delimiter_location(*this, host_view, inside_brackets);
+
+      // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
+      // Note: the bound check also rejects npos, so new_host[location] is always in range.
+      if ((location < new_host.size()) && (new_host[location] == ':') && !inside_brackets) {
+        // If buffer is the empty string, validation error, return failure.
+        // A ':' at index 0 means there is no host text in front of the port, so nothing
+        // (not even the port) may be changed.
+        if (location == 0) { return false; }
+        // TODO: The next 2 lines are the only difference between set_host and set_hostname. Let's simplify it.
+        // substr() stays inside new_host, so this no longer depends on a terminating NUL or on
+        // dereferencing a past-the-end iterator when the ':' is the last character.
+        const std::string_view buffer = new_host.substr(location + 1);
+        if (!buffer.empty()) { set_port(buffer); }
+      }
+      // If url is special and host_view is the empty string, validation error, return failure.
+      // Otherwise, if state override is given, host_view is the empty string,
+      // and either url includes credentials or url’s port is non-null, return.
+      else if (host_view.empty() && (is_special() || includes_credentials() || port.has_value())) {
+        return false;
+      }
+
+      // Let host be the result of host parsing host_view with url is not special.
+      if (host_view.empty()) {
+        host = "";
+        return true;
+      }
+
+      const bool succeeded = parse_host(host_view);
+      if (!succeeded) {
+        host = previous_host;
+        port = previous_port;
+      }
+      return succeeded;
+    }
+
+    // file scheme: the host ends at the first '/', '\' or '?'.
+    const size_t location = new_host.find_first_of("/\\?");
+    if (location != std::string_view::npos) { new_host.remove_suffix(new_host.length() - location); }
+
+    if (new_host.empty()) {
+      // Set url’s host to the empty string.
+      host = "";
+    }
+    else {
+      // Let host be the result of host parsing buffer with url is not special.
+      if (!parse_host(new_host)) {
+        host = previous_host;
+        port = previous_port;
+        return false;
+      }
+
+      // If host is "localhost", then set host to the empty string.
+      if (host.has_value() && host.value() == "localhost") {
+        host = "";
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Sets the host only; unlike set_host(), an input containing a port delimiter is rejected.
+   * Everything from the first '#' on is ignored and ASCII tabs/newlines are removed first.
+   * @return false when the URL has an opaque path, when a ':' delimiter is found outside
+   *         brackets, when the host is empty for a special URL, or when host parsing fails
+   *         (the previous host is then restored). Returns true without changing anything when
+   *         the host is empty and the URL has credentials or a port.
+   */
+  bool url::set_hostname(const std::string_view input) {
+    if (has_opaque_path) { return false; }
+
+    const auto previous_host = host;
+
+    // substr(0, npos) keeps the whole input when there is no '#'.
+    std::string cleaned_input(input.substr(0, input.find('#')));
+    helpers::remove_ascii_tab_or_newline(cleaned_input);
+    std::string_view new_host(cleaned_input);
+
+    // If url's scheme is "file", then set state to file host state, instead of host state.
+    if (get_scheme_type() != ada::scheme::type::FILE) {
+      std::string_view host_view(cleaned_input.data(), cleaned_input.length());
+      bool inside_brackets{false};
+      const size_t location = helpers::get_host_delimiter_location(*this, host_view, inside_brackets);
+
+      // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
+      // Note: the bound check also rejects npos, so new_host[location] is always in range.
+      if ((location < new_host.size()) && (new_host[location] == ':') && !inside_brackets) {
+        // The hostname setter does not accept a port delimiter: return failure.
+        return false;
+      }
+      // If url is special and host_view is the empty string, validation error, return failure.
+      else if (host_view.empty() && is_special()) {
+        return false;
+      }
+      // Otherwise, if state override is given, host_view is the empty string,
+      // and either url includes credentials or url’s port is non-null, return.
+      else if (host_view.empty() && (includes_credentials() || port.has_value())) {
+        return true;
+      }
+
+      // Let host be the result of host parsing host_view with url is not special.
+      if (host_view.empty()) {
+        host = "";
+        return true;
+      }
+
+      const bool succeeded = parse_host(host_view);
+      if (!succeeded) { host = previous_host; }
+      return succeeded;
+    }
+
+    // file scheme: the host ends at the first '/', '\' or '?'.
+    const size_t location = new_host.find_first_of("/\\?");
+    if (location != std::string_view::npos) { new_host.remove_suffix(new_host.length() - location); }
+
+    if (new_host.empty()) {
+      // Set url’s host to the empty string.
+      host = "";
+    }
+    else {
+      // Let host be the result of host parsing buffer with url is not special.
+      if (!parse_host(new_host)) {
+        host = previous_host;
+        return false;
+      }
+
+      // If host is "localhost", then set host to the empty string.
+      if (host.has_value() && host.value() == "localhost") {
+        host = "";
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Sets the scheme from `input`, given with or without its trailing ':'.
+   * @return true when the input is empty after removing ASCII tabs/newlines (nothing changes);
+   *         false when it does not start with an ASCII letter or contains a character that is
+   *         not allowed in a scheme before the first ':'; otherwise the result of parse_scheme().
+   */
+  bool url::set_protocol(const std::string_view input) {
+    std::string scheme_text(input);
+    helpers::remove_ascii_tab_or_newline(scheme_text);
+    if (scheme_text.empty()) { return true; }
+
+    // Schemes should start with alpha values.
+    if (!checkers::is_alpha(scheme_text[0])) { return false; }
+
+    // Guarantee a ':' terminator so that the scan below always has one to stop at.
+    scheme_text.append(":");
+
+    std::string::iterator pointer = std::find_if_not(scheme_text.begin(), scheme_text.end(), unicode::is_alnum_plus);
+
+    if (pointer != scheme_text.end() && *pointer == ':') {
+      return parse_scheme(std::string_view(scheme_text.data(), pointer - scheme_text.begin()));
+    }
+    return false;
+  }
+
+  /**
+   * Replaces the whole URL with the result of parsing `input`.
+   * @return false, leaving the URL untouched, when `input` does not parse.
+   */
+  bool url::set_href(const std::string_view input) {
+    ada::result out = ada::parse(input);
+
+    // Take over the parsed URL as a whole. Replaying it through the individual setters is not
+    // equivalent: each setter can refuse depending on the *current* state (an opaque path
+    // blocks set_host/set_pathname, credentials or a port block an empty host, ...), which
+    // would leave a mix of old and new components while still reporting success.
+    if (out) { *this = *out; }
+
+    return out.has_value();
+  }
+
+} // namespace ada
+/* end file src/url-setters.cpp */
+// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=parser.cpp
+/* begin file src/parser.cpp */
+
+// NOTE(review): the header names on the #include lines of this file were lost in this copy of the
+// patch; they are restored from the std:: facilities the code below uses. Confirm against upstream.
+#include <string>
+
+#include <optional>
+#include <string_view>
+
+namespace ada::parser {
+
+  url 
parse_url(std::string_view user_input,
+                const ada::url* base_url,
+                ada::encoding_type encoding) {
+    // Parses user_input into an ada::url, resolving it against base_url when base_url is
+    // non-null. Failure is reported through the is_valid flag of the returned url: it is
+    // cleared directly in this function, or presumably by the url::parse_* helpers whose
+    // `false` result makes this function return early.
+    // The fragment is split off and encoded up front, so the state machine below never sees '#'.
+    ada_log("ada::parser::parse_url('", user_input,
+     "' [", user_input.size()," bytes],", (base_url != nullptr ? base_url->to_string() : "null"),
+     ",", ada::to_string(encoding), ")");
+
+    ada::state state = ada::state::SCHEME_START;
+    ada::url url = ada::url();
+
+    // If we are provided with an invalid base, or the optional_url was invalid,
+    // we must return.
+    if(base_url != nullptr) { url.is_valid &= base_url->is_valid; }
+    if(!url.is_valid) { return url; }
+
+    std::string tmp_buffer;
+    std::string_view internal_input;
+    if(unicode::has_tabs_or_newline(user_input)) {
+      tmp_buffer = user_input;
+      // Optimization opportunity: Instead of copying and then pruning, we could just directly
+      // build the string from user_input.
+      helpers::remove_ascii_tab_or_newline(tmp_buffer);
+      internal_input = tmp_buffer;
+    } else {
+      internal_input = user_input;
+    }
+
+    // Leading and trailing control characters are uncommon and easy to deal with (no performance concern).
+    std::string_view url_data = internal_input;
+    helpers::trim_c0_whitespace(url_data);
+
+    // Optimization opportunity. Most websites do not have fragment.
+    const auto fragment = helpers::prune_fragment(url_data);
+    if(fragment.has_value()) {
+      url.fragment = unicode::percent_encode(*fragment,
+                                             ada::character_sets::FRAGMENT_PERCENT_ENCODE);
+    }
+
+    // Here url_data no longer has its fragment.
+    // We are going to access the data from url_data (it is immutable).
+    // At any given time, we are pointing at byte 'input_position' in url_data.
+    // The input_position variable should range from 0 to input_size.
+    // It is illegal to access url_data at input_size.
+    size_t input_position = 0;
+    const size_t input_size = url_data.size();
+    // Keep running the following state machine by switching on state.
+    // If after a run pointer points to the EOF code point, go to the next step.
+    // Otherwise, increase pointer by 1 and continue with the state machine.
+    // input_position only moves forward, except for the single reset to 0 when the SCHEME state
+    // gives up and restarts in the NO_SCHEME state.
+    while(input_position <= input_size) {
+      switch (state) {
+        case ada::state::SCHEME_START: {
+          ada_log("SCHEME_START ", helpers::substring(url_data, input_position));
+          // If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state.
+          if ((input_position != input_size) && checkers::is_alpha(url_data[input_position])) {
+            state = ada::state::SCHEME;
+            input_position++;
+          } else {
+            // Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1.
+            state = ada::state::NO_SCHEME;
+          }
+          break;
+        }
+        case ada::state::SCHEME: {
+          ada_log("SCHEME ", helpers::substring(url_data, input_position));
+          // If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.), append c, lowercased, to buffer.
+          while((input_position != input_size) && (ada::unicode::is_alnum_plus(url_data[input_position]))) {
+            input_position++;
+          }
+          // Otherwise, if c is U+003A (:), then:
+          if ((input_position != input_size) && (url_data[input_position] == ':')) {
+            ada_log("SCHEME the scheme should be ", url_data.substr(0,input_position));
+            if(!url.parse_scheme(url_data.substr(0,input_position))) { return url; }
+            ada_log("SCHEME the scheme is ", url.get_scheme());
+
+            // If url’s scheme is "file", then:
+            if (url.get_scheme_type() == ada::scheme::type::FILE) {
+              // Set state to file state.
+              state = ada::state::FILE;
+            }
+            // Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme:
+            // Note: Doing base_url->scheme is unsafe if base_url != nullptr is false.
+            else if (url.is_special() && base_url != nullptr && base_url->get_scheme_type() == url.get_scheme_type()) {
+              // Set state to special relative or authority state.
+              state = ada::state::SPECIAL_RELATIVE_OR_AUTHORITY;
+            }
+            // Otherwise, if url is special, set state to special authority slashes state.
+            else if (url.is_special()) {
+              state = ada::state::SPECIAL_AUTHORITY_SLASHES;
+            }
+            // Otherwise, if remaining starts with an U+002F (/), set state to path or authority state
+            // and increase pointer by 1.
+            else if (input_position + 1 < input_size && url_data[input_position + 1] == '/') {
+              state = ada::state::PATH_OR_AUTHORITY;
+              input_position++;
+            }
+            // Otherwise, set url’s path to the empty string and set state to opaque path state.
+            else {
+              state = ada::state::OPAQUE_PATH;
+            }
+          }
+          // Otherwise, if state override is not given, set buffer to the empty string, state to no scheme state,
+          // and start over (from the first code point in input).
+          else {
+            state = ada::state::NO_SCHEME;
+            input_position = 0;
+            break;
+          }
+          input_position++;
+          break;
+        }
+        case ada::state::NO_SCHEME: {
+          ada_log("NO_SCHEME ", helpers::substring(url_data, input_position));
+          // The fragment was pruned from url_data before the state machine started, so
+          // "c is U+0023 (#)" translates to: nothing is left to read and a fragment was found.
+          const bool c_is_fragment_start = (input_position == input_size) && url.fragment.has_value();
+          // If base is null, or base has an opaque path and c is not U+0023 (#), validation error, return failure.
+          // This includes an empty input without fragment, where c is the EOF code point.
+          if (base_url == nullptr || (base_url->has_opaque_path && !c_is_fragment_start)) {
+            ada_log("NO_SCHEME validation error");
+            url.is_valid = false;
+            return url;
+          }
+          // Otherwise, if base has an opaque path and c is U+0023 (#),
+          // set url’s scheme to base’s scheme, url’s path to base’s path, url’s query to base’s query,
+          // url’s fragment to the empty string, and set state to fragment state.
+          else if (base_url->has_opaque_path && c_is_fragment_start) {
+            ada_log("NO_SCHEME opaque base with fragment");
+            url.copy_scheme(*base_url);
+            url.path = base_url->path;
+            url.has_opaque_path = base_url->has_opaque_path;
+            url.query = base_url->query;
+            return url;
+          }
+          // Otherwise, if base’s scheme is not "file", set state to relative state and decrease pointer by 1.
+          else if (base_url->get_scheme_type() != ada::scheme::type::FILE) {
+            ada_log("NO_SCHEME non-file relative path");
+            state = ada::state::RELATIVE_SCHEME;
+          }
+          // Otherwise, set state to file state and decrease pointer by 1.
+          else {
+            ada_log("NO_SCHEME file base type");
+            state = ada::state::FILE;
+          }
+          break;
+        }
+        case ada::state::AUTHORITY: {
+          ada_log("AUTHORITY ", helpers::substring(url_data, input_position));
+          // most URLs have no @. Having no @ tells us that we don't have to worry about AUTHORITY. Of course,
+          // we could have @ and still not have to worry about AUTHORITY.
+          // TODO: Instead of just collecting a bool, collect the location of the '@' and do something useful with it.
+          // TODO: We could do various processing early on, using a single pass over the string to collect
+          // information about it, e.g., telling us whether there is a @ and if so, where (or how many).
+          const bool contains_at_sign = (url_data.find('@', input_position) != std::string_view::npos);
+
+          if(!contains_at_sign) {
+            state = ada::state::HOST;
+            break;
+          }
+          bool at_sign_seen{false};
+          bool password_token_seen{false};
+          do {
+            std::string_view view = helpers::substring(url_data, input_position);
+            size_t location = url.is_special() ? view.find_first_of("@/?\\") : view.find_first_of("@/?");
+            std::string_view authority_view(view.data(), (location != std::string_view::npos) ? location : view.size());
+            size_t end_of_authority = input_position + authority_view.size();
+            // If c is U+0040 (@), then:
+            if ((end_of_authority != input_size) && (url_data[end_of_authority] == '@')) {
+              // If atSignSeen is true, then prepend "%40" to buffer.
+              if (at_sign_seen) {
+                if (password_token_seen) {
+                  url.password += "%40";
+                } else {
+                  url.username += "%40";
+                }
+              }
+
+              at_sign_seen = true;
+
+              if (!password_token_seen) {
+                size_t password_token_location = authority_view.find(':');
+                password_token_seen = password_token_location != std::string_view::npos;
+
+                if (!password_token_seen) {
+                  url.username += unicode::percent_encode(authority_view, character_sets::USERINFO_PERCENT_ENCODE);
+                } else {
+                  url.username += unicode::percent_encode(authority_view.substr(0,password_token_location), character_sets::USERINFO_PERCENT_ENCODE);
+                  url.password += unicode::percent_encode(authority_view.substr(password_token_location+1), character_sets::USERINFO_PERCENT_ENCODE);
+                }
+              }
+              else {
+                url.password += unicode::percent_encode(authority_view, character_sets::USERINFO_PERCENT_ENCODE);
+              }
+            }
+            // Otherwise, if one of the following is true:
+            // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
+            // - url is special and c is U+005C (\)
+            else if (end_of_authority == input_size || url_data[end_of_authority] == '/' || url_data[end_of_authority] == '?' || (url.is_special() && url_data[end_of_authority] == '\\')) {
+              // If atSignSeen is true and authority_view is the empty string, validation error, return failure.
+              if (at_sign_seen && authority_view.empty()) {
+                url.is_valid = false;
+                return url;
+              }
+              state = ada::state::HOST;
+              // Leaves the do/while only; the break after the loop leaves the switch.
+              break;
+            }
+            // Only the '@' branch reaches this point (the search above can stop on nothing but
+            // the characters handled by the two branches), so end_of_authority is a valid index
+            // and we simply continue after the '@'.
+            input_position = end_of_authority + 1;
+          } while(true);
+
+          break;
+        }
+        case ada::state::SPECIAL_RELATIVE_OR_AUTHORITY: {
+          ada_log("SPECIAL_RELATIVE_OR_AUTHORITY ", helpers::substring(url_data, input_position));
+
+          // If c is U+002F (/) and remaining starts with U+002F (/),
+          // then set state to special authority ignore slashes state and increase pointer by 1.
+          std::string_view view = helpers::substring(url_data, input_position);
+          if (ada::checkers::begins_with(view, "//")) {
+            state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES;
+            input_position += 2;
+          } else {
+            // Otherwise, validation error, set state to relative state and decrease pointer by 1.
+            state = ada::state::RELATIVE_SCHEME;
+          }
+
+          break;
+        }
+        case ada::state::PATH_OR_AUTHORITY: {
+          ada_log("PATH_OR_AUTHORITY ", helpers::substring(url_data, input_position));
+
+          // If c is U+002F (/), then set state to authority state.
+          if ((input_position != input_size) && (url_data[input_position] == '/')) {
+            state = ada::state::AUTHORITY;
+            input_position++;
+          } else {
+            // Otherwise, set state to path state, and decrease pointer by 1.
+            state = ada::state::PATH;
+          }
+
+          break;
+        }
+        case ada::state::RELATIVE_SCHEME: {
+          ada_log("RELATIVE_SCHEME ", helpers::substring(url_data, input_position));
+
+          // Set url’s scheme to base’s scheme.
+          url.copy_scheme(*base_url);
+
+          // If c is U+002F (/), then set state to relative slash state.
+          if ((input_position != input_size) && (url_data[input_position] == '/')) {
+            ada_log("RELATIVE_SCHEME if c is U+002F (/), then set state to relative slash state");
+            state = ada::state::RELATIVE_SLASH;
+          } else if (url.is_special() && (input_position != input_size) && (url_data[input_position] == '\\')) {
+            // Otherwise, if url is special and c is U+005C (\), validation error, set state to relative slash state.
+            ada_log("RELATIVE_SCHEME if url is special and c is U+005C, validation error, set state to relative slash state");
+            state = ada::state::RELATIVE_SLASH;
+          } else {
+            ada_log("RELATIVE_SCHEME otherwise");
+            // Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host,
+            // url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query.
+            url.username = base_url->username;
+            url.password = base_url->password;
+            url.host = base_url->host;
+            url.port = base_url->port;
+            url.path = base_url->path;
+            url.has_opaque_path = base_url->has_opaque_path;
+            url.query = base_url->query;
+
+            // If c is U+003F (?), then set url’s query to the empty string, and state to query state.
+            if ((input_position != input_size) && (url_data[input_position] == '?')) {
+              state = ada::state::QUERY;
+            }
+            // Otherwise, if c is not the EOF code point:
+            else if (input_position != input_size) {
+              // Set url’s query to null.
+              url.query = std::nullopt;
+
+              // Shorten url’s path.
+              helpers::shorten_path(url.path, url.get_scheme_type());
+
+              // Set state to path state and decrease pointer by 1.
+              state = ada::state::PATH;
+              break;
+            }
+          }
+          input_position++;
+          break;
+        }
+        case ada::state::RELATIVE_SLASH: {
+          ada_log("RELATIVE_SLASH ", helpers::substring(url_data, input_position));
+
+          // If url is special and c is U+002F (/) or U+005C (\), then:
+          if (url.is_special() && (input_position != input_size) && (url_data[input_position] == '/' || url_data[input_position] =='\\')) {
+            // Set state to special authority ignore slashes state.
+            state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES;
+          }
+          // Otherwise, if c is U+002F (/), then set state to authority state.
+          else if ((input_position != input_size) && (url_data[input_position] == '/')) {
+            state = ada::state::AUTHORITY;
+          }
+          // Otherwise, set
+          // - url’s username to base’s username,
+          // - url’s password to base’s password,
+          // - url’s host to base’s host,
+          // - url’s port to base’s port,
+          // - state to path state, and then, decrease pointer by 1.
+          else {
+            url.username = base_url->username;
+            url.password = base_url->password;
+            url.host = base_url->host;
+            url.port = base_url->port;
+            state = ada::state::PATH;
+            break;
+          }
+
+          input_position++;
+          break;
+        }
+        case ada::state::SPECIAL_AUTHORITY_SLASHES: {
+          ada_log("SPECIAL_AUTHORITY_SLASHES ", helpers::substring(url_data, input_position));
+
+          // If c is U+002F (/) and remaining starts with U+002F (/),
+          // then set state to special authority ignore slashes state and increase pointer by 1.
+          state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES;
+          std::string_view view = helpers::substring(url_data, input_position);
+          if (ada::checkers::begins_with(view, "//")) {
+            input_position += 2;
+          }
+
+          [[fallthrough]];
+        }
+        case ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES: {
+          ada_log("SPECIAL_AUTHORITY_IGNORE_SLASHES ", helpers::substring(url_data, input_position));
+
+          // If c is neither U+002F (/) nor U+005C (\), then set state to authority state and decrease pointer by 1.
+          while ((input_position != input_size) && ((url_data[input_position] == '/') || (url_data[input_position] == '\\'))) {
+            input_position++;
+          }
+          state = ada::state::AUTHORITY;
+
+          break;
+        }
+        case ada::state::QUERY: {
+          ada_log("QUERY ", helpers::substring(url_data, input_position));
+          // If encoding is not UTF-8 and one of the following is true:
+          // - url is not special
+          // - url’s scheme is "ws" or "wss"
+          if (encoding != ada::encoding_type::UTF8) {
+            if (!url.is_special() || url.get_scheme_type() == ada::scheme::type::WS || url.get_scheme_type() == ada::scheme::type::WSS) {
+              // then set encoding to UTF-8.
+              encoding = ada::encoding_type::UTF8;
+            }
+          }
+          // Let queryPercentEncodeSet be the special-query percent-encode set if url is special;
+          // otherwise the query percent-encode set.
+          auto query_percent_encode_set = url.is_special() ?
+                                ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE :
+                                ada::character_sets::QUERY_PERCENT_ENCODE;
+
+          // Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet,
+          // and append the result to url’s query.
+          url.query = ada::unicode::percent_encode(helpers::substring(url_data, input_position), query_percent_encode_set);
+
+          return url;
+        }
+        case ada::state::HOST: {
+          ada_log("HOST ", helpers::substring(url_data, input_position));
+
+          std::string_view host_view = helpers::substring(url_data, input_position);
+          bool inside_brackets{false};
+          size_t location = helpers::get_host_delimiter_location(url, host_view, inside_brackets);
+          input_position = (location != std::string_view::npos) ? input_position + location : input_size;
+          // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
+          if ((input_position != input_size) && (url_data[input_position] == ':') && !inside_brackets) {
+            // If buffer is the empty string, validation error, return failure.
+            // Let host be the result of host parsing buffer with url is not special.
+            ada_log("HOST parsing ", host_view);
+            if(!url.parse_host(host_view)) { return url; }
+            // Only read the host when there is one; the fallback text covers the empty optional.
+            ada_log("HOST parsing results in ", url.host.has_value() ? url.host.value() : "none");
+            // Set url’s host to host, buffer to the empty string, and state to port state.
+            state = ada::state::PORT;
+            input_position++;
+          }
+          // Otherwise, if one of the following is true:
+          // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
+          // - url is special and c is U+005C (\)
+          else if (input_position == input_size || url_data[input_position] == '/' || url_data[input_position] == '?' || (url.is_special() && url_data[input_position] == '\\')) {
+
+            // If url is special and host_view is the empty string, validation error, return failure.
+            if (url.is_special() && host_view.empty()) {
+              url.is_valid = false;
+              return url;
+            }
+
+            // Let host be the result of host parsing host_view with url is not special.
+            if (host_view.empty()) {
+              url.host = "";
+            } else {
+              if(!url.parse_host(host_view)) { return url; }
+            }
+            // Set url’s host to host, and state to path start state.
+            state = ada::state::PATH_START;
+          }
+
+          break;
+        }
+        case ada::state::OPAQUE_PATH: {
+          ada_log("OPAQUE_PATH ", helpers::substring(url_data, input_position));
+          std::string_view view = helpers::substring(url_data, input_position);
+          // If c is U+003F (?), then set url’s query to the empty string and state to query state.
+          size_t location = view.find('?');
+          if(location != std::string_view::npos) {
+            view.remove_suffix(view.size() - location);
+            state = ada::state::QUERY;
+            input_position += location + 1;
+          } else {
+            // Nothing left to parse: move past the end so that the main loop terminates.
+            input_position = input_size + 1;
+          }
+          url.has_opaque_path = true;
+          url.path = unicode::percent_encode(view, character_sets::C0_CONTROL_PERCENT_ENCODE);
+          break;
+        }
+        case ada::state::PORT: {
+          ada_log("PORT ", helpers::substring(url_data, input_position));
+          std::string_view port_view = helpers::substring(url_data, input_position);
+          size_t consumed_bytes = url.parse_port(port_view, true);
+          input_position += consumed_bytes;
+          if(!url.is_valid) { return url; }
+          state = state::PATH_START;
+          [[fallthrough]];
+        }
+        case ada::state::PATH_START: {
+          ada_log("PATH_START ", helpers::substring(url_data, input_position));
+
+          // If url is special, then:
+          if (url.is_special()) {
+            // Set state to path state.
+            state = ada::state::PATH;
+
+            // Optimization: Avoiding going into PATH state improves the performance of urls ending with /.
+            if (input_position == input_size) {
+              url.path = "/";
+              return url;
+            }
+            // If c is neither U+002F (/) nor U+005C (\), then decrease pointer by 1.
+            // We know that (input_position == input_size) is impossible here, because of the previous if-check.
+            if ((url_data[input_position] != '/') && (url_data[input_position] != '\\')) {
+              break;
+            }
+          }
+          // Otherwise, if state override is not given and c is U+003F (?),
+          // set url’s query to the empty string and state to query state.
+          else if ((input_position != input_size) && (url_data[input_position] == '?')) {
+            state = ada::state::QUERY;
+          }
+          // Otherwise, if c is not the EOF code point:
+          else if (input_position != input_size) {
+            // Set state to path state.
+            state = ada::state::PATH;
+
+            // If c is not U+002F (/), then decrease pointer by 1.
+            if (url_data[input_position] != '/') {
+              break;
+            }
+          }
+
+          input_position++;
+          break;
+        }
+        case ada::state::PATH: {
+          std::string_view view = helpers::substring(url_data, input_position);
+          ada_log("PATH ", helpers::substring(url_data, input_position));
+
+          // Most of the time, we do not need percent encoding.
+          // Furthermore, we can immediately locate the '?'.
+          size_t locofquestionmark = view.find('?');
+          if(locofquestionmark != std::string_view::npos) {
+            state = ada::state::QUERY;
+            view.remove_suffix(view.size()-locofquestionmark);
+            input_position += locofquestionmark + 1;
+          } else {
+            // Nothing left to parse: move past the end so that the main loop terminates.
+            input_position = input_size + 1;
+          }
+          if(!helpers::parse_prepared_path(view, url.get_scheme_type(), url.path)) { return url; }
+          break;
+        }
+        case ada::state::FILE_SLASH: {
+          ada_log("FILE_SLASH ", helpers::substring(url_data, input_position));
+
+          // If c is U+002F (/) or U+005C (\), then:
+          if ((input_position != input_size) && (url_data[input_position] == '/' || url_data[input_position] == '\\')) {
+            ada_log("FILE_SLASH c is U+002F or U+005C");
+            // Set state to file host state.
+            state = ada::state::FILE_HOST;
+            input_position++;
+          } else {
+            ada_log("FILE_SLASH otherwise");
+            // If base is non-null and base’s scheme is "file", then:
+            // Note: it is unsafe to do base_url->scheme unless you know that
+            // base_url_has_value() is true.
+            if (base_url != nullptr && base_url->get_scheme_type() == ada::scheme::type::FILE) {
+              // Set url’s host to base’s host.
+              url.host = base_url->host;
+
+              // If the code point substring from pointer to the end of input does not start with
+              // a Windows drive letter and base’s path[0] is a normalized Windows drive letter,
+              // then append base’s path[0] to url’s path.
+              if (!base_url->path.empty()) {
+                if (!checkers::is_windows_drive_letter(helpers::substring(url_data, input_position))) {
+                  // Isolate the first path segment: drop the first character and cut at the next '/'.
+                  std::string_view first_base_url_path = base_url->path;
+                  first_base_url_path.remove_prefix(1);
+                  size_t loc = first_base_url_path.find('/');
+                  if(loc != std::string_view::npos) {
+                    first_base_url_path.remove_suffix(first_base_url_path.size() - loc);
+                  }
+                  if (checkers::is_normalized_windows_drive_letter(first_base_url_path)) {
+                    url.path += '/';
+                    url.path += first_base_url_path;
+                  }
+                }
+              }
+            }
+
+            // Set state to path state, and decrease pointer by 1.
+            state = ada::state::PATH;
+          }
+
+          break;
+        }
+        case ada::state::FILE_HOST: {
+          std::string_view view = helpers::substring(url_data, input_position);
+          ada_log("FILE_HOST ", helpers::substring(url_data, input_position));
+
+          size_t location = view.find_first_of("/\\?");
+          std::string_view file_host_buffer(view.data(), (location != std::string_view::npos) ? location : view.size());
+
+          if (checkers::is_windows_drive_letter(file_host_buffer)) {
+            state = ada::state::PATH;
+          } else if (file_host_buffer.empty()) {
+            // Set url’s host to the empty string.
+            url.host = "";
+            // Set state to path start state.
+            state = ada::state::PATH_START;
+          } else {
+            size_t consumed_bytes = file_host_buffer.size();
+            input_position += consumed_bytes;
+            // Let host be the result of host parsing buffer with url is not special.
+            if(!url.parse_host(file_host_buffer)) { return url; }
+
+            // If host is "localhost", then set host to the empty string.
+            if (url.host.has_value() && url.host.value() == "localhost") {
+              url.host = "";
+            }
+
+            // Set buffer to the empty string and state to path start state.
+            state = ada::state::PATH_START;
+          }
+
+          break;
+        }
+        case ada::state::FILE: {
+          ada_log("FILE ", helpers::substring(url_data, input_position));
+          std::string_view file_view = helpers::substring(url_data, input_position);
+
+          // Set url’s scheme to "file".
+          url.set_scheme("file");
+
+          // Set url’s host to the empty string.
+          url.host = "";
+
+          // If c is U+002F (/) or U+005C (\), then:
+          if (input_position != input_size && (url_data[input_position] == '/' || url_data[input_position] == '\\')) {
+            ada_log("FILE c is U+002F or U+005C");
+            // Set state to file slash state.
+            state = ada::state::FILE_SLASH;
+          }
+          // Otherwise, if base is non-null and base’s scheme is "file":
+          else if (base_url != nullptr && base_url->get_scheme_type() == ada::scheme::type::FILE) {
+            // Set url’s host to base’s host, url’s path to a clone of base’s path, and url’s query to base’s query.
+            ada_log("FILE base non-null");
+            url.host = base_url->host;
+            url.path = base_url->path;
+            url.has_opaque_path = base_url->has_opaque_path;
+            url.query = base_url->query;
+
+            // If c is U+003F (?), then set url’s query to the empty string and state to query state.
+            if (input_position != input_size && url_data[input_position] == '?') {
+              state = ada::state::QUERY;
+            }
+            // Otherwise, if c is not the EOF code point:
+            else if (input_position != input_size) {
+              // Set url’s query to null.
+              url.query = std::nullopt;
+
+              // If the code point substring from pointer to the end of input does not start with a
+              // Windows drive letter, then shorten url’s path.
+              if (!checkers::is_windows_drive_letter(file_view)) {
+                helpers::shorten_path(url.path, url.get_scheme_type());
+              }
+              // Otherwise:
+              else {
+                // Set url’s path to an empty list.
+                url.path.clear();
+                url.has_opaque_path = true;
+              }
+
+              // Set state to path state and decrease pointer by 1.
+              state = ada::state::PATH;
+              break;
+            }
+          }
+          // Otherwise, set state to path state, and decrease pointer by 1.
+          else {
+            ada_log("FILE go to path");
+            state = ada::state::PATH;
+            break;
+          }
+
+          input_position++;
+          break;
+        }
+        default:
+          ada::unreachable();
+      }
+    }
+    ada_log("returning ", url.to_string());
+    return url;
+  }
+
+} // namespace ada::parser
+/* end file src/parser.cpp */
+/* end file src/ada.cpp */
diff --git a/deps/ada/ada.gyp b/deps/ada/ada.gyp
new file mode 100644
index 00000000000000..1171e8750755e1
--- /dev/null
+++ b/deps/ada/ada.gyp
@@ -0,0 +1,32 @@
+{
+  'variables': {
+    'v8_enable_i18n_support%': 1,
+  },
+  'targets': [
+    {
+      'target_name': 'ada',
+      'type': 'static_library',
+      'include_dirs': ['.'],
+      'direct_dependent_settings': {
+        'include_dirs': ['.'],
+      },
+      'sources': ['ada.cpp'],
+      'conditions': [
+        ['v8_enable_i18n_support==0', {
+          'defines': ['ADA_HAS_ICU=0'],
+        }],
+        ['v8_enable_i18n_support==1', {
+          'dependencies': [
+            '<(icu_gyp_path):icui18n',
+            '<(icu_gyp_path):icuuc',
+          ],
+        }],
+        ['OS=="win" and v8_enable_i18n_support==1', {
+          'dependencies': [
+            '<(icu_gyp_path):icudata',
+          ],
+        }],
+      ]
+    },
+  ]
+}
diff --git a/deps/ada/ada.h b/deps/ada/ada.h
new file mode 100644
index 00000000000000..00203ccd00bbe3
--- /dev/null
+++ b/deps/ada/ada.h
@@ -0,0 +1,4388 @@
+/* auto-generated on 2023-02-06 08:25:59 -0500. Do not edit! */
+// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada.h
+/* begin file include/ada.h */
+/**
+ * @file ada.h
+ * @brief Includes all definitions for Ada.
+ */
+#ifndef ADA_H
+#define ADA_H
+
+// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/character_sets-inl.h
+/* begin file include/ada/character_sets-inl.h */
+/**
+ * @file character_sets-inl.h
+ * @brief Definitions of the character sets used by unicode functions.
+ * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url_tables.cc + */ +#ifndef ADA_CHARACTER_SETS_INL_H +#define ADA_CHARACTER_SETS_INL_H + +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/character_sets.h +/* begin file include/ada/character_sets.h */ +/** + * @file character_sets.h + * @brief Declaration of the character sets used by unicode functions. + * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url_tables.cc + */ +#ifndef ADA_CHARACTER_SETS_H +#define ADA_CHARACTER_SETS_H + +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/common_defs.h +/* begin file include/ada/common_defs.h */ +/** + * @file common_defs.h + * @brief Common definitions for cross-platform compiler support. + */ +#ifndef ADA_COMMON_DEFS_H +#define ADA_COMMON_DEFS_H + +#ifdef _MSC_VER +#define ADA_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + */ +#ifdef __clang__ +// clang under visual studio +#define ADA_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define ADA_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. 
+ #define ADA_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define ADA_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define ADA_DEBUG_BLOCK(name, block) ADA_BEGIN_DEBUG_BLOCK(name); block; ADA_END_DEBUG_BLOCK(name); +#else + #define ADA_BEGIN_DEBUG_BLOCK(name) + #define ADA_END_DEBUG_BLOCK(name) + #define ADA_DEBUG_BLOCK(name, block) block; // no MCA markers on this compiler: still run the block +#endif + +// Align to N-byte boundary +#define ADA_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define ADA_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define ADA_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if defined(ADA_REGULAR_VISUAL_STUDIO) + + #define ada_really_inline __forceinline + #define ada_never_inline __declspec(noinline) + + #define ada_unused + #define ada_warn_unused + + #ifndef ada_likely + #define ada_likely(x) x + #endif + #ifndef ada_unlikely + #define ada_unlikely(x) x + #endif + + #define ADA_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define ADA_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define ADA_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
+ #ifdef __has_include + #if __has_include() + #include + #define ADA_DISABLE_UNDESIRED_WARNINGS ADA_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef ADA_DISABLE_UNDESIRED_WARNINGS + #define ADA_DISABLE_UNDESIRED_WARNINGS + #endif + + #define ADA_DISABLE_DEPRECATED_WARNING ADA_DISABLE_VS_WARNING(4996) + #define ADA_DISABLE_STRICT_OVERFLOW_WARNING + #define ADA_POP_DISABLE_WARNINGS __pragma(warning( pop )) + +#else // ADA_REGULAR_VISUAL_STUDIO + + #define ada_really_inline inline __attribute__((always_inline)) + #define ada_never_inline inline __attribute__((noinline)) + + #define ada_unused __attribute__((unused)) + #define ada_warn_unused __attribute__((warn_unused_result)) + + #ifndef ada_likely + #define ada_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef ada_unlikely + #define ada_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define ADA_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + #define ADA_PUSH_DISABLE_ALL_WARNINGS ADA_PUSH_DISABLE_WARNINGS \ + ADA_DISABLE_GCC_WARNING(-Weffc++) \ + ADA_DISABLE_GCC_WARNING(-Wall) \ + ADA_DISABLE_GCC_WARNING(-Wconversion) \ + ADA_DISABLE_GCC_WARNING(-Wextra) \ + ADA_DISABLE_GCC_WARNING(-Wattributes) \ + ADA_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + ADA_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + ADA_DISABLE_GCC_WARNING(-Wreturn-type) \ + ADA_DISABLE_GCC_WARNING(-Wshadow) \ + ADA_DISABLE_GCC_WARNING(-Wunused-parameter) \ + ADA_DISABLE_GCC_WARNING(-Wunused-variable) + #define ADA_PRAGMA(P) _Pragma(#P) + #define ADA_DISABLE_GCC_WARNING(WARNING) ADA_PRAGMA(GCC diagnostic ignored #WARNING) + #if defined(ADA_CLANG_VISUAL_STUDIO) + #define ADA_DISABLE_UNDESIRED_WARNINGS ADA_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define ADA_DISABLE_UNDESIRED_WARNINGS + #endif + #define ADA_DISABLE_DEPRECATED_WARNING ADA_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define 
ADA_DISABLE_STRICT_OVERFLOW_WARNING ADA_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define ADA_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + +#endif // MSC_VER + +#if defined(ADA_VISUAL_STUDIO) + /** + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio. + */ + #if ADA_USING_LIBRARY + #define ADA_DLLIMPORTEXPORT __declspec(dllimport) + #else + #define ADA_DLLIMPORTEXPORT __declspec(dllexport) + #endif +#else + #define ADA_DLLIMPORTEXPORT +#endif + +/// If EXPR is an error, returns it. +#define ADA_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// __has_cpp_attribute is part of C++20 +#if !defined(__has_cpp_attribute) +#define __has_cpp_attribute(x) 0 +#endif + + +#if __has_cpp_attribute(gnu::noinline) +#define ADA_ATTRIBUTE_NOINLINE [[gnu::noinline]] +#else +#define ADA_ATTRIBUTE_NOINLINE +#endif + +namespace ada { + [[noreturn]] inline void unreachable() { +#ifdef __GNUC__ + __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(false); +#else +#endif + } +} + + + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ <= 8 +#define ADA_OLD_GCC 1 +#endif // __GNUC__ <= 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if ADA_OLD_GCC +#define ada_constexpr +#else +#define ada_constexpr constexpr +#endif + + #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + #define ADA_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #elif defined(_WIN32) + #define ADA_IS_BIG_ENDIAN 0 + #else + #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + #include + #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) + #include + #else // defined(__APPLE__) || defined(__FreeBSD__) + + #ifdef __has_include + #if __has_include() + #include + #endif //__has_include() + #endif //__has_include + + #endif // defined(__APPLE__) || defined(__FreeBSD__) + + + #ifndef !defined(__BYTE_ORDER__) || 
!defined(__ORDER_LITTLE_ENDIAN__) + #define ADA_IS_BIG_ENDIAN 0 + #endif + + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define ADA_IS_BIG_ENDIAN 0 + #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define ADA_IS_BIG_ENDIAN 1 + #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + + #endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + + +#ifndef ADA_HAS_ICU +#if __has_include() +#define ADA_HAS_ICU 1 +#else +#define ADA_HAS_ICU 0 +#endif // __has_include() +#endif // ADA_HAS_ICU + +#if ADA_HAS_ICU +#include +#include +#include +#endif // ADA_HAS_ICU + +#define ADA_WINDOWS_TO_ASCII_FALLBACK 0 // we never use anything but ICU. No fallback. + +#endif // ADA_COMMON_DEFS_H +/* end file include/ada/common_defs.h */ +#include + +/** + * @namespace ada::character_sets + * @brief Includes the definitions for unicode character sets. + */ +namespace ada::character_sets { + ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i); +} // namespace ada::character_sets + +#endif // ADA_CHARACTER_SETS_H +/* end file include/ada/character_sets.h */ + +namespace ada::character_sets { + + constexpr char hex[1024] = + "%00\0%01\0%02\0%03\0%04\0%05\0%06\0%07\0" + "%08\0%09\0%0A\0%0B\0%0C\0%0D\0%0E\0%0F\0" + "%10\0%11\0%12\0%13\0%14\0%15\0%16\0%17\0" + "%18\0%19\0%1A\0%1B\0%1C\0%1D\0%1E\0%1F\0" + "%20\0%21\0%22\0%23\0%24\0%25\0%26\0%27\0" + "%28\0%29\0%2A\0%2B\0%2C\0%2D\0%2E\0%2F\0" + "%30\0%31\0%32\0%33\0%34\0%35\0%36\0%37\0" + "%38\0%39\0%3A\0%3B\0%3C\0%3D\0%3E\0%3F\0" + "%40\0%41\0%42\0%43\0%44\0%45\0%46\0%47\0" + "%48\0%49\0%4A\0%4B\0%4C\0%4D\0%4E\0%4F\0" + "%50\0%51\0%52\0%53\0%54\0%55\0%56\0%57\0" + "%58\0%59\0%5A\0%5B\0%5C\0%5D\0%5E\0%5F\0" + "%60\0%61\0%62\0%63\0%64\0%65\0%66\0%67\0" + "%68\0%69\0%6A\0%6B\0%6C\0%6D\0%6E\0%6F\0" + "%70\0%71\0%72\0%73\0%74\0%75\0%76\0%77\0" + "%78\0%79\0%7A\0%7B\0%7C\0%7D\0%7E\0%7F\0" + "%80\0%81\0%82\0%83\0%84\0%85\0%86\0%87\0" + "%88\0%89\0%8A\0%8B\0%8C\0%8D\0%8E\0%8F\0" + 
"%90\0%91\0%92\0%93\0%94\0%95\0%96\0%97\0" + "%98\0%99\0%9A\0%9B\0%9C\0%9D\0%9E\0%9F\0" + "%A0\0%A1\0%A2\0%A3\0%A4\0%A5\0%A6\0%A7\0" + "%A8\0%A9\0%AA\0%AB\0%AC\0%AD\0%AE\0%AF\0" + "%B0\0%B1\0%B2\0%B3\0%B4\0%B5\0%B6\0%B7\0" + "%B8\0%B9\0%BA\0%BB\0%BC\0%BD\0%BE\0%BF\0" + "%C0\0%C1\0%C2\0%C3\0%C4\0%C5\0%C6\0%C7\0" + "%C8\0%C9\0%CA\0%CB\0%CC\0%CD\0%CE\0%CF\0" + "%D0\0%D1\0%D2\0%D3\0%D4\0%D5\0%D6\0%D7\0" + "%D8\0%D9\0%DA\0%DB\0%DC\0%DD\0%DE\0%DF\0" + "%E0\0%E1\0%E2\0%E3\0%E4\0%E5\0%E6\0%E7\0" + "%E8\0%E9\0%EA\0%EB\0%EC\0%ED\0%EE\0%EF\0" + "%F0\0%F1\0%F2\0%F3\0%F4\0%F5\0%F6\0%F7\0" + "%F8\0%F9\0%FA\0%FB\0%FC\0%FD\0%FE\0%FF"; + + constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 
7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 
| 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 
E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t QUERY_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 
| 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 
55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t USERINFO_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 
0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + 
// C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t PATH_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 
0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { + return !!(a[i >> 3] & (1 << (i & 7))); + } + +} // namespace ada::character_sets + +#endif // ADA_CHARACTER_SETS_H +/* end file include/ada/character_sets-inl.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/checkers-inl.h +/* begin file include/ada/checkers-inl.h */ +/** + * @file checkers-inl.h + * @brief Definitions for URL specific checkers used within Ada. 
+ */ +#ifndef ADA_CHECKERS_INL_H +#define ADA_CHECKERS_INL_H + + +#include +#include + +namespace ada::checkers { + + inline bool has_hex_prefix_unsafe(std::string_view input) { + // This is actually efficient code, see has_hex_prefix for the assembly. + uint32_t value_one = 1; + bool is_little_endian = (reinterpret_cast(&value_one)[0] == 1); + uint16_t word0x{}; + std::memcpy(&word0x, "0x", 2); // we would use bit_cast in C++20 and the function could be constexpr. + uint16_t two_first_bytes{}; + std::memcpy(&two_first_bytes, input.data(),2); + if(is_little_endian) { two_first_bytes |= 0x2000; } else { two_first_bytes |= 0x020; } + return two_first_bytes == word0x; + } + + inline bool has_hex_prefix(std::string_view input) { + return input.size() >=2 && has_hex_prefix_unsafe(input); + } + + constexpr bool is_digit(char x) noexcept { return (x >= '0') & (x <= '9'); } + + constexpr char to_lower(char x) noexcept { return (x | 0x20); } + + constexpr bool is_alpha(char x) noexcept { return (to_lower(x) >= 'a') && (to_lower(x) <= 'z'); } + + inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept { + return input.size() >= 2 && (is_alpha(input[0]) && ((input[1] == ':') || (input[1] == '|'))) + && ((input.size() == 2) || (input[2] == '/' || input[2] == '\\' || input[2] == '?'
|| input[2] == '#')); + } + + inline constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept { + return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':')); + } + + ada_really_inline constexpr bool begins_with(std::string_view view, std::string_view prefix) { + // in C++20, you have view.starts_with(prefix) + return view.size() >= prefix.size() && (view.substr(0, prefix.size()) == prefix); + } + +} // namespace ada::checkers + +#endif // ADA_CHECKERS_INL_H +/* end file include/ada/checkers-inl.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/log.h +/* begin file include/ada/log.h */ +/** + * @file log.h + * @brief Includes the definitions for logging. + * @private Excluded from docs through the doxygen file. + */ +#ifndef ADA_LOG_H +#define ADA_LOG_H + +#include +// To enable logging, set ADA_LOGGING to 1: +#ifndef ADA_LOGGING +#define ADA_LOGGING 0 +#endif + +namespace ada { + +/** + * Private function used for logging messages. + * @private + */ +template +ada_really_inline void inner_log([[maybe_unused]] T t) { +#if ADA_LOGGING + std::cout << t << std::endl; +#endif +} + + +/** + * Private function used for logging messages. + * @private + */ +template +ada_really_inline void inner_log([[maybe_unused]] T t, [[maybe_unused]] Args... args) { +#if ADA_LOGGING + std::cout << t; + inner_log(args...) ; +#endif +} + + +/** + * Log a message. + * @private + */ +template +ada_really_inline void log([[maybe_unused]] T t, [[maybe_unused]] Args... args) { +#if ADA_LOGGING + std::cout << "ADA_LOG: " << t; + inner_log(args...) ; +#endif +} + +/** + * Log a message. + * @private + */ +template +ada_really_inline void log([[maybe_unused]] T t) { +#if ADA_LOGGING + std::cout << "ADA_LOG: " << t << std::endl; +#endif + +} +} + +#if ADA_LOGGING + +#ifndef ada_log +#define ada_log(...) do { \ + ada::log(__VA_ARGS__); \ +} while(0) +#endif // ada_log +#else +#define ada_log(...)
+#endif // ADA_LOGGING + +#endif // ADA_LOG_H +/* end file include/ada/log.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/encoding_type.h +/* begin file include/ada/encoding_type.h */ +/** + * @file encoding_type.h + * @brief Definition for supported encoding types. + */ +#ifndef ADA_ENCODING_TYPE_H +#define ADA_ENCODING_TYPE_H + +#include + +namespace ada { + + /** + * This specification defines three encodings with the same names as encoding schemes defined + * in the Unicode standard: UTF-8, UTF-16LE, and UTF-16BE. + * + * @see https://encoding.spec.whatwg.org/#encodings + */ + enum class encoding_type { + UTF8, + UTF_16LE, + UTF_16BE, + }; + + /** + * Convert a encoding_type to string. + */ + ada_warn_unused std::string to_string(encoding_type type); + +} // ada namespace + +#endif // ADA_ENCODING_TYPE_H +/* end file include/ada/encoding_type.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/helpers.h +/* begin file include/ada/helpers.h */ +/** + * @file helpers.h + * @brief Definitions for helper functions used within Ada. + */ +#ifndef ADA_HELPERS_H +#define ADA_HELPERS_H + +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/url.h +/* begin file include/ada/url.h */ +/** + * @file url.h + * @brief Declaration for the URL + */ +#ifndef ADA_URL_H +#define ADA_URL_H + +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/checkers.h +/* begin file include/ada/checkers.h */ +/** + * @file checkers.h + * @brief Declarations for URL specific checkers used within Ada. + */ +#ifndef ADA_CHECKERS_H +#define ADA_CHECKERS_H + + +#include +#include + +/** + * @namespace ada::checkers + * @brief Includes the definitions for validation functions + */ +namespace ada::checkers { + + /** + * Assuming that x is an ASCII letter, this function returns the lower case equivalent. 
+ * @details More likely to be inlined by the compiler and constexpr. + */ + constexpr char to_lower(char x) noexcept; + + /** + * Returns true if the character is an ASCII letter. Equivalent to std::isalpha but + * more likely to be inlined by the compiler. + * + * @attention std::isalpha is not constexpr generally. + */ + constexpr bool is_alpha(char x) noexcept; + + /** + * Check whether a string starts with 0x or 0X. The function is only + * safe if input.size() >=2. + * + * @see has_hex_prefix + */ + inline bool has_hex_prefix_unsafe(std::string_view input); + /** + * Check whether a string starts with 0x or 0X. + */ + inline bool has_hex_prefix(std::string_view input); + + /** + * Check whether x is an ASCII digit. More likely to be inlined than std::isdigit. + */ + constexpr bool is_digit(char x) noexcept; + + /** + * @details A string starts with a Windows drive letter if all of the following are true: + * + * - its length is greater than or equal to 2 + * - its first two code points are a Windows drive letter + * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#). + * + * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter + */ + inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept; + + /** + * @details A normalized Windows drive letter is a Windows drive letter of which the second code point is U+003A (:). + */ + inline constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept; + + /** + * @warning Will be removed when Ada supports C++20. + */ + ada_really_inline constexpr bool begins_with(std::string_view view, std::string_view prefix); + + /** + * Returns true if an input is an ipv4 address. + */ + ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; + + /** + * Returns a bitset. If the first bit is set, then at least one character needs + * percent encoding. If the second bit is set, a \\ is found. 
If the third bit is set + * then we have a dot. If the fourth bit is set, then we have a percent character. + */ + ada_really_inline constexpr uint8_t path_signature(std::string_view input) noexcept; + + /** + * Returns true if the length of the domain name and its labels are according to the specifications. + * The length of the domain must be 255 octets (253 characters not including the last 2 which are the empty + * label reserved at the end). When the empty label is included (a dot at the end), the domain name can have + * 254 characters. The length of a label must be at least 1 and at most 63 characters. + * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034 + * @see https://www.unicode.org/reports/tr46/#ToASCII + */ + ada_really_inline constexpr bool verify_dns_length(std::string_view input) noexcept; + +} // namespace ada::checkers + +#endif //ADA_CHECKERS_H +/* end file include/ada/checkers.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/scheme.h +/* begin file include/ada/scheme.h */ +/** + * @file scheme.h + * @brief Declarations for the URL scheme. + */ +#ifndef ADA_SCHEME_H +#define ADA_SCHEME_H + + +#include +#include +#include + +/** + * @namespace ada::scheme + * @brief Includes the scheme declarations + */ +namespace ada::scheme { + + /** + * Type of the scheme as an enum. + * Using strings to represent a scheme type is not ideal because + * checking for types involves string comparisons. It is faster to use + * a simple integer. + */ + enum type { + HTTP = 0, + NOT_SPECIAL = 1, + HTTPS = 2, + WS = 3, + FTP = 4, + WSS = 5, + FILE = 6 + }; + + /** + * A special scheme is an ASCII string that is listed in the first column of the following table. + * The default port for a special scheme is listed in the second column on the same row. + * The default port for any other ASCII string is null. 
+ * + * @see https://url.spec.whatwg.org/#url-miscellaneous + * @param scheme + * @return true if scheme is a special scheme + */ + ada_really_inline constexpr bool is_special(std::string_view scheme); + + /** + * A special scheme is an ASCII string that is listed in the first column of the following table. + * The default port for a special scheme is listed in the second column on the same row. + * The default port for any other ASCII string is null. + * + * @see https://url.spec.whatwg.org/#url-miscellaneous + * @param scheme + * @return The special port + */ + constexpr uint16_t get_special_port(std::string_view scheme) noexcept; + + /** + * Returns the port number of a special scheme. + * @see https://url.spec.whatwg.org/#special-scheme + */ + constexpr uint16_t get_special_port(ada::scheme::type type) noexcept; + /** + * Returns the scheme type of an input, or NOT_SPECIAL if it's not a special scheme defined by the spec. + */ + constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept; + +} // namespace ada::scheme + +#endif // ADA_SCHEME_H +/* end file include/ada/scheme.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/serializers.h +/* begin file include/ada/serializers.h */ +/** + * @file serializers.h + * @brief Definitions for the URL serializers. + */ +#ifndef ADA_SERIALIZERS_H +#define ADA_SERIALIZERS_H + + +#include +#include +#include + +/** + * @namespace ada::serializers + * @brief Includes the definitions for URL serializers + */ +namespace ada::serializers { + + /** + * Finds the longest sequence of 0 values in an ipv6 input; the result is written to compress and compress_length. + */ + void find_longest_sequence_of_ipv6_pieces(const std::array& address, size_t& compress, size_t& compress_length) noexcept; + + /** + * Serializes an ipv6 address. + * @details An IPv6 address is a 128-bit unsigned integer that identifies a network address.
+ * @see https://url.spec.whatwg.org/#concept-ipv6-serializer + */ + std::string ipv6(const std::array& address) noexcept; + + /** + * Serializes an ipv4 address. + * @details An IPv4 address is a 32-bit unsigned integer that identifies a network address. + * @see https://url.spec.whatwg.org/#concept-ipv4-serializer + */ + std::string ipv4(const uint64_t address) noexcept; + +} // namespace ada::serializers + +#endif // ADA_SERIALIZERS_H +/* end file include/ada/serializers.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/unicode.h +/* begin file include/ada/unicode.h */ +/** + * @file unicode.h + * @brief Definitions for all unicode specific functions. + */ +#ifndef ADA_UNICODE_H +#define ADA_UNICODE_H + +#include +#include + +/** + * @namespace ada::unicode + * @brief Includes the definitions for unicode operations + */ +namespace ada::unicode { + + /** + * We receive a UTF-8 string representing a domain name. + * If the string is percent encoded, we apply percent decoding. + * + * Given a domain, we need to identify its labels. + * They are separated by label-separators: + * + * U+002E ( . ) FULL STOP + * U+FF0E ( . ) FULLWIDTH FULL STOP + * U+3002 ( 。 ) IDEOGRAPHIC FULL STOP + * U+FF61 ( 。 ) HALFWIDTH IDEOGRAPHIC FULL STOP + * + * They are all mapped to U+002E. + * + * We process each label into a string that should not exceed 63 octets. + * If the string is already punycode (starts with "xn--"), then we must + * scan it to look for unallowed code points. + * Otherwise, if the string is not pure ASCII, we need to transcode it + * to punycode by following RFC 3454 which requires us to + * - Map characters (see section 3), + * - Normalize (see section 4), + * - Reject forbidden characters, + * - Check for right-to-left characters and if so, check all requirements (see section 6), + * - Optionally reject based on unassigned code points (section 7). 
+ * + * The Unicode standard provides a table of code points with a mapping, a list of + * forbidden code points and so forth. This table is subject to change and will + * vary based on the implementation. For Unicode 15, the table is at + * https://www.unicode.org/Public/idna/15.0.0/IdnaMappingTable.txt + * If you use ICU, they parse this table and map it to code using a Python script. + * + * The resulting strings should not exceed 255 octets according to RFC 1035 section 2.3.4. + * ICU checks for label size and domain size, but if we pass "be_strict = false", these + * errors are ignored. + * + * @see https://url.spec.whatwg.org/#concept-domain-to-ascii + * + */ + bool to_ascii(std::optional& out, std::string_view plain, bool be_strict, size_t first_percent); + + /** + * Checks if the input has tab or newline characters. + * + * @attention The has_tabs_or_newline function is a bottleneck and it is simple enough that compilers + * like GCC can 'autovectorize' it. + */ + ada_really_inline constexpr bool has_tabs_or_newline(std::string_view user_input) noexcept; + + /** + * Checks if the input is a forbidden host code point. + * @see https://url.spec.whatwg.org/#forbidden-host-code-point + */ + ada_really_inline constexpr bool is_forbidden_host_code_point(const char c) noexcept; + + + /** + * Checks if the input contains a forbidden domain code point. + * @see https://url.spec.whatwg.org/#forbidden-domain-code-point + */ + ada_really_inline constexpr bool contains_forbidden_domain_code_point(char * input, size_t length) noexcept; + + /** + * Checks if the input is a forbidden domain code point. + * @see https://url.spec.whatwg.org/#forbidden-domain-code-point + */ + ada_really_inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept; + + /** + * Checks if the input is alphanumeric, '+', '-' or '.'
+ */ + ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept; + + /** + * @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex digit. + * An ASCII upper hex digit is an ASCII digit or a code point in the range U+0041 (A) to U+0046 (F), inclusive. + * An ASCII lower hex digit is an ASCII digit or a code point in the range U+0061 (a) to U+0066 (f), inclusive. + */ + ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept; + + /** + * Checks if the input is a C0 control or space character. + * + * @details A C0 control or space is a C0 control or U+0020 SPACE. + * A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive. + */ + ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept; + + /** + * Checks if the input is an ASCII tab or newline character. + * + * @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR. + */ + ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept; + + /** + * @details A double-dot path segment must be ".." or an ASCII case-insensitive match for ".%2e", "%2e.", or "%2e%2e". + */ + ada_really_inline ada_constexpr bool is_double_dot_path_segment(const std::string_view input) noexcept; + + /** + * @details A single-dot path segment must be "." or an ASCII case-insensitive match for "%2e". + */ + ada_really_inline constexpr bool is_single_dot_path_segment(const std::string_view input) noexcept; + + /** + * @details An ipv4 character might be in the 0-9 or a-f character ranges. + */ + ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; + + /** + * @details Convert hex to binary. + */ + unsigned constexpr convert_hex_to_binary(char c) noexcept; + + /** + * first_percent should be equal to input.find('%') + * + * @todo It would be faster as noexcept maybe, but it could be unsafe.
+ * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L245 + * @see https://encoding.spec.whatwg.org/#utf-8-decode-without-bom + */ + std::string percent_decode(const std::string_view input, size_t first_percent); + + /** + * Returns a percent-encoding string whether percent encoding was needed or not. + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 + */ + std::string percent_encode(const std::string_view input, const uint8_t character_set[]); + + /** + * Returns true if percent encoding was needed, in which case, we store + * the percent-encoded content in 'out'. Otherwise, out is left unchanged. + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 + */ + bool percent_encode(const std::string_view input, const uint8_t character_set[], std::string& out); + + /** + * Lowers the string in-place, assuming that the content is ASCII. + * Return true if the content was ASCII. + */ + constexpr bool to_lower_ascii(char * input, size_t length) noexcept; +} // namespace ada::unicode + +#endif // ADA_UNICODE_H +/* end file include/ada/unicode.h */ + +#include +#include +#include +#include +#include +#include + +namespace ada { + /** + * @brief Generic URL struct. + * + * @details To disambiguate from a valid URL string it can also be referred to as a URL record. + * A URL is a struct that represents a universal identifier. + * @see https://url.spec.whatwg.org/#url-representation + */ + struct url { + url() = default; + url(const url &u) = default; + url(url &&u) noexcept = default; + url &operator=(url &&u) noexcept = default; + url &operator=(const url &u) = default; + ADA_ATTRIBUTE_NOINLINE ~url() = default; + + /** + * @private + * A URL’s username is an ASCII string identifying a username. It is initially the empty string. + */ + std::string username{}; + + /** + * @private + * A URL’s password is an ASCII string identifying a password. It is initially the empty string. 
+ */ + std::string password{}; + + /** + * @private + * A URL’s host is null or a host. It is initially null. + */ + std::optional host{}; + + /** + * @private + * A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. + */ + std::optional port{}; + + /** + * @private + * A URL’s path is either an ASCII string or a list of zero or more ASCII strings, usually identifying a location. + */ + std::string path{}; + + /** + * @private + * A URL’s query is either null or an ASCII string. It is initially null. + */ + std::optional query{}; + + /** + * @private + * A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource + * the URL’s other components identify. It is initially null. + */ + std::optional fragment{}; + + /** + * @see https://url.spec.whatwg.org/#dom-url-href + * @see https://url.spec.whatwg.org/#concept-url-serializer + */ + [[nodiscard]] std::string get_href() const noexcept; + + /** + * The origin getter steps are to return the serialization of this’s URL’s origin. [HTML] + * @see https://url.spec.whatwg.org/#concept-url-origin + */ + [[nodiscard]] std::string get_origin() const noexcept; + + /** + * The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:). + * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + [[nodiscard]] std::string get_protocol() const noexcept; + + /** + * Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. + * @see https://url.spec.whatwg.org/#dom-url-host + */ + [[nodiscard]] std::string get_host() const noexcept; + + /** + * Return this’s URL’s host, serialized. + * @see https://url.spec.whatwg.org/#dom-url-hostname + */ + [[nodiscard]] std::string get_hostname() const noexcept; + + /** + * The pathname getter steps are to return the result of URL path serializing this’s URL. 
+ * @see https://url.spec.whatwg.org/#dom-url-pathname + */ + [[nodiscard]] std::string get_pathname() const noexcept; + + /** + * Return U+003F (?), followed by this’s URL’s query. + * @see https://url.spec.whatwg.org/#dom-url-search + */ + [[nodiscard]] std::string get_search() const noexcept; + + /** + * The username getter steps are to return this’s URL’s username. + * @see https://url.spec.whatwg.org/#dom-url-username + */ + [[nodiscard]] std::string get_username() const noexcept; + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-username + */ + bool set_username(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-password + */ + bool set_password(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-port + */ + bool set_port(const std::string_view input); + + /** + * This function always succeeds. + * @see https://url.spec.whatwg.org/#dom-url-hash + */ + void set_hash(const std::string_view input); + + /** + * This function always succeeds. + * @see https://url.spec.whatwg.org/#dom-url-search + */ + void set_search(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-pathname + */ + bool set_pathname(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-host + */ + bool set_host(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-hostname + */ + bool set_hostname(const std::string_view input); + + /** + * @return Returns true on success.
+ * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + bool set_protocol(const std::string_view input); + + /** + * @see https://url.spec.whatwg.org/#dom-url-href + */ + bool set_href(const std::string_view input); + + /** + * The password getter steps are to return this’s URL’s password. + * @see https://url.spec.whatwg.org/#dom-url-password + */ + [[nodiscard]] std::string get_password() const noexcept; + + /** + * Return this’s URL’s port, serialized. + * @see https://url.spec.whatwg.org/#dom-url-port + */ + [[nodiscard]] std::string get_port() const noexcept; + + /** + * Return U+0023 (#), followed by this’s URL’s fragment. + * @see https://url.spec.whatwg.org/#dom-url-hash + */ + [[nodiscard]] std::string get_hash() const noexcept; + + /** + * Returns true if this URL has a valid domain as per RFC 1034 and + * corresponding specifications. Among other things, it requires + * that the domain string has fewer than 255 octets. + */ + [[nodiscard]] bool has_valid_domain() const noexcept; + + /** + * Used for returning the validity from the result of the URL parser. + */ + bool is_valid{true}; + + /** + * A URL has an opaque path if its path is a string. + */ + bool has_opaque_path{false}; + + /** + * A URL includes credentials if its username or password is not the empty string. + */ + [[nodiscard]] ada_really_inline bool includes_credentials() const noexcept; + + /** + * A URL is special if its scheme is a special scheme. A URL is not special if its scheme is not a special scheme. + */ + [[nodiscard]] ada_really_inline bool is_special() const noexcept; + + /** + * @private + * + * Return the 'special port' if the URL is special and not 'file'. + * Returns 0 otherwise. + */ + [[nodiscard]] inline uint16_t get_special_port() const; + + /** + * @private + * + * Return the scheme type. 
Note that it is faster to do + * get_scheme_type() == ada::scheme::type::FILE than to do + * get_scheme() == "file", since the former is a direct integer comparison, + * while the other involves a (cheap) string test. + */ + [[nodiscard]] ada_really_inline ada::scheme::type get_scheme_type() const noexcept; + + /** + * @private + * + * Get the default port if the url's scheme has one, returns 0 otherwise. + */ + [[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept; + /** + * @private + * + * A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file". + */ + [[nodiscard]] inline bool cannot_have_credentials_or_port() const; + + /** + * @private + * + * Parse a port (16-bit decimal digit) from the provided input. + * We assume that the input does not contain spaces or tabs + * within the ASCII digits. + * It returns how many bytes were consumed when a number is successfully parsed. + * @return On failure, it returns zero. + * @see https://url.spec.whatwg.org/#host-parsing + */ + ada_really_inline size_t parse_port(std::string_view view, bool check_trailing_content = false) noexcept; + + /** + * @private + * + * Return a string representing the scheme. Note that get_scheme_type() should often be used instead. + * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + [[nodiscard]] inline std::string_view get_scheme() const noexcept; + /** + * Set the scheme for this URL. The provided scheme should be a valid + * scheme string, be lower-cased, not contain spaces or tabs. It should + * have no spurious trailing or leading content. + */ + inline void set_scheme(std::string&& new_scheme) noexcept; + + /** + * @private + * + * Take the scheme from another URL. The scheme string is moved from the + * provided url. + */ + inline void copy_scheme(ada::url&& u) noexcept; + + /** + * @private + * + * Take the scheme from another URL. The scheme string is copied from the + * provided url. 
+ */ + inline void copy_scheme(const ada::url& u); + + /** + * @private + * + * Parse the host from the provided input. We assume that + * the input does not contain spaces or tabs. Control + * characters and spaces are not trimmed (they should have + * been removed if needed). + * Return true on success. + * @see https://url.spec.whatwg.org/#host-parsing + */ + [[nodiscard]] ada_really_inline bool parse_host(std::string_view input); + + /** + * @private + * + * Parse the path from the provided input. + * Return true on success. Control characters not + * trimmed from the ends (they should have + * been removed if needed). + * + * The input is expected to be UTF-8. + * + * @see https://url.spec.whatwg.org/ + */ + [[nodiscard]] ada_really_inline bool parse_path(const std::string_view input); + + /** + * @private + */ + template + [[nodiscard]] ada_really_inline bool parse_scheme(const std::string_view input); + + /** + * Returns a JSON string representation of this URL. + */ + std::string to_string() const; + + private: + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-ipv4-parser + */ + [[nodiscard]] bool parse_ipv4(std::string_view input); + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-ipv6-parser + */ + [[nodiscard]] bool parse_ipv6(std::string_view input); + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-opaque-host-parser + */ + [[nodiscard]] bool parse_opaque_host(std::string_view input); + + /** + * @private + */ + ada::scheme::type type{ada::scheme::type::NOT_SPECIAL}; + + /** + * @private + * + * A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a + * URL for further processing after parsing. It is initially the empty string. + * We only set non_special_scheme when the scheme is non-special, otherwise we avoid constructing + * string. 
+ * + * Special schemes are stored in ada::scheme::details::is_special_list so we typically do not need + * to store them in each url instance. + */ + std::string non_special_scheme{}; + + }; // struct url + + + inline std::ostream& operator<<(std::ostream& out, const ada::url& u); +} // namespace ada + +#endif // ADA_URL_H +/* end file include/ada/url.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/state.h +/* begin file include/ada/state.h */ +/** + * @file state.h + * @brief Definitions for the states of the URL state machine. + */ +#ifndef ADA_STATE_H +#define ADA_STATE_H + + +#include + +namespace ada { + + /** + * @see https://url.spec.whatwg.org/#url-parsing + */ + enum class state { + AUTHORITY, + SCHEME_START, + SCHEME, + HOST, + NO_SCHEME, + FRAGMENT, + RELATIVE_SCHEME, + RELATIVE_SLASH, + FILE, + FILE_HOST, + FILE_SLASH, + PATH_OR_AUTHORITY, + SPECIAL_AUTHORITY_IGNORE_SLASHES, + SPECIAL_AUTHORITY_SLASHES, + SPECIAL_RELATIVE_OR_AUTHORITY, + QUERY, + PATH, + PATH_START, + OPAQUE_PATH, + PORT, + }; + + /** + * Stringify a URL state machine state. + */ + ada_warn_unused std::string to_string(ada::state s); + +} // namespace ada + +#endif // ADA_STATE_H +/* end file include/ada/state.h */ + +#include +#include + +/** + * @namespace ada::helpers + * @brief Includes the definitions for helper functions + */ +namespace ada::helpers { + + /** + * Prunes the fragment from a url and returns the removed string if the input has a fragment. + * + * @details prune_fragment seeks the first '#' and returns everything after it as a + * string_view, and modifies (in place) the input so that it points at everything + * before the '#'. If no '#' is found, the input is left unchanged and std::nullopt is returned. + * + * @attention The function is non-allocating and it does not throw. + * @returns Note that the returned string_view might be empty!
+ */ + ada_really_inline std::optional prune_fragment(std::string_view& input) noexcept; + + /** + * Shortens a URL's path, as defined by the URL specification. + * @see https://url.spec.whatwg.org/#shorten-a-urls-path + */ + ada_really_inline void shorten_path(std::string& path, ada::scheme::type type) noexcept; + + + /** + * @private + * + * Parse the path from the provided input and append to the existing + * (possibly empty) path. The input cannot contain tabs and spaces: it + * is the user's responsibility to check. + * + * The input is expected to be UTF-8. + * + * @return true on success. + * @see https://url.spec.whatwg.org/ + */ + ada_really_inline bool parse_prepared_path(const std::string_view input, ada::scheme::type type, std::string& path); + + /** + * Removes all ASCII tab or newline characters from the input, mutating it in place. + */ + ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept; + + /** + * Return the substring from input going from index pos to the end. If pos > input.size(), + * it returns an empty string_view. This function cannot throw. + */ + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept; + + /** + * Returns a host's delimiter location depending on the state of the instance. + * Used by the host parser. + */ + ada_really_inline size_t get_host_delimiter_location(const ada::url& url, std::string_view& view, bool& inside_brackets) noexcept; + + /** + * Removes leading and trailing C0 control and whitespace characters from string. + */ + ada_really_inline void trim_c0_whitespace(std::string_view& input) noexcept; + +} // namespace ada::helpers + +#endif // ADA_HELPERS_H +/* end file include/ada/helpers.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/parser.h +/* begin file include/ada/parser.h */ +/** + * @file parser.h + * @brief Definitions for the parser.
+ */ +#ifndef ADA_PARSER_H +#define ADA_PARSER_H + +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/expected.h +/* begin file include/ada/expected.h */ +/** + * @file expected.h + * @brief Definitions for std::expected + * @private Excluded from docs through the doxygen file. + */ +/// +// expected - An implementation of std::expected with extensions +// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama) +// +// Documentation available at http://tl.tartanllama.xyz/ +// +// To the extent possible under law, the author(s) have dedicated all +// copyright and related and neighboring rights to this software to the +// public domain worldwide. This software is distributed without any warranty. +// +// You should have received a copy of the CC0 Public Domain Dedication +// along with this software. If not, see +// . +/// + +#ifndef TL_EXPECTED_HPP +#define TL_EXPECTED_HPP + +#define TL_EXPECTED_VERSION_MAJOR 1 +#define TL_EXPECTED_VERSION_MINOR 0 +#define TL_EXPECTED_VERSION_PATCH 1 + +#include +#include +#include +#include + +#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) +#define TL_EXPECTED_EXCEPTIONS_ENABLED +#endif + +#if (defined(_MSC_VER) && _MSC_VER == 1900) +#define TL_EXPECTED_MSVC2015 +#define TL_EXPECTED_MSVC2015_CONSTEXPR +#else +#define TL_EXPECTED_MSVC2015_CONSTEXPR constexpr +#endif + +#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC49 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC54 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC55 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ + !defined(__clang__)) +// GCC < 5 doesn't support overloading on const&& for member functions + +#define TL_EXPECTED_NO_CONSTRR +// GCC < 5 doesn't support some standard 
C++11 type traits +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + std::has_trivial_copy_constructor +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::has_trivial_copy_assign + +// This one will be different for GCC 5.7 if it's ever supported +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible + +// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks +// std::vector for non-copyable types +#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__)) +#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX +#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX +namespace tl { +namespace detail { +template +struct is_trivially_copy_constructible + : std::is_trivially_copy_constructible {}; +#ifdef _GLIBCXX_VECTOR +template +struct is_trivially_copy_constructible> : std::false_type {}; +#endif +} // namespace detail +} // namespace tl +#endif + +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + tl::detail::is_trivially_copy_constructible +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::is_trivially_copy_assignable +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible +#else +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + std::is_trivially_copy_constructible +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::is_trivially_copy_assignable +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible +#endif + +#if __cplusplus > 201103L +#define TL_EXPECTED_CXX14 +#endif + +#ifdef TL_EXPECTED_GCC49 +#define TL_EXPECTED_GCC49_CONSTEXPR +#else +#define TL_EXPECTED_GCC49_CONSTEXPR constexpr +#endif + +#if (__cplusplus == 201103L || defined(TL_EXPECTED_MSVC2015) || \ + defined(TL_EXPECTED_GCC49)) +#define TL_EXPECTED_11_CONSTEXPR +#else +#define TL_EXPECTED_11_CONSTEXPR constexpr +#endif + +namespace tl { +template class expected; + +#ifndef TL_MONOSTATE_INPLACE_MUTEX +#define 
TL_MONOSTATE_INPLACE_MUTEX +class monostate {}; + +struct in_place_t { + explicit in_place_t() = default; +}; +static constexpr in_place_t in_place{}; +#endif + +template class unexpected { +public: + static_assert(!std::is_same::value, "E must not be void"); + + unexpected() = delete; + constexpr explicit unexpected(const E &e) : m_val(e) {} + + constexpr explicit unexpected(E &&e) : m_val(std::move(e)) {} + + template ::value>::type * = nullptr> + constexpr explicit unexpected(Args &&...args) + : m_val(std::forward(args)...) {} + template < + class U, class... Args, + typename std::enable_if &, Args &&...>::value>::type * = nullptr> + constexpr explicit unexpected(std::initializer_list l, Args &&...args) + : m_val(l, std::forward(args)...) {} + + constexpr const E &value() const & { return m_val; } + TL_EXPECTED_11_CONSTEXPR E &value() & { return m_val; } + TL_EXPECTED_11_CONSTEXPR E &&value() && { return std::move(m_val); } + constexpr const E &&value() const && { return std::move(m_val); } + +private: + E m_val; +}; + +#ifdef __cpp_deduction_guides +template unexpected(E) -> unexpected; +#endif + +template +constexpr bool operator==(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() == rhs.value(); +} +template +constexpr bool operator!=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() != rhs.value(); +} +template +constexpr bool operator<(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() < rhs.value(); +} +template +constexpr bool operator<=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() <= rhs.value(); +} +template +constexpr bool operator>(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() > rhs.value(); +} +template +constexpr bool operator>=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() >= rhs.value(); +} + +template +unexpected::type> make_unexpected(E &&e) { + return unexpected::type>(std::forward(e)); +} + +struct unexpect_t { + 
unexpect_t() = default; +}; +static constexpr unexpect_t unexpect{}; + +namespace detail { +template +[[noreturn]] TL_EXPECTED_11_CONSTEXPR void throw_exception(E &&e) { +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + throw std::forward(e); +#else +#ifdef _MSC_VER + __assume(0); +#else + __builtin_unreachable(); +#endif +#endif +} + +#ifndef TL_TRAITS_MUTEX +#define TL_TRAITS_MUTEX +// C++14-style aliases for brevity +template using remove_const_t = typename std::remove_const::type; +template +using remove_reference_t = typename std::remove_reference::type; +template using decay_t = typename std::decay::type; +template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; + +// std::conjunction from C++17 +template struct conjunction : std::true_type {}; +template struct conjunction : B {}; +template +struct conjunction + : std::conditional, B>::type {}; + +#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L +#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND +#endif + +// In C++11 mode, there's an issue in libc++'s std::mem_fn +// which results in a hard-error when using it in a noexcept expression +// in some cases. This is a check to workaround the common failing case. 
+#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND +template +struct is_pointer_to_non_const_member_func : std::false_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; + +template struct is_const_or_const_ref : std::false_type {}; +template struct is_const_or_const_ref : std::true_type {}; +template struct is_const_or_const_ref : std::true_type {}; +#endif + +// std::invoke from C++17 +// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround +template < + typename Fn, typename... Args, +#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND + typename = enable_if_t::value && + is_const_or_const_ref::value)>, +#endif + typename = enable_if_t>::value>, int = 0> +constexpr auto invoke(Fn &&f, Args &&...args) noexcept( + noexcept(std::mem_fn(f)(std::forward(args)...))) + -> decltype(std::mem_fn(f)(std::forward(args)...)) { + return std::mem_fn(f)(std::forward(args)...); +} + +template >::value>> +constexpr auto invoke(Fn &&f, Args &&...args) noexcept( + noexcept(std::forward(f)(std::forward(args)...))) + -> decltype(std::forward(f)(std::forward(args)...)) { + return std::forward(f)(std::forward(args)...); +} + +// std::invoke_result from C++17 +template struct invoke_result_impl; + +template +struct invoke_result_impl< + F, + decltype(detail::invoke(std::declval(), std::declval()...), void()), + Us...> { + using type = + decltype(detail::invoke(std::declval(), std::declval()...)); +}; + +template +using invoke_result = invoke_result_impl; + +template +using invoke_result_t = typename invoke_result::type; + +#if defined(_MSC_VER) && _MSC_VER <= 1900 +// TODO 
make a version which works with MSVC 2015 +template struct is_swappable : std::true_type {}; + +template struct is_nothrow_swappable : std::true_type {}; +#else +// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept +namespace swap_adl_tests { +// if swap ADL finds this then it would call std::swap otherwise (same +// signature) +struct tag {}; + +template tag swap(T &, T &); +template tag swap(T (&a)[N], T (&b)[N]); + +// helper functions to test if an unqualified swap is possible, and if it +// becomes std::swap +template std::false_type can_swap(...) noexcept(false); +template (), std::declval()))> +std::true_type can_swap(int) noexcept(noexcept(swap(std::declval(), + std::declval()))); + +template std::false_type uses_std(...); +template +std::is_same(), std::declval())), tag> +uses_std(int); + +template +struct is_std_swap_noexcept + : std::integral_constant::value && + std::is_nothrow_move_assignable::value> {}; + +template +struct is_std_swap_noexcept : is_std_swap_noexcept {}; + +template +struct is_adl_swap_noexcept + : std::integral_constant(0))> {}; +} // namespace swap_adl_tests + +template +struct is_swappable + : std::integral_constant< + bool, + decltype(detail::swap_adl_tests::can_swap(0))::value && + (!decltype(detail::swap_adl_tests::uses_std(0))::value || + (std::is_move_assignable::value && + std::is_move_constructible::value))> {}; + +template +struct is_swappable + : std::integral_constant< + bool, + decltype(detail::swap_adl_tests::can_swap(0))::value && + (!decltype(detail::swap_adl_tests::uses_std( + 0))::value || + is_swappable::value)> {}; + +template +struct is_nothrow_swappable + : std::integral_constant< + bool, + is_swappable::value && + ((decltype(detail::swap_adl_tests::uses_std(0))::value && + detail::swap_adl_tests::is_std_swap_noexcept::value) || + (!decltype(detail::swap_adl_tests::uses_std(0))::value && + 
detail::swap_adl_tests::is_adl_swap_noexcept::value))> {}; +#endif +#endif + +// Trait for checking if a type is a tl::expected +template struct is_expected_impl : std::false_type {}; +template +struct is_expected_impl> : std::true_type {}; +template using is_expected = is_expected_impl>; + +template +using expected_enable_forward_value = detail::enable_if_t< + std::is_constructible::value && + !std::is_same, in_place_t>::value && + !std::is_same, detail::decay_t>::value && + !std::is_same, detail::decay_t>::value>; + +template +using expected_enable_from_other = detail::enable_if_t< + std::is_constructible::value && + std::is_constructible::value && + !std::is_constructible &>::value && + !std::is_constructible &&>::value && + !std::is_constructible &>::value && + !std::is_constructible &&>::value && + !std::is_convertible &, T>::value && + !std::is_convertible &&, T>::value && + !std::is_convertible &, T>::value && + !std::is_convertible &&, T>::value>; + +template +using is_void_or = conditional_t::value, std::true_type, U>; + +template +using is_copy_constructible_or_void = + is_void_or>; + +template +using is_move_constructible_or_void = + is_void_or>; + +template +using is_copy_assignable_or_void = is_void_or>; + +template +using is_move_assignable_or_void = is_void_or>; + +} // namespace detail + +namespace detail { +struct no_init_t {}; +static constexpr no_init_t no_init{}; + +// Implements the storage of the values, and ensures that the destructor is +// trivial if it can be. 
+// +// This specialization is for where neither `T` or `E` is trivially +// destructible, so the destructors must be called on destruction of the +// `expected` +template ::value, + bool = std::is_trivially_destructible::value> +struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (m_has_val) { + m_val.~T(); + } else { + m_unexpect.~unexpected(); + } + } + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// This specialization is for when both `T` and `E` are trivially-destructible, +// so the destructor of the `expected` can be trivial. 
+template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() = default; + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// T is trivial, E is not. 
+template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + TL_EXPECTED_MSVC2015_CONSTEXPR expected_storage_base(no_init_t) + : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (!m_has_val) { + m_unexpect.~unexpected(); + } + } + + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// E is trivial, T is not. 
+template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (m_has_val) { + m_val.~T(); + } + } + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// `T` is `void`, `E` is trivially-destructible +template struct expected_storage_base { + #if __GNUC__ <= 5 + //no constexpr for GCC 4/5 bug + #else + TL_EXPECTED_MSVC2015_CONSTEXPR + #endif + expected_storage_base() : m_has_val(true) {} + + constexpr expected_storage_base(no_init_t) : m_val(), m_has_val(false) {} + + constexpr expected_storage_base(in_place_t) : m_has_val(true) {} + + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() = default; + struct dummy {}; + union { + unexpected m_unexpect; + dummy m_val; + }; + bool m_has_val; +}; + +// `T` is 
`void`, `E` is not trivially-destructible +template struct expected_storage_base { + constexpr expected_storage_base() : m_dummy(), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_dummy(), m_has_val(false) {} + + constexpr expected_storage_base(in_place_t) : m_dummy(), m_has_val(true) {} + + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (!m_has_val) { + m_unexpect.~unexpected(); + } + } + + union { + unexpected m_unexpect; + char m_dummy; + }; + bool m_has_val; +}; + +// This base class provides some handy member functions which can be used in +// further derived classes +template +struct expected_operations_base : expected_storage_base { + using expected_storage_base::expected_storage_base; + + template void construct(Args &&...args) noexcept { + new (std::addressof(this->m_val)) T(std::forward(args)...); + this->m_has_val = true; + } + + template void construct_with(Rhs &&rhs) noexcept { + new (std::addressof(this->m_val)) T(std::forward(rhs).get()); + this->m_has_val = true; + } + + template void construct_error(Args &&...args) noexcept { + new (std::addressof(this->m_unexpect)) + unexpected(std::forward(args)...); + this->m_has_val = false; + } + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + + // These assign overloads ensure that the most efficient assignment + // implementation is used while maintaining the strong exception guarantee. + // The problematic case is where rhs has a value, but *this does not. + // + // This overload handles the case where we can just copy-construct `T` + // directly into place without throwing. 
+ template ::value> + * = nullptr> + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(rhs.get()); + } else { + assign_common(rhs); + } + } + + // This overload handles the case where we can attempt to create a copy of + // `T`, then no-throw move it into place if the copy was successful. + template ::value && + std::is_nothrow_move_constructible::value> + * = nullptr> + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + T tmp = rhs.get(); + geterr().~unexpected(); + construct(std::move(tmp)); + } else { + assign_common(rhs); + } + } + + // This overload is the worst-case, where we have to move-construct the + // unexpected value into temporary storage, then try to copy the T into place. + // If the construction succeeds, then everything is fine, but if it throws, + // then we move the old unexpected value back into place before rethrowing the + // exception. + template ::value && + !std::is_nothrow_move_constructible::value> + * = nullptr> + void assign(const expected_operations_base &rhs) { + if (!this->m_has_val && rhs.m_has_val) { + auto tmp = std::move(geterr()); + geterr().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + construct(rhs.get()); + } catch (...) 
{ + geterr() = std::move(tmp); + throw; + } +#else + construct(rhs.get()); +#endif + } else { + assign_common(rhs); + } + } + + // These overloads do the same as above, but for rvalues + template ::value> + * = nullptr> + void assign(expected_operations_base &&rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(std::move(rhs).get()); + } else { + assign_common(std::move(rhs)); + } + } + + template ::value> + * = nullptr> + void assign(expected_operations_base &&rhs) { + if (!this->m_has_val && rhs.m_has_val) { + auto tmp = std::move(geterr()); + geterr().~unexpected(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + construct(std::move(rhs).get()); + } catch (...) { + geterr() = std::move(tmp); + throw; + } +#else + construct(std::move(rhs).get()); +#endif + } else { + assign_common(std::move(rhs)); + } + } + +#else + + // If exceptions are disabled then we can just copy-construct + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(rhs.get()); + } else { + assign_common(rhs); + } + } + + void assign(expected_operations_base &&rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(std::move(rhs).get()); + } else { + assign_common(rhs); + } + } + +#endif + + // The common part of move/copy assigning + template void assign_common(Rhs &&rhs) { + if (this->m_has_val) { + if (rhs.m_has_val) { + get() = std::forward(rhs).get(); + } else { + destroy_val(); + construct_error(std::forward(rhs).geterr()); + } + } else { + if (!rhs.m_has_val) { + geterr() = std::forward(rhs).geterr(); + } + } + } + + bool has_value() const { return this->m_has_val; } + + TL_EXPECTED_11_CONSTEXPR T &get() & { return this->m_val; } + constexpr const T &get() const & { return this->m_val; } + TL_EXPECTED_11_CONSTEXPR T &&get() && { return std::move(this->m_val); } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const T 
&&get() const && { return std::move(this->m_val); } +#endif + + TL_EXPECTED_11_CONSTEXPR unexpected &geterr() & { + return this->m_unexpect; + } + constexpr const unexpected &geterr() const & { return this->m_unexpect; } + TL_EXPECTED_11_CONSTEXPR unexpected &&geterr() && { + return std::move(this->m_unexpect); + } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const unexpected &&geterr() const && { + return std::move(this->m_unexpect); + } +#endif + + TL_EXPECTED_11_CONSTEXPR void destroy_val() { get().~T(); } +}; + +// This base class provides some handy member functions which can be used in +// further derived classes +template +struct expected_operations_base : expected_storage_base { + using expected_storage_base::expected_storage_base; + + template void construct() noexcept { this->m_has_val = true; } + + // This function doesn't use its argument, but needs it so that code in + // levels above this can work independently of whether T is void + template void construct_with(Rhs &&) noexcept { + this->m_has_val = true; + } + + template void construct_error(Args &&...args) noexcept { + new (std::addressof(this->m_unexpect)) + unexpected(std::forward(args)...); + this->m_has_val = false; + } + + template void assign(Rhs &&rhs) noexcept { + if (!this->m_has_val) { + if (rhs.m_has_val) { + geterr().~unexpected(); + construct(); + } else { + geterr() = std::forward(rhs).geterr(); + } + } else { + if (!rhs.m_has_val) { + construct_error(std::forward(rhs).geterr()); + } + } + } + + bool has_value() const { return this->m_has_val; } + + TL_EXPECTED_11_CONSTEXPR unexpected &geterr() & { + return this->m_unexpect; + } + constexpr const unexpected &geterr() const & { return this->m_unexpect; } + TL_EXPECTED_11_CONSTEXPR unexpected &&geterr() && { + return std::move(this->m_unexpect); + } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const unexpected &&geterr() const && { + return std::move(this->m_unexpect); + } +#endif + + TL_EXPECTED_11_CONSTEXPR void destroy_val() { + // 
no-op + } +}; + +// This class manages conditionally having a trivial copy constructor +// This specialization is for when T and E are trivially copy constructible +template :: + value &&TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value> +struct expected_copy_base : expected_operations_base { + using expected_operations_base::expected_operations_base; +}; + +// This specialization is for when T or E are not trivially copy constructible +template +struct expected_copy_base : expected_operations_base { + using expected_operations_base::expected_operations_base; + + expected_copy_base() = default; + expected_copy_base(const expected_copy_base &rhs) + : expected_operations_base(no_init) { + if (rhs.has_value()) { + this->construct_with(rhs); + } else { + this->construct_error(rhs.geterr()); + } + } + + expected_copy_base(expected_copy_base &&rhs) = default; + expected_copy_base &operator=(const expected_copy_base &rhs) = default; + expected_copy_base &operator=(expected_copy_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial move constructor +// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it +// doesn't implement an analogue to std::is_trivially_move_constructible. 
We +// have to make do with a non-trivial move constructor even if T is trivially +// move constructible +#ifndef TL_EXPECTED_GCC49 +template >::value + &&std::is_trivially_move_constructible::value> +struct expected_move_base : expected_copy_base { + using expected_copy_base::expected_copy_base; +}; +#else +template struct expected_move_base; +#endif +template +struct expected_move_base : expected_copy_base { + using expected_copy_base::expected_copy_base; + + expected_move_base() = default; + expected_move_base(const expected_move_base &rhs) = default; + + expected_move_base(expected_move_base &&rhs) noexcept( + std::is_nothrow_move_constructible::value) + : expected_copy_base(no_init) { + if (rhs.has_value()) { + this->construct_with(std::move(rhs)); + } else { + this->construct_error(std::move(rhs.geterr())); + } + } + expected_move_base &operator=(const expected_move_base &rhs) = default; + expected_move_base &operator=(expected_move_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial copy assignment operator +template >::value + &&TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(E)::value + &&TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value + &&TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(E)::value> +struct expected_copy_assign_base : expected_move_base { + using expected_move_base::expected_move_base; +}; + +template +struct expected_copy_assign_base : expected_move_base { + using expected_move_base::expected_move_base; + + expected_copy_assign_base() = default; + expected_copy_assign_base(const expected_copy_assign_base &rhs) = default; + + expected_copy_assign_base(expected_copy_assign_base &&rhs) = default; + expected_copy_assign_base &operator=(const expected_copy_assign_base &rhs) { + this->assign(rhs); + return *this; + } + expected_copy_assign_base & + operator=(expected_copy_assign_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial move assignment operator +// Unfortunately there's no way to 
achieve this in GCC < 5 AFAIK, since it +// doesn't implement an analogue to std::is_trivially_move_assignable. We have +// to make do with a non-trivial move assignment operator even if T is trivially +// move assignable +#ifndef TL_EXPECTED_GCC49 +template , + std::is_trivially_move_constructible, + std::is_trivially_move_assignable>>:: + value &&std::is_trivially_destructible::value + &&std::is_trivially_move_constructible::value + &&std::is_trivially_move_assignable::value> +struct expected_move_assign_base : expected_copy_assign_base { + using expected_copy_assign_base::expected_copy_assign_base; +}; +#else +template struct expected_move_assign_base; +#endif + +template +struct expected_move_assign_base + : expected_copy_assign_base { + using expected_copy_assign_base::expected_copy_assign_base; + + expected_move_assign_base() = default; + expected_move_assign_base(const expected_move_assign_base &rhs) = default; + + expected_move_assign_base(expected_move_assign_base &&rhs) = default; + + expected_move_assign_base & + operator=(const expected_move_assign_base &rhs) = default; + + expected_move_assign_base & + operator=(expected_move_assign_base &&rhs) noexcept( + std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_assignable::value) { + this->assign(std::move(rhs)); + return *this; + } +}; + +// expected_delete_ctor_base will conditionally delete copy and move +// constructors depending on whether T is copy/move constructible +template ::value && + std::is_copy_constructible::value), + bool EnableMove = (is_move_constructible_or_void::value && + std::is_move_constructible::value)> +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + 
operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = delete; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = delete; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +// expected_delete_assign_base will conditionally delete copy and move +// constructors depending on whether T and E are copy/move constructible + +// assignable +template ::value && + std::is_copy_constructible::value && + is_copy_assignable_or_void::value && + std::is_copy_assignable::value), + bool EnableMove = (is_move_constructible_or_void::value && + std::is_move_constructible::value && + is_move_assignable_or_void::value && + std::is_move_assignable::value)> +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + 
expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = default; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = default; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = default; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = delete; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = delete; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = default; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = delete; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = delete; +}; + +// This is needed to be able to construct the expected_default_ctor_base which +// follows, while still conditionally deleting the default constructor. +struct default_constructor_tag { + explicit constexpr default_constructor_tag() = default; +}; + +// expected_default_ctor_base will ensure that expected has a deleted default +// consturctor if T is not default constructible. 
+// This specialization is for when T is default constructible +template ::value || std::is_void::value> +struct expected_default_ctor_base { + constexpr expected_default_ctor_base() noexcept = default; + constexpr expected_default_ctor_base( + expected_default_ctor_base const &) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept = + default; + expected_default_ctor_base & + operator=(expected_default_ctor_base const &) noexcept = default; + expected_default_ctor_base & + operator=(expected_default_ctor_base &&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; + +// This specialization is for when T is not default constructible +template struct expected_default_ctor_base { + constexpr expected_default_ctor_base() noexcept = delete; + constexpr expected_default_ctor_base( + expected_default_ctor_base const &) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept = + default; + expected_default_ctor_base & + operator=(expected_default_ctor_base const &) noexcept = default; + expected_default_ctor_base & + operator=(expected_default_ctor_base &&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; +} // namespace detail + +template class bad_expected_access : public std::exception { +public: + explicit bad_expected_access(E e) : m_val(std::move(e)) {} + + virtual const char *what() const noexcept override { + return "Bad expected access"; + } + + const E &error() const & { return m_val; } + E &error() & { return m_val; } + const E &&error() const && { return std::move(m_val); } + E &&error() && { return std::move(m_val); } + +private: + E m_val; +}; + +/// An `expected` object is an object that contains the storage for +/// another object and manages the lifetime of this contained object `T`. 
+/// Alternatively it could contain the storage for another unexpected object +/// `E`. The contained object may not be initialized after the expected object +/// has been initialized, and may not be destroyed before the expected object +/// has been destroyed. The initialization state of the contained object is +/// tracked by the expected object. +template +class expected : private detail::expected_move_assign_base, + private detail::expected_delete_ctor_base, + private detail::expected_delete_assign_base, + private detail::expected_default_ctor_base { + static_assert(!std::is_reference::value, "T must not be a reference"); + static_assert(!std::is_same::type>::value, + "T must not be in_place_t"); + static_assert(!std::is_same::type>::value, + "T must not be unexpect_t"); + static_assert( + !std::is_same>::type>::value, + "T must not be unexpected"); + static_assert(!std::is_reference::value, "E must not be a reference"); + + T *valptr() { return std::addressof(this->m_val); } + const T *valptr() const { return std::addressof(this->m_val); } + unexpected *errptr() { return std::addressof(this->m_unexpect); } + const unexpected *errptr() const { + return std::addressof(this->m_unexpect); + } + + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &val() { + return this->m_val; + } + TL_EXPECTED_11_CONSTEXPR unexpected &err() { return this->m_unexpect; } + + template ::value> * = nullptr> + constexpr const U &val() const { + return this->m_val; + } + constexpr const unexpected &err() const { return this->m_unexpect; } + + using impl_base = detail::expected_move_assign_base; + using ctor_base = detail::expected_default_ctor_base; + +public: + typedef T value_type; + typedef E error_type; + typedef unexpected unexpected_type; + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) & { + return and_then_impl(*this, 
std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) && { + return and_then_impl(std::move(*this), std::forward(f)); + } + template constexpr auto and_then(F &&f) const & { + return and_then_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template constexpr auto and_then(F &&f) const && { + return and_then_impl(std::move(*this), std::forward(f)); + } +#endif + +#else + template + TL_EXPECTED_11_CONSTEXPR auto + and_then(F &&f) & -> decltype(and_then_impl(std::declval(), + std::forward(f))) { + return and_then_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR auto + and_then(F &&f) && -> decltype(and_then_impl(std::declval(), + std::forward(f))) { + return and_then_impl(std::move(*this), std::forward(f)); + } + template + constexpr auto and_then(F &&f) const & -> decltype(and_then_impl( + std::declval(), std::forward(f))) { + return and_then_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr auto and_then(F &&f) const && -> decltype(and_then_impl( + std::declval(), std::forward(f))) { + return and_then_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto map(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto map(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template constexpr auto map(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + template constexpr auto map(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl( + std::declval(), std::declval())) + map(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR 
decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template constexpr auto transform(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + template constexpr auto transform(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl( + std::declval(), std::declval())) + transform(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + 
!defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template constexpr auto map_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + template constexpr auto map_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & { + return or_else_impl(*this, std::forward(f)); + } + + template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) && { + return or_else_impl(std::move(*this), std::forward(f)); + } + + template expected constexpr or_else(F &&f) const & { + return or_else_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template expected constexpr or_else(F &&f) const && { + return or_else_impl(std::move(*this), std::forward(f)); + } +#endif + constexpr expected() = default; + constexpr expected(const expected &rhs) = default; + constexpr expected(expected &&rhs) = default; + expected &operator=(const expected &rhs) = default; + expected 
&operator=(expected &&rhs) = default; + + template ::value> * = + nullptr> + constexpr expected(in_place_t, Args &&...args) + : impl_base(in_place, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected(in_place_t, std::initializer_list il, Args &&...args) + : impl_base(in_place, il, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value> * = + nullptr, + detail::enable_if_t::value> * = + nullptr> + explicit constexpr expected(const unexpected &e) + : impl_base(unexpect, e.value()), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = + nullptr, + detail::enable_if_t::value> * = nullptr> + constexpr expected(unexpected const &e) + : impl_base(unexpect, e.value()), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t::value> * = nullptr> + explicit constexpr expected(unexpected &&e) noexcept( + std::is_nothrow_constructible::value) + : impl_base(unexpect, std::move(e.value())), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t::value> * = nullptr> + constexpr expected(unexpected &&e) noexcept( + std::is_nothrow_constructible::value) + : impl_base(unexpect, std::move(e.value())), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value> * = + nullptr> + constexpr explicit expected(unexpect_t, Args &&...args) + : impl_base(unexpect, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected(unexpect_t, std::initializer_list il, + Args &&...args) + : impl_base(unexpect, il, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value && + 
std::is_convertible::value)> * = + nullptr, + detail::expected_enable_from_other + * = nullptr> + explicit TL_EXPECTED_11_CONSTEXPR expected(const expected &rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template ::value && + std::is_convertible::value)> * = + nullptr, + detail::expected_enable_from_other + * = nullptr> + TL_EXPECTED_11_CONSTEXPR expected(const expected &rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template < + class U, class G, + detail::enable_if_t::value && + std::is_convertible::value)> * = nullptr, + detail::expected_enable_from_other * = nullptr> + explicit TL_EXPECTED_11_CONSTEXPR expected(expected &&rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template < + class U, class G, + detail::enable_if_t<(std::is_convertible::value && + std::is_convertible::value)> * = nullptr, + detail::expected_enable_from_other * = nullptr> + TL_EXPECTED_11_CONSTEXPR expected(expected &&rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template < + class U = T, + detail::enable_if_t::value> * = nullptr, + detail::expected_enable_forward_value * = nullptr> + explicit TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v) + : expected(in_place, std::forward(v)) {} + + template < + class U = T, + detail::enable_if_t::value> * = nullptr, + detail::expected_enable_forward_value * = nullptr> + TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v) + : expected(in_place, std::forward(v)) {} + + template < + class U = T, class G = T, + detail::enable_if_t::value> * = + 
nullptr, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t< + (!std::is_same, detail::decay_t>::value && + !detail::conjunction, + std::is_same>>::value && + std::is_constructible::value && + std::is_assignable::value && + std::is_nothrow_move_constructible::value)> * = nullptr> + expected &operator=(U &&v) { + if (has_value()) { + val() = std::forward(v); + } else { + err().~unexpected(); + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; + } + + return *this; + } + + template < + class U = T, class G = T, + detail::enable_if_t::value> * = + nullptr, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t< + (!std::is_same, detail::decay_t>::value && + !detail::conjunction, + std::is_same>>::value && + std::is_constructible::value && + std::is_assignable::value && + std::is_nothrow_move_constructible::value)> * = nullptr> + expected &operator=(U &&v) { + if (has_value()) { + val() = std::forward(v); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; + } catch (...) 
{ + ::new (errptr()) unexpected_type(std::move(tmp)); // err() was destroyed above: rebuild it in place instead of assigning to a dead object + throw; + } +#else + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; +#endif + } + + return *this; + } + + template ::value && + std::is_assignable::value> * = nullptr> + expected &operator=(const unexpected &rhs) { + if (!has_value()) { + err() = rhs; + } else { + this->destroy_val(); + ::new (errptr()) unexpected(rhs); + this->m_has_val = false; + } + + return *this; + } + + template ::value && + std::is_move_assignable::value> * = nullptr> + expected &operator=(unexpected &&rhs) noexcept { + if (!has_value()) { + err() = std::move(rhs); + } else { + this->destroy_val(); + ::new (errptr()) unexpected(std::move(rhs)); + this->m_has_val = false; + } + + return *this; + } + + template ::value> * = nullptr> + void emplace(Args &&...args) { + if (has_value()) { + val().~T(); + } else { + err().~unexpected(); + this->m_has_val = true; + } + ::new (valptr()) T(std::forward(args)...); + } + + template ::value> * = nullptr> + void emplace(Args &&...args) { + if (has_value()) { + val().~T(); + ::new (valptr()) T(std::forward(args)...); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(std::forward(args)...); + this->m_has_val = true; + } catch (...) 
{ + err() = std::move(tmp); + throw; + } +#else + ::new (valptr()) T(std::forward(args)...); + this->m_has_val = true; +#endif + } + } + + template &, Args &&...>::value> * = nullptr> + void emplace(std::initializer_list il, Args &&...args) { + if (has_value()) { + T t(il, std::forward(args)...); + val() = std::move(t); + } else { + err().~unexpected(); + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; + } + } + + template &, Args &&...>::value> * = nullptr> + void emplace(std::initializer_list il, Args &&...args) { + if (has_value()) { + T t(il, std::forward(args)...); + val() = std::move(t); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; + } catch (...) { + err() = std::move(tmp); + throw; + } +#else + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; +#endif + } + } + +private: + using t_is_void = std::true_type; + using t_is_not_void = std::false_type; + using t_is_nothrow_move_constructible = std::true_type; + using move_constructing_t_can_throw = std::false_type; + using e_is_nothrow_move_constructible = std::true_type; + using move_constructing_e_can_throw = std::false_type; + + void swap_where_both_have_value(expected & /*rhs*/, t_is_void) noexcept { + // swapping void is a no-op + } + + void swap_where_both_have_value(expected &rhs, t_is_not_void) { + using std::swap; + swap(val(), rhs.val()); + } + + void swap_where_only_one_has_value(expected &rhs, t_is_void) noexcept( + std::is_nothrow_move_constructible::value) { + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + std::swap(this->m_has_val, rhs.m_has_val); + } + + void swap_where_only_one_has_value(expected &rhs, t_is_not_void) { + swap_where_only_one_has_value_and_t_is_not_void( + rhs, typename std::is_nothrow_move_constructible::type{}, + typename 
std::is_nothrow_move_constructible::type{}); + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, t_is_nothrow_move_constructible, + e_is_nothrow_move_constructible) noexcept { + auto temp = std::move(val()); + val().~T(); + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, t_is_nothrow_move_constructible, + move_constructing_e_can_throw) { + auto temp = std::move(val()); + val().~T(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } catch (...) { + val() = std::move(temp); + throw; + } +#else + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); +#endif + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, move_constructing_t_can_throw, + e_is_nothrow_move_constructible) { + auto temp = std::move(rhs.err()); + rhs.err().~unexpected_type(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (rhs.valptr()) T(std::move(val())); + val().~T(); + ::new (errptr()) unexpected_type(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } catch (...) 
{ + ::new (rhs.errptr()) unexpected_type(std::move(temp)); // rhs.err() was destroyed above: rebuild it in place instead of assigning to a dead object + throw; + } +#else + ::new (rhs.valptr()) T(std::move(val())); + val().~T(); + ::new (errptr()) unexpected_type(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); +#endif + } + +public: + template + detail::enable_if_t::value && + detail::is_swappable::value && + (std::is_nothrow_move_constructible::value || + std::is_nothrow_move_constructible::value)> + swap(expected &rhs) noexcept( + std::is_nothrow_move_constructible::value + &&detail::is_nothrow_swappable::value + &&std::is_nothrow_move_constructible::value + &&detail::is_nothrow_swappable::value) { + if (has_value() && rhs.has_value()) { + swap_where_both_have_value(rhs, typename std::is_void::type{}); + } else if (!has_value() && rhs.has_value()) { + rhs.swap(*this); + } else if (has_value()) { + swap_where_only_one_has_value(rhs, typename std::is_void::type{}); + } else { + using std::swap; + swap(err(), rhs.err()); + } + } + + constexpr const T *operator->() const { return valptr(); } + TL_EXPECTED_11_CONSTEXPR T *operator->() { return valptr(); } + + template ::value> * = nullptr> + constexpr const U &operator*() const & { + return val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &operator*() & { + return val(); + } + template ::value> * = nullptr> + constexpr const U &&operator*() const && { + return std::move(val()); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &&operator*() && { + return std::move(val()); + } + + constexpr bool has_value() const noexcept { return this->m_has_val; } + constexpr explicit operator bool() const noexcept { return this->m_has_val; } + + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR const U &value() const & { + if (!has_value()) + detail::throw_exception(bad_expected_access(err().value())); + return val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &value() & { + if (!has_value()) + detail::throw_exception(bad_expected_access(err().value())); + return 
val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR const U &&value() const && { + if (!has_value()) + detail::throw_exception(bad_expected_access(std::move(err()).value())); + return std::move(val()); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &&value() && { + if (!has_value()) + detail::throw_exception(bad_expected_access(std::move(err()).value())); + return std::move(val()); + } + + constexpr const E &error() const & { return err().value(); } + TL_EXPECTED_11_CONSTEXPR E &error() & { return err().value(); } + constexpr const E &&error() const && { return std::move(err().value()); } + TL_EXPECTED_11_CONSTEXPR E &&error() && { return std::move(err().value()); } + + template constexpr T value_or(U &&v) const & { + static_assert(std::is_copy_constructible::value && + std::is_convertible::value, + "T must be copy-constructible and convertible to from U&&"); + return bool(*this) ? **this : static_cast(std::forward(v)); + } + template TL_EXPECTED_11_CONSTEXPR T value_or(U &&v) && { + static_assert(std::is_move_constructible::value && + std::is_convertible::value, + "T must be move-constructible and convertible to from U&&"); + return bool(*this) ? std::move(**this) : static_cast(std::forward(v)); + } +}; + +namespace detail { +template using exp_t = typename detail::decay_t::value_type; +template using err_t = typename detail::decay_t::error_type; +template using ret_t = expected>; + +#ifdef TL_EXPECTED_CXX14 +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval()))> +constexpr auto and_then_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() + ? 
detail::invoke(std::forward(f), *std::forward(exp)) + : Ret(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval()))> +constexpr auto and_then_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() ? detail::invoke(std::forward(f)) + : Ret(unexpect, std::forward(exp).error()); +} +#else +template struct TC; +template (), + *std::declval())), + detail::enable_if_t>::value> * = nullptr> +auto and_then_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() + ? detail::invoke(std::forward(f), *std::forward(exp)) + : Ret(unexpect, std::forward(exp).error()); +} + +template ())), + detail::enable_if_t>::value> * = nullptr> +constexpr auto and_then_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() ? detail::invoke(std::forward(f)) + : Ret(unexpect, std::forward(exp).error()); +} +#endif + +#ifdef TL_EXPECTED_CXX14 +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> +constexpr auto expected_map_impl(Exp &&exp, F &&f) { + using result = ret_t>; + return exp.has_value() ? 
result(detail::invoke(std::forward(f), + *std::forward(exp))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> +auto expected_map_impl(Exp &&exp, F &&f) { + using result = expected>; + if (exp.has_value()) { + detail::invoke(std::forward(f), *std::forward(exp)); + return result(); + } + + return result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> +constexpr auto expected_map_impl(Exp &&exp, F &&f) { + using result = ret_t>; + return exp.has_value() ? result(detail::invoke(std::forward(f))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> +auto expected_map_impl(Exp &&exp, F &&f) { + using result = expected>; + if (exp.has_value()) { + detail::invoke(std::forward(f)); + return result(); + } + + return result(unexpect, std::forward(exp).error()); +} +#else +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> + +constexpr auto expected_map_impl(Exp &&exp, F &&f) + -> ret_t> { + using result = ret_t>; + + return exp.has_value() ? 
result(detail::invoke(std::forward(f), + *std::forward(exp))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> + +auto expected_map_impl(Exp &&exp, F &&f) -> expected> { + if (exp.has_value()) { + detail::invoke(std::forward(f), *std::forward(exp)); + return {}; + } + + return unexpected>(std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> + +constexpr auto expected_map_impl(Exp &&exp, F &&f) + -> ret_t> { + using result = ret_t>; + + return exp.has_value() ? result(detail::invoke(std::forward(f))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> + +auto expected_map_impl(Exp &&exp, F &&f) -> expected> { + if (exp.has_value()) { + detail::invoke(std::forward(f)); + return {}; + } + + return unexpected>(std::forward(exp).error()); +} +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, detail::decay_t>; + return exp.has_value() + ? 
result(*std::forward(exp)) + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, monostate>; + if (exp.has_value()) { + return result(*std::forward(exp)); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, detail::decay_t>; + return exp.has_value() + ? result() + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, monostate>; + if (exp.has_value()) { + return result(); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +#else +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) + -> expected, detail::decay_t> { + using result = expected, detail::decay_t>; + + return exp.has_value() + ? 
result(*std::forward(exp)) + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) -> expected, monostate> { + using result = expected, monostate>; + if (exp.has_value()) { + return result(*std::forward(exp)); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) + -> expected, detail::decay_t> { + using result = expected, detail::decay_t>; + + return exp.has_value() + ? result() + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) -> expected, monostate> { + using result = expected, monostate>; + if (exp.has_value()) { + return result(); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +#endif + +#ifdef TL_EXPECTED_CXX14 +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto or_else_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + return exp.has_value() ? std::forward(exp) + : detail::invoke(std::forward(f), + std::forward(exp).error()); +} + +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +detail::decay_t or_else_impl(Exp &&exp, F &&f) { + return exp.has_value() ? 
std::forward(exp) + : (detail::invoke(std::forward(f), + std::forward(exp).error()), + std::forward(exp)); +} +#else +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto or_else_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + return exp.has_value() ? std::forward(exp) + : detail::invoke(std::forward(f), + std::forward(exp).error()); +} + +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +detail::decay_t or_else_impl(Exp &&exp, F &&f) { + return exp.has_value() ? std::forward(exp) + : (detail::invoke(std::forward(f), + std::forward(exp).error()), + std::forward(exp)); +} +#endif +} // namespace detail + +template +constexpr bool operator==(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? false + : (!lhs.has_value() ? lhs.error() == rhs.error() : *lhs == *rhs); +} +template +constexpr bool operator!=(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? true + : (!lhs.has_value() ? lhs.error() != rhs.error() : *lhs != *rhs); +} +template +constexpr bool operator==(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? false + : (!lhs.has_value() ? lhs.error() == rhs.error() : true); +} +template +constexpr bool operator!=(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? true + : (!lhs.has_value() ? lhs.error() == rhs.error() : false); +} + +template +constexpr bool operator==(const expected &x, const U &v) { + return x.has_value() ? *x == v : false; +} +template +constexpr bool operator==(const U &v, const expected &x) { + return x.has_value() ? *x == v : false; +} +template +constexpr bool operator!=(const expected &x, const U &v) { + return x.has_value() ? 
*x != v : true; +} +template +constexpr bool operator!=(const U &v, const expected &x) { + return x.has_value() ? *x != v : true; +} + +template +constexpr bool operator==(const expected &x, const unexpected &e) { + return x.has_value() ? false : x.error() == e.value(); +} +template +constexpr bool operator==(const unexpected &e, const expected &x) { + return x.has_value() ? false : x.error() == e.value(); +} +template +constexpr bool operator!=(const expected &x, const unexpected &e) { + return x.has_value() ? true : x.error() != e.value(); +} +template +constexpr bool operator!=(const unexpected &e, const expected &x) { + return x.has_value() ? true : x.error() != e.value(); +} + +template ::value || + std::is_move_constructible::value) && + detail::is_swappable::value && + std::is_move_constructible::value && + detail::is_swappable::value> * = nullptr> +void swap(expected &lhs, + expected &rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} +} // namespace tl + +#endif +/* end file include/ada/expected.h */ +#include +#include + +/** + * @namespace ada::parser + * @brief Includes the definitions for supported parsers + */ +namespace ada::parser { + + /** + * Parses user_input as a url; base_url (nullptr by default) is the optional base url and encoding defaults to UTF8. + */ + url parse_url(std::string_view user_input, + const ada::url* base_url = nullptr, + ada::encoding_type encoding = ada::encoding_type::UTF8); + +} // namespace ada::parser + +#endif // ADA_PARSER_H +/* end file include/ada/parser.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/scheme-inl.h +/* begin file include/ada/scheme-inl.h */ +/** + * @file scheme-inl.h + * @brief Definitions for the URL scheme. + */ +#ifndef ADA_SCHEME_INL_H +#define ADA_SCHEME_INL_H + + +namespace ada::scheme { + + /** + * @namespace ada::scheme::details + * @brief Includes the definitions for scheme specific entities + */ + namespace details { + // for use with is_special and get_special_port + // Spaces, if present, are removed from URL. 
+ constexpr std::string_view is_special_list[] = {"http", " ", "https", + "ws", "ftp", "wss", "file", " "}; + // for use with get_special_port; entries are parallel to is_special_list + constexpr uint16_t special_ports[] = {80, 0, 443, 80, 21, 443, 0, 0}; + } + + ada_really_inline constexpr bool is_special(std::string_view scheme) { + if(scheme.empty()) { return false; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; // slot 0..7 derived from the length and the first character + const std::string_view target = details::is_special_list[hash_value]; + return (target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1)); + } + constexpr uint16_t get_special_port(std::string_view scheme) noexcept { + if(scheme.empty()) { return 0; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; + const std::string_view target = details::is_special_list[hash_value]; + if ((target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1))) { + return details::special_ports[hash_value]; + } else { return 0; } + } + constexpr uint16_t get_special_port(ada::scheme::type type) noexcept { + return details::special_ports[int(type)]; + } + constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept { + if(scheme.empty()) { return ada::scheme::NOT_SPECIAL; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; + const std::string_view target = details::is_special_list[hash_value]; + if ((target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1))) { + return ada::scheme::type(hash_value); + } else { return ada::scheme::NOT_SPECIAL; } + } + +} // namespace ada::scheme + +#endif // ADA_SCHEME_INL_H +/* end file include/ada/scheme-inl.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/url-inl.h +/* begin file include/ada/url-inl.h */ +/** + * @file url-inl.h + * @brief Definitions for the URL + */ +#ifndef ADA_URL_INL_H +#define ADA_URL_INL_H + + +namespace ada { + [[nodiscard]] ada_really_inline bool url::includes_credentials() const noexcept { + return !username.empty() || 
!password.empty(); + } + [[nodiscard]] ada_really_inline bool url::is_special() const noexcept { + return type != ada::scheme::NOT_SPECIAL; + } + [[nodiscard]] inline uint16_t url::get_special_port() const { + return ada::scheme::get_special_port(type); + } + [[nodiscard]] ada_really_inline ada::scheme::type url::get_scheme_type() const noexcept { + return type; + } + [[nodiscard]] ada_really_inline uint16_t url::scheme_default_port() const noexcept { + return scheme::get_special_port(type); + } + [[nodiscard]] inline bool url::cannot_have_credentials_or_port() const { + return !host.has_value() || host.value().empty() || type == ada::scheme::type::FILE; + } + ada_really_inline size_t url::parse_port(std::string_view view, bool check_trailing_content) noexcept { + ada_log("parse_port('", view, "') ", view.size()); + uint16_t parsed_port{}; + auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port); + if(r.ec == std::errc::result_out_of_range) { + ada_log("parse_port: std::errc::result_out_of_range"); + is_valid = false; + return 0; + } + ada_log("parse_port: ", parsed_port); + const size_t consumed = size_t(r.ptr - view.data()); + ada_log("parse_port: consumed ", consumed); + if(check_trailing_content) { + is_valid &= (consumed == view.size() || view[consumed] == '/' || view[consumed] == '?' || (is_special() && view[consumed] == '\\')); + } + ada_log("parse_port: is_valid = ", is_valid); + if(is_valid) { + port = (r.ec == std::errc() && scheme_default_port() != parsed_port) ? + std::optional(parsed_port) : std::nullopt; + } + return consumed; + } + [[nodiscard]] inline std::string_view url::get_scheme() const noexcept { + if(is_special()) { return ada::scheme::details::is_special_list[type]; } + // A non-special scheme is not in the table; it is kept in non_special_scheme. 
+ return non_special_scheme; + } + inline void url::set_scheme(std::string&& new_scheme) noexcept { + type = ada::scheme::get_scheme_type(new_scheme); + // We only move the 'scheme' if it is non-special (special schemes are fully described by 'type'). + if(!is_special()) { + non_special_scheme = std::move(new_scheme); // move, not copy: a copy may allocate and throw inside this noexcept function + } + } + inline void url::copy_scheme(ada::url&& u) noexcept { + non_special_scheme = u.non_special_scheme; + type = u.type; + } + inline void url::copy_scheme(const ada::url& u) { + non_special_scheme = u.non_special_scheme; + type = u.type; + } + + inline std::ostream& operator<<(std::ostream& out, const ada::url& u) { + return out << u.to_string(); + } +} // namespace ada + +#endif // ADA_URL_INL_H +/* end file include/ada/url-inl.h */ + +// Public API +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/ada_version.h +/* begin file include/ada/ada_version.h */ +/** + * @file ada_version.h + * @brief Definitions for Ada's version number. + */ +#ifndef ADA_ADA_VERSION_H +#define ADA_ADA_VERSION_H + +#define ADA_VERSION "1.0.0" + +namespace ada { + + enum { + ADA_VERSION_MAJOR = 1, + ADA_VERSION_MINOR = 0, + ADA_VERSION_REVISION = 0, + }; + +} // namespace ada + +#endif // ADA_ADA_VERSION_H +/* end file include/ada/ada_version.h */ +// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/implementation.h +/* begin file include/ada/implementation.h */ +/** + * @file implementation.h + * @brief Definitions for user facing functions for parsing URL and its components. + */ +#ifndef ADA_IMPLEMENTATION_H +#define ADA_IMPLEMENTATION_H + +#include +#include + + +namespace ada { + enum class errors { + generic_error + }; + + using result = tl::expected; + + /** + * The URL parser takes a scalar value string input, with an optional null or base URL base (default null) + * and an optional encoding encoding (default UTF-8). + * + * @param input the string input to analyze. + * @param base_url the optional string input to use as a base url. 
+ * @param encoding encoding (default to UTF-8) + * @return a parsed URL. + */ + ada_warn_unused ada::result parse(std::string_view input, + const ada::url* base_url = nullptr, + ada::encoding_type encoding = ada::encoding_type::UTF8); + /** + * Computes a href string from a file path. + * @return a href string (starts with file://) + */ + std::string href_from_file(std::string_view path); +} + +#endif // ADA_IMPLEMENTATION_H +/* end file include/ada/implementation.h */ + +#endif // ADA_H +/* end file include/ada.h */ diff --git a/node.gyp b/node.gyp index e7b0d968e9d4bf..a60513915984c8 100644 --- a/node.gyp +++ b/node.gyp @@ -468,6 +468,7 @@ 'deps/histogram/histogram.gyp:histogram', 'deps/uvwasi/uvwasi.gyp:uvwasi', 'deps/simdutf/simdutf.gyp:simdutf', + 'deps/ada/ada.gyp:ada', ], 'sources': [ @@ -1219,6 +1220,7 @@ 'node_dtrace_ustack', 'node_dtrace_provider', 'deps/simdutf/simdutf.gyp:simdutf', + 'deps/ada/ada.gyp:ada', ], 'includes': [ @@ -1318,6 +1320,7 @@ 'node_dtrace_header', 'node_dtrace_ustack', 'node_dtrace_provider', + 'deps/ada/ada.gyp:ada', ], 'includes': [ @@ -1387,6 +1390,7 @@ '<(node_lib_target_name)', 'deps/histogram/histogram.gyp:histogram', 'deps/uvwasi/uvwasi.gyp:uvwasi', + 'deps/ada/ada.gyp:ada', ], 'includes': [ diff --git a/tools/dep_updaters/update-ada.sh b/tools/dep_updaters/update-ada.sh new file mode 100644 index 00000000000000..45112916583bc9 --- /dev/null +++ b/tools/dep_updaters/update-ada.sh @@ -0,0 +1,52 @@ +#!/bin/sh +set -e +# Shell script to update ada in the source tree to a specific version + +BASE_DIR=$(cd "$(dirname "$0")/../.." && pwd) +DEPS_DIR="$BASE_DIR/deps" +ADA_VERSION=$1 + +if [ "$#" -le 0 ]; then + echo "Error: please provide an ada version to update to" + echo " e.g. $0 1.0.0" + exit 1 +fi + +echo "Making temporary workspace..." + +WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp') + +cleanup () { + EXIT_CODE=$?
+ [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE" + exit $EXIT_CODE +} + +trap cleanup INT TERM EXIT + +ADA_REF="v$ADA_VERSION" +ADA_ZIP="ada-$ADA_VERSION.zip" +ADA_LICENSE="LICENSE-MIT" + +cd "$WORKSPACE" + +echo "Fetching ada source archive..." +curl -fsSL -o "$ADA_ZIP" "https://github.com/ada-url/ada/releases/download/$ADA_REF/singleheader.zip" +unzip "$ADA_ZIP" +rm "$ADA_ZIP" +rm ./*_demo.cpp + +curl -fsSL -o "$ADA_LICENSE" "https://raw.githubusercontent.com/ada-url/ada/HEAD/LICENSE-MIT" + +echo "Replacing existing ada (except GYP build files)" +mv "$DEPS_DIR/ada/"*.gyp "$DEPS_DIR/ada/README.md" "$WORKSPACE/" +rm -rf "$DEPS_DIR/ada" +mv "$WORKSPACE" "$DEPS_DIR/ada" + +echo "All done!" +echo "" +echo "Please git add ada, commit the new version:" +echo "" +echo "$ git add -A deps/ada" +echo "$ git commit -m \"deps: update ada to $ADA_VERSION\"" +echo "" diff --git a/tools/license-builder.sh b/tools/license-builder.sh index 8389f24c681082..1b52a473a15bf2 100755 --- a/tools/license-builder.sh +++ b/tools/license-builder.sh @@ -81,6 +81,8 @@ licenseText="$(sed -e '/The data format used by the zlib library/,$d' -e 's/^\/\ addlicense "zlib" "deps/zlib" "$licenseText" licenseText="$(cat "${rootdir}/deps/simdutf/LICENSE-MIT")" addlicense "simdutf" "deps/simdutf" "$licenseText" +licenseText="$(curl -fsSL https://raw.githubusercontent.com/ada-url/ada/HEAD/LICENSE-MIT)" +addlicense "ada" "deps/ada" "$licenseText" # npm licenseText="$(cat "${rootdir}/deps/npm/LICENSE")"