Skip to content

Commit

Permalink
We should not error on forbidden characters (#27)
Browse files Browse the repository at this point in the history
* We should not error on forbidden characters

* Formatting.

* More formatting.
  • Loading branch information
lemire authored and anonrig committed Apr 26, 2023
1 parent d1caf7d commit 775d205
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 17 deletions.
13 changes: 10 additions & 3 deletions include/ada/idna/to_ascii.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,25 @@ namespace ada::idna {
// decoding: percent decoding should be done prior to calling this function. We
// do not remove tabs and spaces, they should have been removed prior to calling
// this function. We also do not trim control characters. We also assume that
// the input is not empty. We return "" on error. For now.
// the input is not empty. We return "" on error.
//
// Example: "www.öbb.at" -> "www.xn--bb-eka.at"
//
// This function may accept or even produce invalid domains.
std::string to_ascii(std::string_view ut8_string);

// Returns true if the string contains a forbidden code point according to the
// WHATWG URL specification:
// https://url.spec.whatwg.org/#forbidden-domain-code-point
bool contains_forbidden_domain_code_point(std::string_view ascii_string);

bool constexpr begins_with(std::u32string_view view,
std::u32string_view prefix);
bool constexpr begins_with(std::string_view view, std::string_view prefix);

bool constexpr is_ascii(std::u32string_view view);
bool constexpr is_ascii(std::string_view view);

std::string from_ascii_to_ascii(std::string_view ut8_string);

} // namespace ada::idna

#endif // ADA_IDNA_TO_ASCII_H
23 changes: 9 additions & 14 deletions src/to_ascii.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,18 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = {

static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);

inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept {
// Reports whether the byte `c` is flagged in
// is_forbidden_domain_code_point_table. The char is converted to uint8_t
// before indexing, so every possible char value (including negative ones when
// char is signed) selects one of the table's 256 entries.
inline bool is_forbidden_domain_code_point(const char c) noexcept {
  const auto index = static_cast<uint8_t>(c);
  return is_forbidden_domain_code_point_table[index] != 0;
}

// We return "" on error. For now.
std::string from_ascii_to_ascii(std::string_view ut8_string) {
static const std::string error = "";
if (std::any_of(ut8_string.begin(), ut8_string.end(),
is_forbidden_domain_code_point)) {
return error;
}
// Returns true when at least one char of `view` satisfies
// is_forbidden_domain_code_point, and false otherwise (an empty view yields
// false). The scan stops at the first match.
bool contains_forbidden_domain_code_point(std::string_view view) {
  const auto first_forbidden =
      std::find_if(view.begin(), view.end(), is_forbidden_domain_code_point);
  return first_forbidden != view.end();
}

// We return "" on error.
static std::string from_ascii_to_ascii(std::string_view ut8_string) {
static const std::string error = "";
// copy and map
// we could be more efficient by avoiding the copy when unnecessary.
std::string mapped_string = std::string(ut8_string);
Expand Down Expand Up @@ -124,7 +124,7 @@ std::string from_ascii_to_ascii(std::string_view ut8_string) {
return out;
}

// We return "" on error. For now.
// We return "" on error.
std::string to_ascii(std::string_view ut8_string) {
if (is_ascii(ut8_string)) {
return from_ascii_to_ascii(ut8_string);
Expand Down Expand Up @@ -211,11 +211,6 @@ std::string to_ascii(std::string_view ut8_string) {
out.push_back('.');
}
}

if (std::any_of(out.begin(), out.end(), is_forbidden_domain_code_point)) {
return error;
}

return out;
}
} // namespace ada::idna
3 changes: 3 additions & 0 deletions tests/wpt_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ bool idna_test_v2_to_ascii(std::string_view filename) {

std::string_view input = object["input"].get_string();
std::string output = ada::idna::to_ascii(input);
if (ada::idna::contains_forbidden_domain_code_point(output)) {
output = "";
}
auto expected_output = object["output"];

if (expected_output.is_null() && output.size()) {
Expand Down

0 comments on commit 775d205

Please sign in to comment.