-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
For whatwg/url#341.
- Loading branch information
Showing
5 changed files
with
37,218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// Fetch the converted IdnaTestV2 data and register one subtest per entry.
// The promise test itself only covers loading and parsing the JSON resource;
// the per-entry assertions are registered synchronously by runTests().
promise_test(async () => {
  const res = await fetch("resources/IdnaTestV2.json");
  return runTests(await res.json());
}, "Loading data…");
|
||
/**
 * Registers one testharness.js subtest for each usable entry in the
 * converted IdnaTestV2 data.
 *
 * Entries that are plain strings are treated as comments and skipped, as are
 * entries whose `input` is the empty string. For every other entry the input
 * is placed in the host position of an `https:` URL:
 *   - `output === null` means parsing is expected to throw a TypeError;
 *   - otherwise `host`, `hostname`, `pathname` and `href` of the parsed URL
 *     are compared against the expected `output`.
 *
 * @param {Array<string|{input: string, output: (string|null), comment: (string|undefined)}>} idnaTests
 *     Parsed contents of the JSON resource.
 */
function runTests(idnaTests) {
  for (const idnaTest of idnaTests) {
    if (typeof idnaTest === "string") {
      continue; // skip comments
    }
    if (idnaTest.input === "") {
      continue; // cannot test empty string input through new URL()
    }
    test(() => {
      if (idnaTest.output === null) {
        assert_throws_js(TypeError, () => new URL(`https://${idnaTest.input}/x`));
      } else {
        const url = new URL(`https://${idnaTest.input}/x`);
        assert_equals(url.host, idnaTest.output);
        assert_equals(url.hostname, idnaTest.output);
        // The fixed "/x" path verifies that the input did not leak out of the host.
        assert_equals(url.pathname, "/x");
        assert_equals(url.href, `https://${idnaTest.output}/x`);
      }
    }, `ToASCII("${idnaTest.input}")${idnaTest.comment ? " " + idnaTest.comment : ""}`);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
2023-01. Feedback by Anne van Kesteren on | ||
|
||
https://unicode.org/Public/idna/latest/IdnaTestV2.txt | ||
Date: 2022-05-26, 22:30:12 GMT | ||
|
||
(I have almost exclusively focused on ToASCII cases.) | ||
|
||
* VerifyDnsLength is not P4, but rather A4_1 and A4_2. | ||
* Tests whose final label consists only of ASCII digits are not useful for browsers, because such input triggers the IPv4 parser. This is a problem for a number of the A4_1 and A4_2 tests, and also for a large number of later tests, such as ToASCII("xn--gl0as212a.8.") or ToASCII("1.27"). | ||
* Test for ToASCII("$") is marked P1 and V6, not U1. This might apply more widely. | ||
* NV8 is not used as a status. | ||
* A3 and X3 do not appear to be used as a status. (These are catered for by P4 presumably.) | ||
* CheckBidi is not V8. V8 does not appear to be used. You'd have to filter out all B1-6 statuses instead. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import json | ||
import requests | ||
|
||
# Download IdnaTestV2.txt into the current working directory if it doesn't
# exist yet; an existing copy is reused as-is.
if not os.path.exists("IdnaTestV2.txt"):
    response = requests.get("https://unicode.org/Public/idna/latest/IdnaTestV2.txt")
    # Fail loudly instead of caching an HTTP error page as test data.
    response.raise_for_status()
    # Explicit encoding: the data contains non-ASCII text, so the platform's
    # default encoding must not be relied upon.
    with open("IdnaTestV2.txt", "w", encoding="utf-8") as handle:
        handle.write(response.text)

# Raw lines of the upstream test file (each still carries its line ending).
with open("IdnaTestV2.txt", "r", encoding="utf-8") as handle:
    test_input = handle.readlines()

# The generated JSON is a list whose first element is a descriptive string;
# one entry per converted test line is appended after it.
test_output = ["This resource is a conversion of IdnaTestV2 aimed to match the requirements of the URL Standard's domain to ASCII"]
|
||
def remove_escapes(input):
    """Decode JSON-style backslash escapes (e.g. ``\\uXXXX``) in ``input``.

    The text is wrapped in double quotes and parsed as a JSON string, so every
    escape sequence understood by JSON is resolved. Literal double quotes that
    are not already backslash-escaped are escaped first; otherwise they would
    terminate the wrapping JSON string early and make parsing fail.

    :param input: text that may contain backslash escapes.
    :returns: the text with all escapes resolved.
    :raises json.JSONDecodeError: if ``input`` contains an escape sequence
        that is not valid inside a JSON string.
    """
    import re

    # Match either a complete backslash pair (left untouched) or a bare quote
    # (which gets escaped), so an already-escaped quote is never escaped twice.
    quoted = re.sub(r'(\\.)|"', lambda match: match.group(1) or '\\"', input)
    return json.loads("\"" + quoted + "\"")
|
||
# Every distinct status code encountered in the selected status column; only
# used for the summary printed at the very end.
unique_statuses = set()

for test in test_input:
    # Remove newlines
    test = test.rstrip()

    # Skip lines that are comments or empty
    if test.startswith("#") or test == "":
        continue

    # Remove escapes (doesn't handle \x{XXXX} but those do not appear in the source)
    test = remove_escapes(test)

    # Normalize columns
    #
    # Since we are only interested in ToASCII and enforce Transitional_Processing=false we care
    # about the following columns:
    #
    # * Column 1: source
    # * Column 4: toAsciiN
    # * Column 5: toAsciiNStatus
    columns = [column.strip() for column in test.split(";")]

    # Column 1
    column_source = columns[0]

    # Column 4 (if empty, use Column 2; if empty again, use Column 1)
    column_to_ascii = columns[3] or columns[1] or column_source

    # Column 5 (if empty, use Column 3; if empty again, assume empty list)
    column_status = columns[4] or columns[2]
    if column_status == "":
        column_status = []
    else:
        # Statuses are written as a bracketed, comma-separated list.
        assert column_status.startswith("[")
        column_status = [status.strip() for status in column_status[1:-1].split(",")]

    # Recorded before any status is dropped below, so the summary reflects
    # what appears in the source data.
    unique_statuses.update(column_status)

    # The URL Standard has
    #
    # * UseSTD3ASCIIRules=false; however there are no tests marked U1 (some should be though)
    # * CheckHyphens=false; thus ignore V2, V3?
    # * VerifyDnsLength=false; thus ignore A4_1 and A4_2
    comment = ""
    for ignored_status in ["A4_1", "A4_2", "U1", "V2", "V3"]:
        if ignored_status in column_status:
            column_status.remove(ignored_status)
            comment += ignored_status + " (ignored); "
    for status in column_status:
        comment += status + "; "
    if comment != "":
        # Drop the trailing "; " separator.
        comment = comment.strip()[:-1]

    # Any status that is still present means the conversion is expected to
    # fail, which is encoded as a null output.
    output = column_to_ascii
    if len(column_status) > 0:
        output = None

    test_output_entry = { "input": column_source, "output": output }
    if comment != "":
        test_output_entry["comment"] = comment

    test_output.append(test_output_entry)

# Write the result; the context manager guarantees the file is flushed and
# closed even if serialization fails.
with open("IdnaTestV2.json", "w", encoding="utf-8") as handle:
    handle.write(json.dumps(test_output, sort_keys=True, allow_nan=False, indent=2, separators=(',', ': ')))
    handle.write("\n")

# Print the sorted list of statuses seen, as an aid for reviewing which
# statuses the upstream data actually uses.
unique_statuses = sorted(unique_statuses)
print(unique_statuses)
Oops, something went wrong.