From 4ea06228352217df8cac7d230bd1e058b94ce0dc Mon Sep 17 00:00:00 2001 From: Domenic Denicola Date: Sat, 4 Mar 2023 17:20:50 +0900 Subject: [PATCH] Fix empty label handling As discovered in https://github.com/jsdom/whatwg-url/pull/250, the library currently does not handle empty labels in the same way that browsers seem to do. Although the standard is unclear, we should align to browser handling. To test this, we pull in the new IdnaTestV2.json file from WPT. This is somewhat redundant with our existing IdnaTestV2.txt, but it also represents a significant curation effort to ensure we only have URL-applicable tests, so we should make use of that. --- index.js | 4 +++- scripts/getLatestTests.js | 7 ++++++- test/toascii.js | 25 ++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index f6ca73f..518eb10 100644 --- a/index.js +++ b/index.js @@ -130,7 +130,9 @@ function validateLabel(label, { checkHyphens, checkBidi, checkJoiners, processin } // https://tools.ietf.org/html/rfc5893#section-2 - if (checkBidi) { + // For the codePoints length check, see discussion in https://github.com/jsdom/whatwg-url/pull/250 and the second item + // in https://github.com/whatwg/url/issues/744. + if (checkBidi && codePoints.length > 0) { let rtl; // 1 diff --git a/scripts/getLatestTests.js b/scripts/getLatestTests.js index 979b128..061de2b 100644 --- a/scripts/getLatestTests.js +++ b/scripts/getLatestTests.js @@ -24,7 +24,12 @@ async function main() { })(), (async () => { const asciiTarget = fs.createWriteStream(path.resolve(__dirname, "../test/fixtures/toascii.json")); - const response = await fetch("https://github.com/web-platform-tests/wpt/raw/112ad5ca55d55f6da2ccc7468e6dcc91b4e5d223/url/resources/toascii.json"); + const response = await fetch("https://github.com/web-platform-tests/wpt/raw/7234ceaeb1505c42bef05a88a77da930653a4e31/url/resources/toascii.json"); + await pipelinePromise(response.body, asciiTarget); + })(), + (async () => { + const asciiTarget = fs.createWriteStream(path.resolve(__dirname, "../test/fixtures/IdnaTestV2ToASCII.json")); + const response = await fetch("https://github.com/web-platform-tests/wpt/raw/7234ceaeb1505c42bef05a88a77da930653a4e31/url/resources/IdnaTestV2.json"); await pipelinePromise(response.body, asciiTarget); })() ]); diff --git a/test/toascii.js b/test/toascii.js index e4d7ffd..792a172 100644 --- a/test/toascii.js +++ b/test/toascii.js @@ -5,6 +5,7 @@ const assert = require("assert"); const tr46 = require("../index.js"); const toASCIITestCases = require("./fixtures/toascii.json"); +const idnaTestV2 = require("./fixtures/IdnaTestV2ToASCII.json"); function testToASCII(testCase) { return () => { @@ -29,7 +30,29 @@ describe("ToASCII", () => { let description = testCase.input; if (testCase.comment) { - description = ` (${testCase.comment})`; + description += ` (${testCase.comment})`; + } + + specify(description, testToASCII(testCase)); + } +}); + +describe("ToASCII via IdnaTestV2.json in wpt", () => { + for (const testCase of idnaTestV2) { + if (typeof testCase === "string") { + // It's a "comment"; skip it. + continue; + } + + if (testCase.input.includes("?")) { + // ToASCII will not fail on these. But, the URL Standard will. IdnaTestV2.json is mostly focused on the URL + // Standard, so it expects failures. We should skip them. + continue; + } + + let description = testCase.input; + if (testCase.comment) { + description += ` (${testCase.comment})`; } specify(description, testToASCII(testCase));