From ff858bc42550bf370e7ed20c1646a54d907f40ee Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Tue, 23 Apr 2024 06:50:22 +0800 Subject: [PATCH 1/8] Improve Recovery of Unterminated Regular Expressions --- src/compiler/scanner.ts | 127 ++++++++++++------ src/testRunner/tests.ts | 1 + .../unittests/regExpParserRecovery.ts | 81 +++++++++++ .../reference/parser645086_1.errors.txt | 7 +- .../reference/parser645086_2.errors.txt | 7 +- .../reference/parserMissingToken2.errors.txt | 4 +- .../reference/parserMissingToken2.js | 2 +- .../reference/parserMissingToken2.types | 4 +- ...gularExpressionDivideAmbiguity4.errors.txt | 4 +- .../tsxAttributeInvalidNames.errors.txt | 4 +- .../reference/tsxAttributeInvalidNames.js | 2 +- .../reference/tsxAttributeInvalidNames.types | 4 +- ...unterminatedRegexAtEndOfSource1.errors.txt | 4 +- tests/cases/fourslash/whiteSpaceTrimming4.ts | 2 +- 14 files changed, 184 insertions(+), 69 deletions(-) create mode 100644 src/testRunner/unittests/regExpParserRecovery.ts diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 01dc3b1b31494..8bf8725142831 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -21,6 +21,7 @@ import { KeywordSyntaxKind, LanguageVariant, last, + lastOrUndefined, LineAndCharacter, MapLike, parsePseudoBigInt, @@ -2389,7 +2390,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function reScanSlashToken(): SyntaxKind { if (token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) { // Quickly get to the end of regex such that we know the flags - let p = tokenStart + 1; + const startOfRegExpBody = tokenStart + 1; + pos = startOfRegExpBody; let inEscape = false; // Although nested character classes are allowed in Unicode Sets mode, // an unescaped slash is nevertheless invalid even in a character class in Unicode mode. 
@@ -2401,16 +2403,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean while (true) { // If we reach the end of a file, or hit a newline, then this is an unterminated // regex. Report error and return what we have so far. - if (p >= end) { + if (pos >= end) { tokenFlags |= TokenFlags.Unterminated; - error(Diagnostics.Unterminated_regular_expression_literal); break; } - const ch = text.charCodeAt(p); + const ch = text.charCodeAt(pos); if (isLineBreak(ch)) { tokenFlags |= TokenFlags.Unterminated; - error(Diagnostics.Unterminated_regular_expression_literal); break; } @@ -2422,7 +2422,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean else if (ch === CharacterCodes.slash && !inCharacterClass) { // A slash within a character class is permissible, // but in general it signals the end of the regexp literal. - p++; break; } else if (ch === CharacterCodes.openBracket) { @@ -2434,50 +2433,94 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean else if (ch === CharacterCodes.closeBracket) { inCharacterClass = false; } - p++; + pos++; } - const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated); - const endOfBody = p - (isUnterminated ? 
0 : 1); - let regExpFlags = RegularExpressionFlags.None; - while (p < end) { - const ch = text.charCodeAt(p); - if (!isIdentifierPart(ch, languageVersion)) { - break; - } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); - if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, p, 1); - } - else if (regExpFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, p, 1); - } - else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1); - } - else { - regExpFlags |= flag; - const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!; - if (languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom)); + const endOfRegExpBody = pos; + if (tokenFlags & TokenFlags.Unterminated) { + // Search for the nearest unbalanced bracket for better recovery. Since the expression is + // invalid anyways, we take nested square brackets into consideration for the best guess. 
+ pos = startOfRegExpBody; + inEscape = false; + let characterClassDepth = 0; + const bracketStack: CharacterCodes[] = []; + while (pos < endOfRegExpBody) { + const ch = text.charCodeAt(pos); + if (inEscape) { + inEscape = false; + } + else if (ch === CharacterCodes.backslash) { + inEscape = true; + } + else if (ch === CharacterCodes.openBracket) { + characterClassDepth++; + } + else if (ch === CharacterCodes.closeBracket && characterClassDepth) { + characterClassDepth--; } + else if (!characterClassDepth) { + if (ch === CharacterCodes.openParen) { + bracketStack.push(CharacterCodes.closeParen); + } + else if (ch === CharacterCodes.openBrace) { + bracketStack.push(CharacterCodes.closeBrace); + } + else if (ch === lastOrUndefined(bracketStack)) { + bracketStack.pop(); + } + else if (ch === CharacterCodes.closeParen || ch === CharacterCodes.closeBracket || ch === CharacterCodes.closeBrace) { + // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex. 
+ break; + } + } + pos++; } - p++; + // Whitespaces and semicolons at the end are not likely to be part of the regex + while (isWhiteSpaceLike(text.charCodeAt(pos - 1)) || text.charCodeAt(pos - 1) === CharacterCodes.semicolon) pos--; + error(Diagnostics.Unterminated_regular_expression_literal, tokenStart, pos - tokenStart); } - pos = tokenStart + 1; - const saveTokenPos = tokenStart; - const saveTokenFlags = tokenFlags; - scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated); - if (!isUnterminated) { - pos = p; + else { + // Consume the slash character + pos++; + let regExpFlags = RegularExpressionFlags.None; + while (pos < end) { + const ch = text.charCodeAt(pos); + if (!isIdentifierPart(ch, languageVersion)) { + break; + } + const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + if (flag === undefined) { + error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + } + else if (regExpFlags & flag) { + error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + } + else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { + error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); + } + else { + regExpFlags |= flag; + const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!; + if (languageVersion < availableFrom) { + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); + } + } + pos++; + } + const endOfRegExpFlags = pos; + pos = startOfRegExpBody; + const saveTokenPos = tokenStart; + const saveTokenFlags = tokenFlags; + scanRegularExpressionWorker(text, endOfRegExpBody, regExpFlags); + pos = endOfRegExpFlags; + tokenStart = saveTokenPos; + tokenFlags = saveTokenFlags; } - tokenStart = saveTokenPos; - tokenFlags = saveTokenFlags; tokenValue = text.substring(tokenStart, pos); token = SyntaxKind.RegularExpressionLiteral; } 
return token; - function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) { + function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags) { /** Grammar parameter */ const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); /** Grammar parameter */ @@ -2685,10 +2728,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // falls through case CharacterCodes.closeBracket: case CharacterCodes.closeBrace: - if (isUnterminated && !isInGroup) { - // Assume what starting from the character to be outside of the regex - return; - } if (unicodeMode || ch === CharacterCodes.closeParen) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); } diff --git a/src/testRunner/tests.ts b/src/testRunner/tests.ts index c211d221aa4c9..42816c8a890cd 100644 --- a/src/testRunner/tests.ts +++ b/src/testRunner/tests.ts @@ -16,6 +16,7 @@ import "./unittests/paths"; import "./unittests/printer"; import "./unittests/programApi"; import "./unittests/publicApi"; +import "./unittests/regExpParserRecovery"; import "./unittests/reuseProgramStructure"; import "./unittests/semver"; import "./unittests/transform"; diff --git a/src/testRunner/unittests/regExpParserRecovery.ts b/src/testRunner/unittests/regExpParserRecovery.ts new file mode 100644 index 0000000000000..8cf25928d13f7 --- /dev/null +++ b/src/testRunner/unittests/regExpParserRecovery.ts @@ -0,0 +1,81 @@ +import * as ts from "../_namespaces/ts"; + +describe("unittests:: regExpParserRecovery", () => { + const testCases = [ + "/", + "/[]", + "/{}", + "/()", + "/foo", + "/foo[]", + "/foo{}", + "/foo()", + "/[]foo", + "/{}foo", + "/()foo", + "/{[]}", + "/([])", + "/[)}({]", + "/({[)}]})", + "/\\[", + "/\\{", + "/\\(", + "/[\\[]", + "/(\\[)", + "/{\\[}", + "/[\\(]", + "/(\\()", + "/{\\(}", + "/[\\{]", + "/(\\{)", + "/{\\{}", + 
"/\\{(\\[\\([{])", + "/\\]", + "/\\}", + "/\\)", + "/[\\]]", + "/(\\])", + "/{\\]}", + "/[\\)]", + "/(\\))", + "/{\\)}", + "/[\\}]", + "/(\\})", + "/{\\}}", + "/({[\\]})]})", + ]; + const whiteSpaceSequences = [ + "", + " ", + "\t\v\r\n", + "\u3000\u2028", + ]; + it("stops parsing unterminated regexes at correct position", () => { + ts.forEach(testCases, testCase => { + ts.forEach(whiteSpaceSequences, whiteSpaces => { + const testCaseWithWhiteSpaces = testCase + whiteSpaces; + const sources = [ + `const regex = ${testCaseWithWhiteSpaces};`, + `(${testCaseWithWhiteSpaces});`, + `([${testCaseWithWhiteSpaces}]);`, + `({prop: ${testCaseWithWhiteSpaces}});`, + `({prop: ([(${testCaseWithWhiteSpaces})])});`, + `({[(${testCaseWithWhiteSpaces}).source]: 42});`, + ]; + ts.forEach(sources, source => { + const { parseDiagnostics } = ts.createLanguageServiceSourceFile( + /*fileName*/ "", + ts.ScriptSnapshot.fromString(source), + ts.ScriptTarget.Latest, + /*version*/ "0", + /*setNodeParents*/ false, + ); + const diagnostic = ts.find(parseDiagnostics, ({ code }) => code === ts.Diagnostics.Unterminated_regular_expression_literal.code); + assert(diagnostic, "There should be an 'Unterminated regular expression literal.' error"); + assert.equal(diagnostic.start, source.indexOf("/"), "Diagnostic should start at where the regex starts"); + assert.equal(diagnostic.length, testCase.length, "Diagnostic should end at where the regex ends"); + }); + }); + }); + }); +}); diff --git a/tests/baselines/reference/parser645086_1.errors.txt b/tests/baselines/reference/parser645086_1.errors.txt index 19271f9ae00ae..32871f9fe4cdc 100644 --- a/tests/baselines/reference/parser645086_1.errors.txt +++ b/tests/baselines/reference/parser645086_1.errors.txt @@ -1,13 +1,10 @@ parser645086_1.ts(1,13): error TS1005: ',' expected. parser645086_1.ts(1,14): error TS1134: Variable declaration expected. -parser645086_1.ts(1,15): error TS1161: Unterminated regular expression literal. 
-==== parser645086_1.ts (3 errors) ==== +==== parser645086_1.ts (2 errors) ==== var v = /[]/]/ ~ !!! error TS1005: ',' expected. ~ -!!! error TS1134: Variable declaration expected. - -!!! error TS1161: Unterminated regular expression literal. \ No newline at end of file +!!! error TS1134: Variable declaration expected. \ No newline at end of file diff --git a/tests/baselines/reference/parser645086_2.errors.txt b/tests/baselines/reference/parser645086_2.errors.txt index dffb43065e05d..7d1f93ee1bb95 100644 --- a/tests/baselines/reference/parser645086_2.errors.txt +++ b/tests/baselines/reference/parser645086_2.errors.txt @@ -1,13 +1,10 @@ parser645086_2.ts(1,14): error TS1005: ',' expected. parser645086_2.ts(1,15): error TS1134: Variable declaration expected. -parser645086_2.ts(1,16): error TS1161: Unterminated regular expression literal. -==== parser645086_2.ts (3 errors) ==== +==== parser645086_2.ts (2 errors) ==== var v = /[^]/]/ ~ !!! error TS1005: ',' expected. ~ -!!! error TS1134: Variable declaration expected. - -!!! error TS1161: Unterminated regular expression literal. \ No newline at end of file +!!! error TS1134: Variable declaration expected. \ No newline at end of file diff --git a/tests/baselines/reference/parserMissingToken2.errors.txt b/tests/baselines/reference/parserMissingToken2.errors.txt index e1f7c4f8fb230..71aab05b9600e 100644 --- a/tests/baselines/reference/parserMissingToken2.errors.txt +++ b/tests/baselines/reference/parserMissingToken2.errors.txt @@ -1,7 +1,7 @@ -parserMissingToken2.ts(1,2): error TS1161: Unterminated regular expression literal. +parserMissingToken2.ts(1,1): error TS1161: Unterminated regular expression literal. ==== parserMissingToken2.ts (1 errors) ==== / b; - + ~~~ !!! error TS1161: Unterminated regular expression literal. 
\ No newline at end of file diff --git a/tests/baselines/reference/parserMissingToken2.js b/tests/baselines/reference/parserMissingToken2.js index c4c6a4220d0ba..1faf9ad4971f9 100644 --- a/tests/baselines/reference/parserMissingToken2.js +++ b/tests/baselines/reference/parserMissingToken2.js @@ -4,4 +4,4 @@ / b; //// [parserMissingToken2.js] -/ b;; +/ b; diff --git a/tests/baselines/reference/parserMissingToken2.types b/tests/baselines/reference/parserMissingToken2.types index 88a03e78264e0..ef9428be503a2 100644 --- a/tests/baselines/reference/parserMissingToken2.types +++ b/tests/baselines/reference/parserMissingToken2.types @@ -2,6 +2,6 @@ === parserMissingToken2.ts === / b; ->/ b; : RegExp -> : ^^^^^^ +>/ b : RegExp +> : ^^^^^^ diff --git a/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt b/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt index 71688b6975608..afa9b3e5e4577 100644 --- a/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt +++ b/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt @@ -1,10 +1,10 @@ parserRegularExpressionDivideAmbiguity4.ts(1,1): error TS2304: Cannot find name 'foo'. -parserRegularExpressionDivideAmbiguity4.ts(1,6): error TS1161: Unterminated regular expression literal. +parserRegularExpressionDivideAmbiguity4.ts(1,5): error TS1161: Unterminated regular expression literal. ==== parserRegularExpressionDivideAmbiguity4.ts (2 errors) ==== foo(/notregexp); ~~~ !!! error TS2304: Cannot find name 'foo'. - + ~~~~~~~~~~ !!! error TS1161: Unterminated regular expression literal. 
\ No newline at end of file diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt b/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt index a4c162ac25d3d..339091ece5b38 100644 --- a/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt +++ b/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt @@ -10,7 +10,7 @@ file.tsx(11,1): error TS2362: The left-hand side of an arithmetic operation must file.tsx(11,8): error TS1003: Identifier expected. file.tsx(11,9): error TS2304: Cannot find name 'data'. file.tsx(11,13): error TS1005: ';' expected. -file.tsx(11,20): error TS1161: Unterminated regular expression literal. +file.tsx(11,19): error TS1161: Unterminated regular expression literal. ==== file.tsx (13 errors) ==== @@ -49,5 +49,5 @@ file.tsx(11,20): error TS1161: Unterminated regular expression literal. !!! error TS2304: Cannot find name 'data'. ~ !!! error TS1005: ';' expected. - + ~~ !!! error TS1161: Unterminated regular expression literal. 
\ No newline at end of file diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.js b/tests/baselines/reference/tsxAttributeInvalidNames.js index 3e23b1986fec7..abaf80c3c462d 100644 --- a/tests/baselines/reference/tsxAttributeInvalidNames.js +++ b/tests/baselines/reference/tsxAttributeInvalidNames.js @@ -22,4 +22,4 @@ data = { 32: } / > ; { 32; } -/>;; +/>; diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.types b/tests/baselines/reference/tsxAttributeInvalidNames.types index a90b7267d3b12..56531b6f75662 100644 --- a/tests/baselines/reference/tsxAttributeInvalidNames.types +++ b/tests/baselines/reference/tsxAttributeInvalidNames.types @@ -56,6 +56,6 @@ declare module JSX { > : ^^^ >32 : 32 > : ^^ ->/>; : RegExp -> : ^^^^^^ +>/> : RegExp +> : ^^^^^^ diff --git a/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt b/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt index 9050b11866bfb..0e291c50928c5 100644 --- a/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt +++ b/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt @@ -1,7 +1,7 @@ -unterminatedRegexAtEndOfSource1.ts(1,10): error TS1161: Unterminated regular expression literal. +unterminatedRegexAtEndOfSource1.ts(1,9): error TS1161: Unterminated regular expression literal. ==== unterminatedRegexAtEndOfSource1.ts (1 errors) ==== var a = / - + ~ !!! error TS1161: Unterminated regular expression literal. 
\ No newline at end of file diff --git a/tests/cases/fourslash/whiteSpaceTrimming4.ts b/tests/cases/fourslash/whiteSpaceTrimming4.ts index 59223197681ce..6989dfb907e3e 100644 --- a/tests/cases/fourslash/whiteSpaceTrimming4.ts +++ b/tests/cases/fourslash/whiteSpaceTrimming4.ts @@ -5,4 +5,4 @@ goTo.marker('1'); edit.insert("\n"); -verify.currentFileContentIs("var re = /\\w+ \n /;"); +verify.currentFileContentIs("var re = /\\w+\n /;"); From a7621e0c2d972395c104cd28f8630ccc8cccf25f Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Fri, 26 Apr 2024 06:26:03 +0800 Subject: [PATCH 2/8] Unrevert the change to incremental parser test This is now recovered. --- src/testRunner/unittests/incrementalParser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/testRunner/unittests/incrementalParser.ts b/src/testRunner/unittests/incrementalParser.ts index a42044d0a67db..52c3f16e0c01a 100644 --- a/src/testRunner/unittests/incrementalParser.ts +++ b/src/testRunner/unittests/incrementalParser.ts @@ -160,7 +160,7 @@ describe("unittests:: Incremental Parser", () => { const oldText = ts.ScriptSnapshot.fromString(source); const newTextAndChange = withInsert(oldText, semicolonIndex, "/"); - compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 0); + compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 4); }); it("Regular expression 2", () => { From f1aa06fdbabdeb2fbae33dd3391c3722ceb2910f Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Fri, 26 Apr 2024 06:26:13 +0800 Subject: [PATCH 3/8] Revert the change to `parseErrorAtPosition` in `parser.ts` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change is no longer necessary since it’s moved to `checker.ts` in #58295. This partially reverts https://github.com/microsoft/TypeScript/pull/55600/commits/1a5228d20452db775930201826fe8b46409a1097. 
--- src/compiler/parser.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts index 80c63bbe58964..da1e09dfb61b7 100644 --- a/src/compiler/parser.ts +++ b/src/compiler/parser.ts @@ -62,7 +62,6 @@ import { DeleteExpression, Diagnostic, DiagnosticArguments, - DiagnosticCategory, DiagnosticMessage, Diagnostics, DiagnosticWithDetachedLocation, @@ -2144,11 +2143,7 @@ namespace Parser { // Don't report another error if it would just be at the same position as the last error. const lastError = lastOrUndefined(parseDiagnostics); let result: DiagnosticWithDetachedLocation | undefined; - if (message.category === DiagnosticCategory.Message && lastError && start === lastError.start && length === lastError.length) { - result = createDetachedDiagnostic(fileName, sourceText, start, length, message, ...args); - addRelatedInfo(lastError, result); - } - else if (!lastError || start !== lastError.start) { + if (!lastError || start !== lastError.start) { result = createDetachedDiagnostic(fileName, sourceText, start, length, message, ...args); parseDiagnostics.push(result); } From 978e1959ce163203746b360b83d933a2043f8e10 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Fri, 26 Apr 2024 06:26:35 +0800 Subject: [PATCH 4/8] =?UTF-8?q?I=E2=80=99m=20very=20wrong?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/compiler/scanner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index a15d76fd30665..7cf1918866945 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2542,7 +2542,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean /** @see {scanClassSetExpression} */ let mayContainStrings = false; - /** The number of numeric (anonymous) capturing groups defined in the regex. 
*/ + /** The number of all (named and unnamed) capturing groups defined in the regex. */ let numberOfCapturingGroups = 0; /** All named capturing groups defined in the regex. */ const groupSpecifiers = new Set(); From 76acd92e394bbaec9c2e7583121a44974022947d Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Sat, 27 Apr 2024 14:17:55 +0800 Subject: [PATCH 5/8] Use `scanRange` to prevent variable shadowing --- src/compiler/scanner.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index e78cc161ddd07..5ae766dab8699 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2508,14 +2508,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; } if (reportErrors) { - const endOfRegExpFlags = pos; - pos = startOfRegExpBody; - const saveTokenPos = tokenStart; - const saveTokenFlags = tokenFlags; - scanRegularExpressionWorker(text, endOfRegExpBody, regExpFlags, /*annexB*/ true); - pos = endOfRegExpFlags; - tokenStart = saveTokenPos; - tokenFlags = saveTokenFlags; + scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => { + scanRegularExpressionWorker(regExpFlags, /*annexB*/ true); + }); } } tokenValue = text.substring(tokenStart, pos); @@ -2523,7 +2518,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } return token; - function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, annexB: boolean) { + function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, annexB: boolean) { /** Grammar parameter */ const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); /** Grammar parameter */ From e67692acb3fdd068b4e577dc9ad9fa350f2e4ca8 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Sat, 27 Apr 2024 15:12:43 +0800 Subject: [PATCH 6/8] Correct flags scanning for non-BMP characters --- src/compiler/scanner.ts | 32 
++++++++++--------- ...egularExpressionWithNonBMPFlags.errors.txt | 23 +++++++++++++ .../regularExpressionWithNonBMPFlags.js | 8 +++++ .../regularExpressionWithNonBMPFlags.symbols | 6 ++++ .../regularExpressionWithNonBMPFlags.types | 9 ++++++ .../regularExpressionWithNonBMPFlags.ts | 3 ++ 6 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.js create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.types create mode 100644 tests/cases/compiler/regularExpressionWithNonBMPFlags.ts diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 5ae766dab8699..d85c7c9ab0d78 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2485,27 +2485,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; let regExpFlags = RegularExpressionFlags.None; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = codePointAt(text, pos); if (!isIdentifierPart(ch, languageVersion)) { break; } + const size = charSize(ch); if (reportErrors) { - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (regExpFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); + 
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size); } else { regExpFlags |= flag; - checkRegularExpressionFlagAvailable(flag); + checkRegularExpressionFlagAvailability(flag); } } - pos++; + pos += size; } if (reportErrors) { scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => { @@ -2752,25 +2753,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = codePointAt(text, pos); if (!isIdentifierPart(ch, languageVersion)) { break; } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const size = charSize(ch); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size); } else { currFlags |= flag; - checkRegularExpressionFlagAvailable(flag); + checkRegularExpressionFlagAvailability(flag); } - pos++; + pos += size; } return currFlags; } @@ -3470,7 +3472,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean }); } - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags) { + function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags) { const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; if 
(availableFrom && languageVersion < availableFrom) { error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt new file mode 100644 index 0000000000000..b91d0d9c12fd1 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt @@ -0,0 +1,23 @@ +regularExpressionWithNonBMPFlags.ts(1,23): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,25): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,28): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,41): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,43): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,45): error TS1499: Unknown regular expression flag. + + +==== regularExpressionWithNonBMPFlags.ts (6 errors) ==== + const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. 
+ \ No newline at end of file diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js new file mode 100644 index 0000000000000..847b74684b459 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js @@ -0,0 +1,8 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +//// [regularExpressionWithNonBMPFlags.ts] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + + +//// [regularExpressionWithNonBMPFlags.js] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols new file mode 100644 index 0000000000000..29c7a53335550 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols @@ -0,0 +1,6 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 0, 5)) + diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types new file mode 100644 index 0000000000000..5f385d608df02 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types @@ -0,0 +1,9 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : RegExp +> : ^^^^^^ +>/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp +> : ^^^^^^ + diff --git a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts new file 
mode 100644 index 0000000000000..65eaca6d9cff8 --- /dev/null +++ b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts @@ -0,0 +1,3 @@ +// @target: esnext + +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; From 11ad5f5ecf81bddc7dbae21618542aa0989e6051 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Wed, 22 May 2024 07:57:42 +0800 Subject: [PATCH 7/8] Revert "Correct flags scanning for non-BMP characters" This reverts commit e67692acb3fdd068b4e577dc9ad9fa350f2e4ca8. --- src/compiler/scanner.ts | 32 +++++++++---------- ...egularExpressionWithNonBMPFlags.errors.txt | 23 ------------- .../regularExpressionWithNonBMPFlags.js | 8 ----- .../regularExpressionWithNonBMPFlags.symbols | 6 ---- .../regularExpressionWithNonBMPFlags.types | 9 ------ .../regularExpressionWithNonBMPFlags.ts | 3 -- 6 files changed, 15 insertions(+), 66 deletions(-) delete mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt delete mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.js delete mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols delete mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.types delete mode 100644 tests/cases/compiler/regularExpressionWithNonBMPFlags.ts diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 68e8262f6e586..f3bc41641f64c 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2522,24 +2522,23 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (!isIdentifierPart(ch, languageVersion)) { break; } - const size = charSize(ch); if (reportErrors) { - const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); + const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, size); + error(Diagnostics.Unknown_regular_expression_flag, pos, 1); } else if (regExpFlags & 
flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, size); + error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); } else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size); + error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); } else { regExpFlags |= flag; - checkRegularExpressionFlagAvailability(flag, size); + checkRegularExpressionFlagAvailable(flag, pos); } } - pos += size; + pos++; } if (reportErrors) { scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => { @@ -2795,26 +2794,25 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (true) { - const ch = codePointChecked(pos); + const ch = charCodeChecked(pos); if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } - const size = charSize(ch); - const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); + const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, size); + error(Diagnostics.Unknown_regular_expression_flag, pos, 1); } else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, size); + error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); } else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size); + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); } else { currFlags |= flag; - checkRegularExpressionFlagAvailability(flag, size); + checkRegularExpressionFlagAvailable(flag, pos); } - pos += size; + pos++; } 
return currFlags; } @@ -3527,10 +3525,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean }); } - function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, size: number) { + function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; if (availableFrom && languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, size, getNameOfScriptTarget(availableFrom)); + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); } } diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt deleted file mode 100644 index b91d0d9c12fd1..0000000000000 --- a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt +++ /dev/null @@ -1,23 +0,0 @@ -regularExpressionWithNonBMPFlags.ts(1,23): error TS1499: Unknown regular expression flag. -regularExpressionWithNonBMPFlags.ts(1,25): error TS1499: Unknown regular expression flag. -regularExpressionWithNonBMPFlags.ts(1,28): error TS1499: Unknown regular expression flag. -regularExpressionWithNonBMPFlags.ts(1,41): error TS1499: Unknown regular expression flag. -regularExpressionWithNonBMPFlags.ts(1,43): error TS1499: Unknown regular expression flag. -regularExpressionWithNonBMPFlags.ts(1,45): error TS1499: Unknown regular expression flag. - - -==== regularExpressionWithNonBMPFlags.ts (6 errors) ==== - const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; - ~~ -!!! error TS1499: Unknown regular expression flag. - ~~ -!!! error TS1499: Unknown regular expression flag. - ~~ -!!! error TS1499: Unknown regular expression flag. - ~~ -!!!
error TS1499: Unknown regular expression flag. - ~~ -!!! error TS1499: Unknown regular expression flag. - ~~ -!!! error TS1499: Unknown regular expression flag. - \ No newline at end of file diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js deleted file mode 100644 index 847b74684b459..0000000000000 --- a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js +++ /dev/null @@ -1,8 +0,0 @@ -//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// - -//// [regularExpressionWithNonBMPFlags.ts] -const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; - - -//// [regularExpressionWithNonBMPFlags.js] -const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols deleted file mode 100644 index 29c7a53335550..0000000000000 --- a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols +++ /dev/null @@ -1,6 +0,0 @@ -//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// - -=== regularExpressionWithNonBMPFlags.ts === -const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; ->𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 0, 5)) - diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types deleted file mode 100644 index 5f385d608df02..0000000000000 --- a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types +++ /dev/null @@ -1,9 +0,0 @@ -//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// - -=== regularExpressionWithNonBMPFlags.ts === -const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; ->𝘳𝘦𝘨𝘦𝘹 : RegExp -> : ^^^^^^
->/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp -> : ^^^^^^ - diff --git a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts deleted file mode 100644 index 65eaca6d9cff8..0000000000000 --- a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts +++ /dev/null @@ -1,3 +0,0 @@ -// @target: esnext - -const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; From 77235de0afea6bb273b0bb2a70f72a63efbf1e06 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Wed, 22 May 2024 10:44:28 +0800 Subject: [PATCH 8/8] Apply Suggested Changes --- src/compiler/checker.ts | 2 +- src/compiler/scanner.ts | 79 +++++++++---------- src/compiler/types.ts | 22 +++--- src/testRunner/tests.ts | 2 +- ...erRecovery.ts => regExpScannerRecovery.ts} | 42 +++++----- 5 files changed, 73 insertions(+), 74 deletions(-) rename src/testRunner/unittests/{regExpParserRecovery.ts => regExpScannerRecovery.ts} (63%) diff --git a/src/compiler/checker.ts b/src/compiler/checker.ts index bb73721914db8..997a53eb6346d 100644 --- a/src/compiler/checker.ts +++ b/src/compiler/checker.ts @@ -31885,7 +31885,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker { scanner.setScriptTarget(sourceFile.languageVersion); scanner.setLanguageVariant(sourceFile.languageVariant); scanner.setOnError((message, length, arg0) => { - // emulate `parseErrorAtPosition` from parser.ts + // For providing spelling suggestions const start = scanner!.getTokenEnd(); if (message.category === DiagnosticCategory.Message && lastError && start === lastError.start && length === lastError.length) { const error = createDetachedDiagnostic(sourceFile.fileName, sourceFile.text, start, length, message, arg0); diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index f3bc41641f64c..1279d5a8b2e84 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -21,7 +21,6 @@ import { KeywordSyntaxKind,
LanguageFeatureMinimumTarget, LanguageVariant, - lastOrUndefined, LineAndCharacter, MapLike, parsePseudoBigInt, @@ -1614,7 +1613,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF && pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && charCodeUnchecked(pos + 2) !== CharacterCodes.openBrace ) { - // For regular expressions in Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character + // For regular expressions in any Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character // for the purpose of determining whether a character class range is out of order // https://tc39.es/ecma262/#prod-RegExpUnicodeEscapeSequence const nextStart = pos; @@ -2429,7 +2428,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos = startOfRegExpBody; let inEscape = false; // Although nested character classes are allowed in Unicode Sets mode, - // an unescaped slash is nevertheless invalid even in a character class in Unicode mode. + // an unescaped slash is nevertheless invalid even in a character class in any Unicode mode. // Additionally, parsing nested character classes will misinterpret regexes like `/[[]/` // as unterminated, consuming characters beyond the slash. (This even applies to `/[[]/v`, // which should be parsed as a well-terminated regex with an incomplete character class.) @@ -2438,13 +2437,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean while (true) { // If we reach the end of a file, or hit a newline, then this is an unterminated // regex. Report error and return what we have so far. 
- if (pos >= end) { - tokenFlags |= TokenFlags.Unterminated; - break; - } - - const ch = charCodeUnchecked(pos); - if (isLineBreak(ch)) { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF || isLineBreak(ch)) { tokenFlags |= TokenFlags.Unterminated; break; } @@ -2477,7 +2471,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos = startOfRegExpBody; inEscape = false; let characterClassDepth = 0; - const bracketStack: CharacterCodes[] = []; + let inDecimalQuantifier = false; + let groupDepth = 0; while (pos < endOfRegExpBody) { const ch = charCodeUnchecked(pos); if (inEscape) { @@ -2493,18 +2488,23 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean characterClassDepth--; } else if (!characterClassDepth) { - if (ch === CharacterCodes.openParen) { - bracketStack.push(CharacterCodes.closeParen); + if (ch === CharacterCodes.openBrace) { + inDecimalQuantifier = true; } - else if (ch === CharacterCodes.openBrace) { - bracketStack.push(CharacterCodes.closeBrace); + else if (ch === CharacterCodes.closeBrace && inDecimalQuantifier) { + inDecimalQuantifier = false; } - else if (ch === lastOrUndefined(bracketStack)) { - bracketStack.pop(); - } - else if (ch === CharacterCodes.closeParen || ch === CharacterCodes.closeBracket || ch === CharacterCodes.closeBrace) { - // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex. - break; + else if (!inDecimalQuantifier) { + if (ch === CharacterCodes.openParen) { + groupDepth++; + } + else if (ch === CharacterCodes.closeParen && groupDepth) { + groupDepth--; + } + else if (ch === CharacterCodes.closeParen || ch === CharacterCodes.closeBracket || ch === CharacterCodes.closeBrace) { + // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex. 
+ break; + } } } pos++; @@ -2517,9 +2517,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // Consume the slash character pos++; let regExpFlags = RegularExpressionFlags.None; - while (pos < end) { - const ch = codePointUnchecked(pos); - if (!isIdentifierPart(ch, languageVersion)) { + while (true) { + const ch = codePointChecked(pos); + if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } if (reportErrors) { @@ -2530,7 +2530,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean else if (regExpFlags & flag) { error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); } - else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { + else if (((regExpFlags | flag) & RegularExpressionFlags.AnyUnicodeMode) === RegularExpressionFlags.AnyUnicodeMode) { error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); } else { @@ -2560,9 +2560,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean /** Grammar parameter */ var unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); /** Grammar parameter */ - var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); + var anyUnicodeMode = !!(regExpFlags & RegularExpressionFlags.AnyUnicodeMode); - if (unicodeMode) { + if (anyUnicodeMode) { // Annex B treats any unicode mode as the strict syntax. 
annexB = false; } @@ -2719,7 +2719,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); } else { - if (unicodeMode) { + if (anyUnicodeMode) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); } isPreviousTermQuantifiable = true; @@ -2731,7 +2731,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } else if (!min) { - if (unicodeMode) { + if (anyUnicodeMode) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); } isPreviousTermQuantifiable = true; @@ -2775,7 +2775,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // falls through case CharacterCodes.closeBracket: case CharacterCodes.closeBrace: - if (unicodeMode || ch === CharacterCodes.closeParen) { + if (anyUnicodeMode || ch === CharacterCodes.closeParen) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); } pos++; @@ -2832,7 +2832,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean scanGroupName(/*isReference*/ true); scanExpectedChar(CharacterCodes.greaterThan); } - else if (unicodeMode) { + else if (anyUnicodeMode) { error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); } break; @@ -2875,6 +2875,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); let ch = charCodeChecked(pos); switch (ch) { + case CharacterCodes.EOF: + error(Diagnostics.Undetermined_character_escape, pos - 1, 1); + return "\\"; case CharacterCodes.c: pos++; ch = charCodeChecked(pos); @@ -2882,7 +2885,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return 
String.fromCharCode(ch & 0x1f); } - if (unicodeMode) { + if (anyUnicodeMode) { error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); } else if (atomEscape && annexB) { @@ -2913,12 +2916,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return String.fromCharCode(ch); default: - if (pos >= end) { - error(Diagnostics.Undetermined_character_escape, pos - 1, 1); - return "\\"; - } pos--; - return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); + return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ anyUnicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); } } @@ -3464,11 +3463,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } scanExpectedChar(CharacterCodes.closeBrace); - if (!unicodeMode) { + if (!anyUnicodeMode) { error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); } } - else if (unicodeMode) { + else if (anyUnicodeMode) { error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); } return true; @@ -3490,7 +3489,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanSourceCharacter(): string { - const size = unicodeMode ? charSize(charCodeChecked(pos)) : 1; + const size = anyUnicodeMode ? charSize(charCodeChecked(pos)) : 1; pos += size; return size > 0 ? 
text.substring(pos - size, pos) : ""; } diff --git a/src/compiler/types.ts b/src/compiler/types.ts index aae7c5494a17d..20819fa1d0751 100644 --- a/src/compiler/types.ts +++ b/src/compiler/types.ts @@ -2765,17 +2765,17 @@ export interface RegularExpressionLiteral extends LiteralExpression { // dprint-ignore /** @internal */ export const enum RegularExpressionFlags { - None = 0, - HasIndices = 1 << 0, // d - Global = 1 << 1, // g - IgnoreCase = 1 << 2, // i - Multiline = 1 << 3, // m - DotAll = 1 << 4, // s - Unicode = 1 << 5, // u - UnicodeSets = 1 << 6, // v - Sticky = 1 << 7, // y - UnicodeMode = Unicode | UnicodeSets, - Modifiers = IgnoreCase | Multiline | DotAll, + None = 0, + HasIndices = 1 << 0, // d + Global = 1 << 1, // g + IgnoreCase = 1 << 2, // i + Multiline = 1 << 3, // m + DotAll = 1 << 4, // s + Unicode = 1 << 5, // u + UnicodeSets = 1 << 6, // v + Sticky = 1 << 7, // y + AnyUnicodeMode = Unicode | UnicodeSets, + Modifiers = IgnoreCase | Multiline | DotAll, } export interface NoSubstitutionTemplateLiteral extends LiteralExpression, TemplateLiteralLikeNode, Declaration { diff --git a/src/testRunner/tests.ts b/src/testRunner/tests.ts index 04f2688518fb7..c273be5c65a34 100644 --- a/src/testRunner/tests.ts +++ b/src/testRunner/tests.ts @@ -47,7 +47,7 @@ export * from "./unittests/paths.js"; export * from "./unittests/printer.js"; export * from "./unittests/programApi.js"; export * from "./unittests/publicApi.js"; -export * from "./unittests/regExpParserRecovery.js"; +export * from "./unittests/regExpScannerRecovery.js"; export * from "./unittests/reuseProgramStructure.js"; export * from "./unittests/semver.js"; export * from "./unittests/services/cancellableLanguageServiceOperations.js"; diff --git a/src/testRunner/unittests/regExpParserRecovery.ts b/src/testRunner/unittests/regExpScannerRecovery.ts similarity index 63% rename from src/testRunner/unittests/regExpParserRecovery.ts rename to src/testRunner/unittests/regExpScannerRecovery.ts index 
a1b0a1d275c87..a4e6dd02a7394 100644 --- a/src/testRunner/unittests/regExpParserRecovery.ts +++ b/src/testRunner/unittests/regExpScannerRecovery.ts @@ -1,6 +1,6 @@ import * as ts from "../_namespaces/ts.js"; -describe("unittests:: regExpParserRecovery", () => { +describe("unittests:: regExpScannerRecovery", () => { const testCases = [ "/", "/[]", @@ -16,7 +16,7 @@ describe("unittests:: regExpParserRecovery", () => { "/{[]}", "/([])", "/[)}({]", - "/({[)}]})", + "/({[]})", "/\\[", "/\\{", "/\\(", @@ -42,27 +42,27 @@ describe("unittests:: regExpParserRecovery", () => { "/[\\}]", "/(\\})", "/{\\}}", - "/({[\\]})]})", + "/({[\\])]})", ]; const whiteSpaceSequences = [ "", " ", - "\t\v\r\n", - "\u3000\u2028", + "\t\f", + "\u3000\u2003", ]; - it("stops parsing unterminated regexes at correct position", () => { - ts.forEach(testCases, testCase => { - ts.forEach(whiteSpaceSequences, whiteSpaces => { - const testCaseWithWhiteSpaces = testCase + whiteSpaces; - const sources = [ - `const regex = ${testCaseWithWhiteSpaces};`, - `(${testCaseWithWhiteSpaces});`, - `([${testCaseWithWhiteSpaces}]);`, - `({prop: ${testCaseWithWhiteSpaces}});`, - `({prop: ([(${testCaseWithWhiteSpaces})])});`, - `({[(${testCaseWithWhiteSpaces}).source]: 42});`, - ]; - ts.forEach(sources, source => { + for (const testCase of testCases) { + for (const whiteSpaces of whiteSpaceSequences) { + const testCaseWithWhiteSpaces = testCase + whiteSpaces; + const sources = [ + `const regex = ${testCaseWithWhiteSpaces};`, + `(${testCaseWithWhiteSpaces});`, + `([${testCaseWithWhiteSpaces}]);`, + `({prop: ${testCaseWithWhiteSpaces}});`, + `({prop: ([(${testCaseWithWhiteSpaces})])});`, + `({[(${testCaseWithWhiteSpaces}).source]: 42});`, + ]; + for (const source of sources) { + it("stops parsing unterminated regexes at correct position: " + JSON.stringify(source), () => { const { parseDiagnostics } = ts.createLanguageServiceSourceFile( /*fileName*/ "", ts.ScriptSnapshot.fromString(source), @@ -75,7 +75,7 @@ 
describe("unittests:: regExpParserRecovery", () => { assert.equal(diagnostic.start, source.indexOf("/"), "Diagnostic should start at where the regex starts"); assert.equal(diagnostic.length, testCase.length, "Diagnostic should end at where the regex ends"); }); - }); - }); - }); + } + } + } });