Skip to content

Commit

Permalink
Apply Suggested Changes
Browse files Browse the repository at this point in the history
  • Loading branch information
graphemecluster committed May 21, 2024
1 parent 603c3cf commit 2868bd6
Showing 1 changed file with 34 additions and 29 deletions.
63 changes: 34 additions & 29 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// Quickly get to the end of regex such that we know the flags
let p = tokenStart + 1;
let inEscape = false;
let namedCaptureGroups = false;
// Although nested character classes are allowed in Unicode Sets mode,
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
Expand Down Expand Up @@ -2469,6 +2470,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
else if (ch === CharacterCodes.closeBracket) {
inCharacterClass = false;
}
else if (
ch === CharacterCodes.openParen
&& charCodeUnchecked(p + 1) === CharacterCodes.question
&& charCodeUnchecked(p + 2) === CharacterCodes.lessThan
&& charCodeUnchecked(p + 3) !== CharacterCodes.equals
&& charCodeUnchecked(p + 3) !== CharacterCodes.exclamation
) {
namedCaptureGroups = true;
}
p++;
}
const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
Expand Down Expand Up @@ -2505,7 +2515,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
const saveEnd = end;
pos = tokenStart + 1;
end = endOfBody;
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true);
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true, namedCaptureGroups);
tokenStart = saveTokenStart;
tokenFlags = saveTokenFlags;
pos = savePos;
Expand All @@ -2517,7 +2527,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return token;
}

function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean, namedCaptureGroups: boolean) {
// Why var? It avoids TDZ checks in the runtime which can be costly.
// See: https://github.com/microsoft/TypeScript/issues/52924
/* eslint-disable no-var */
Expand All @@ -2527,10 +2537,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
/** Grammar parameter */
var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);

if (unicodeMode) {
// Annex B treats any unicode mode as the strict syntax.
annexB = false;
}
// Annex B treats any unicode mode as the strict syntax.
var anyUnicodeModeOrNonAnnexB = unicodeMode || !annexB;

/** @see {scanClassSetExpression} */
var mayContainStrings = false;
Expand Down Expand Up @@ -2626,7 +2634,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.exclamation:
pos++;
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
isPreviousTermQuantifiable = annexB;
isPreviousTermQuantifiable = !anyUnicodeModeOrNonAnnexB;
break;
case CharacterCodes.lessThan:
const groupNameStart = pos;
Expand Down Expand Up @@ -2675,7 +2683,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
const digitsStart = pos;
scanDigits();
const min = tokenValue;
if (annexB && !min) {
if (!anyUnicodeModeOrNonAnnexB && !min) {
isPreviousTermQuantifiable = true;
break;
}
Expand All @@ -2693,26 +2701,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
break;
}
}
else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
else if (max && Number.parseInt(min) > Number.parseInt(max) && (anyUnicodeModeOrNonAnnexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
}
}
else if (!min) {
if (!annexB) {
if (anyUnicodeModeOrNonAnnexB) {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
}
isPreviousTermQuantifiable = true;
break;
}
if (charCodeChecked(pos) !== CharacterCodes.closeBrace) {
if (annexB) {
isPreviousTermQuantifiable = true;
break;
}
else {
if (anyUnicodeModeOrNonAnnexB) {
error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
pos--;
}
else {
isPreviousTermQuantifiable = true;
break;
}
}
// falls through
case CharacterCodes.asterisk:
Expand Down Expand Up @@ -2754,7 +2762,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// Assume what starting from the character to be outside of the regex
return;
}
if (!annexB || ch === CharacterCodes.closeParen) {
if (anyUnicodeModeOrNonAnnexB || ch === CharacterCodes.closeParen) {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
}
pos++;
Expand Down Expand Up @@ -2811,10 +2819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
scanGroupName(/*isReference*/ true);
scanExpectedChar(CharacterCodes.greaterThan);
}
else {
// This is actually allowed in Annex B if there are no named capturing groups in the regex,
// but if we were going to suppress these errors, we would have to record the positions of all '\k's
// and defer the errors until after the scanning to know if the regex has any named capturing groups.
else if (namedCaptureGroups) {
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
}
break;
Expand Down Expand Up @@ -2864,7 +2869,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
pos++;
return String.fromCharCode(ch & 0x1f);
}
if (!annexB) {
if (anyUnicodeModeOrNonAnnexB) {
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
}
else if (atomEscape) {
Expand Down Expand Up @@ -2900,7 +2905,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return "\\";
}
pos--;
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ !anyUnicodeModeOrNonAnnexB || "annex-b");
}
}

Expand Down Expand Up @@ -2949,12 +2954,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (isClassContentExit(ch)) {
return;
}
if (!minCharacter && !annexB) {
if (!minCharacter && anyUnicodeModeOrNonAnnexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
}
const maxStart = pos;
const maxCharacter = scanClassAtom();
if (!maxCharacter && !annexB) {
if (!maxCharacter && anyUnicodeModeOrNonAnnexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
continue;
}
Expand Down Expand Up @@ -3450,12 +3455,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start);
}
}
else if (annexB) {
pos--;
return false;
else if (anyUnicodeModeOrNonAnnexB) {
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
}
else {
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
pos--;
return false;
}
return true;
}
Expand Down Expand Up @@ -3500,7 +3505,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
forEach(decimalEscapes, escape => {
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
// either a LegacyOctalEscapeSequence or IdentityEscape
if (!annexB && escape.value > numberOfCapturingGroups) {
if (anyUnicodeModeOrNonAnnexB && escape.value > numberOfCapturingGroups) {
if (numberOfCapturingGroups) {
error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
}
Expand Down

0 comments on commit 2868bd6

Please sign in to comment.