From 40ce6e9b9d33be34c64c55ac46083ec7750613c6 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sat, 10 Feb 2018 23:22:20 +0900 Subject: [PATCH 01/18] replace RegExp validation --- src/regexp.js | 880 +++++++++++++++++++++++++++ src/state.js | 3 + src/tokenize.js | 78 +-- test/run.js | 1 + test/tests-regexp.js | 1347 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2247 insertions(+), 62 deletions(-) create mode 100644 src/regexp.js create mode 100644 test/tests-regexp.js diff --git a/src/regexp.js b/src/regexp.js new file mode 100644 index 000000000..01f7a2f57 --- /dev/null +++ b/src/regexp.js @@ -0,0 +1,880 @@ +const BACKSPACE = 0x08 +const CHARACTER_TABULATION = 0x09 +const LINE_FEED = 0x0A +const LINE_TABULATION = 0x0B +const FORM_FEED = 0x0C +const CARRIAGE_RETURN = 0x0D +const EXCLAMATION_MARK = 0x21 // ! +const DOLLAR_SIGN = 0x24 // $ +const LEFT_PARENTHESIS = 0x28 // ( +const RIGHT_PARENTHESIS = 0x29 // ) +const ASTERISK = 0x2A // * +const PLUS_SIGN = 0x2B // + +const COMMA = 0x2C // , +const HYPHEN_MINUS = 0x2D // - +const FULL_STOP = 0x2E // . +const SOLIDUS = 0x2F // / +const DIGIT_ZERO = 0x30 // 0 +const DIGIT_ONE = 0x31 // 1 +const DIGIT_THREE = 0x33 // 3 +const DIGIT_SEVEN = 0x37 // 7 +const DIGIT_NINE = 0x39 // 9 +const COLON = 0x3A // : +const EQUALS_SIGN = 0x3D // = +const QUESTION_MARK = 0x3F // ? 
+const LATIN_CAPITAL_LETTER_A = 0x41 // A +const LATIN_CAPITAL_LETTER_B = 0x42 // B +const LATIN_CAPITAL_LETTER_D = 0x44 // D +const LATIN_CAPITAL_LETTER_F = 0x46 // F +const LATIN_CAPITAL_LETTER_S = 0x53 // S +const LATIN_CAPITAL_LETTER_W = 0x57 // W +const LATIN_CAPITAL_LETTER_Z = 0x5A // Z +const LOW_LINE = 0x5F // _ +const LATIN_SMALL_LETTER_A = 0x61 // a +const LATIN_SMALL_LETTER_B = 0x62 // b +const LATIN_SMALL_LETTER_C = 0x63 // c +const LATIN_SMALL_LETTER_D = 0x64 // d +const LATIN_SMALL_LETTER_F = 0x66 // f +const LATIN_SMALL_LETTER_N = 0x6E // n +const LATIN_SMALL_LETTER_R = 0x72 // r +const LATIN_SMALL_LETTER_S = 0x73 // s +const LATIN_SMALL_LETTER_T = 0x74 // t +const LATIN_SMALL_LETTER_U = 0x75 // u +const LATIN_SMALL_LETTER_V = 0x76 // v +const LATIN_SMALL_LETTER_W = 0x77 // w +const LATIN_SMALL_LETTER_X = 0x78 // x +const LATIN_SMALL_LETTER_Z = 0x7A // z +const LEFT_SQUARE_BRACKET = 0x5B // [ +const REVERSE_SOLIDUS = 0x5C // \ +const RIGHT_SQUARE_BRACKET = 0x5D // ] +const CIRCUMFLEX_ACCENT = 0x5E // ^ +const LEFT_CURLY_BRACKET = 0x7B // { +const VERTICAL_LINE = 0x7C // | +const RIGHT_CURLY_BRACKET = 0x7D // } + +function isSyntaxCharacter(ch) { + return ( + ch === CIRCUMFLEX_ACCENT || + ch === DOLLAR_SIGN || + ch === REVERSE_SOLIDUS || + ch === FULL_STOP || + ch === ASTERISK || + ch === PLUS_SIGN || + ch === QUESTION_MARK || + ch === LEFT_PARENTHESIS || + ch === RIGHT_PARENTHESIS || + ch === LEFT_SQUARE_BRACKET || + ch === RIGHT_SQUARE_BRACKET || + ch === LEFT_CURLY_BRACKET || + ch === RIGHT_CURLY_BRACKET || + ch === VERTICAL_LINE + ) +} + +function isControlEscape(ch) { + return ( + ch === LATIN_SMALL_LETTER_F || + ch === LATIN_SMALL_LETTER_N || + ch === LATIN_SMALL_LETTER_R || + ch === LATIN_SMALL_LETTER_T || + ch === LATIN_SMALL_LETTER_V + ) +} + +function isControlLetter(ch) { + return ( + (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_Z) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) + ) +} + +function 
isCharacterClassEscape(ch) { + return ( + ch === LATIN_SMALL_LETTER_D || + ch === LATIN_CAPITAL_LETTER_D || + ch === LATIN_SMALL_LETTER_S || + ch === LATIN_CAPITAL_LETTER_S || + ch === LATIN_SMALL_LETTER_W || + ch === LATIN_CAPITAL_LETTER_W + ) +} + +function isValidUnicode(ch) { + return ch >= 0 && ch <= 0x10FFFF +} + +function isDecimalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_NINE +} + +function isHexDigit(ch) { + return ( + (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || + (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) + ) +} + +function isOctalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN +} + +export class RegExpValidator { + /** + * Initialize this validator. + * @param {Parser} parser The parser. + */ + constructor(parser) { + this.parser = parser + this.ecmaVersion = parser.options.ecmaVersion + this.validFlags = `gim${this.ecmaVersion >= 6 ? "uy" : ""}${this.ecmaVersion >= 9 ? "s" : ""}` + this.pattern = "" + this.start = 0 + this.pos = 0 + this.numCapturingParens = 0 + this.maxBackReference = 0 + } + + /** + * Validate the flags part of a given RegExpLiteral. + * + * @param {number} start The index of the start location of the RegExp literal. + * @param {string} flags The flags part of the RegExpLiteral. + * @returns {void} + */ + validateFlags(start, flags) { + const validFlags = this.validFlags + for (let i = 0; i < flags.length; i++) { + const flag = flags.charAt(i) + if (validFlags.indexOf(flag) == -1) { + this.parser.raise(start, "Invalid regular expression flag") + } + if (flags.indexOf(flag, i + 1) > -1) { + this.parser.raise(start, "Duplicate regular expression flag") + } + } + } + + /** + * Validate the pattern part of a given RegExpLiteral. + * + * This is syntax: + * https://www.ecma-international.org/ecma-262/8.0/#sec-regular-expressions-patterns + * + * @param {number} start The index of the start location of the RegExp literal. 
+ * @param {string} pattern The pattern part of the RegExpLiteral. + * @param {boolean} unicode `true` if the RegExp has `u` flag. + * @returns {void} + */ + validatePattern(start, pattern, unicode) { + this.start = start | 0 + this.pattern = pattern + "" + this.pos = 0 + this.numCapturingParens = 0 + this.maxBackReference = 0 + + this.disjunction(unicode) + if (this.pos !== this.pattern.length) { + // Make the same messages as V8. + if (this.eat(RIGHT_PARENTHESIS)) { + this.raise("Unmatched ')'") + } + if (this.eat(RIGHT_SQUARE_BRACKET) || this.eat(RIGHT_CURLY_BRACKET)) { + this.raise("Lone quantifier brackets") + } + } + // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape + if (this.maxBackReference > this.numCapturingParens) { + this.raise("Invalid escape") + } + } + + raise(message) { + this.parser.raise(this.start, `Invalid regular expression: /${this.pattern}/: ${message}`) + } + + // Node.js 0.12/0.10 don't support String.prototype.codePointAt(). + codePointAt(i) { + const s = this.pattern + const l = s.length + if (i >= l) { + return -1 + } + const c = s.charCodeAt(i) + if (c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { + return c + } + return (c << 10) + s.charCodeAt(i + 1) - 0x35FDC00 + } + + nextIndex(i) { + const s = this.pattern + const l = s.length + if (i >= l) { + return l + } + const c = s.charCodeAt(i) + if (c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { + return i + 1 + } + return i + 2 + } + + current() { + return this.codePointAt(this.pos) + } + + lookahead() { + return this.codePointAt(this.nextIndex(this.pos)) + } + + advance() { + this.pos = this.nextIndex(this.pos) + } + + eat(ch) { + if (this.codePointAt(this.pos) === ch) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction + disjunction(unicode) { + this.alternative(unicode) + while (this.eat(VERTICAL_LINE)) { + this.alternative(unicode) + } + + // Make the same message as V8. 
+ if (this.eatQuantifier(unicode, true)) { + this.raise("Nothing to repeat") + } + if (this.eat(LEFT_CURLY_BRACKET)) { + this.raise("Lone quantifier brackets") + } + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative + alternative(unicode) { + while (this.pos < this.pattern.length && this.eatTerm(unicode)) + ; + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term + eatTerm(unicode) { + const start = this.pos + + if (this.eatQuantifiableAssertion()) { + if (this.eatQuantifier(unicode)) { + // Make the same message as V8. + if (unicode) { + this.raise("Invalid quantifier") + } + return true + } + this.pos = start + } + + if (this.eatAssertion(unicode)) { + return true + } + + if (unicode ? this.eatAtom(true) : this.eatExtendedAtom()) { + this.eatQuantifier(unicode) + return true + } + + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion + eatAssertion(unicode) { + return ( + this.eat(CIRCUMFLEX_ACCENT) || + this.eat(DOLLAR_SIGN) || + this._eatWordBoundary() || + this._eatLookaheadAssertion(unicode) + ) + } + _eatWordBoundary() { + const start = this.pos + if (this.eat(REVERSE_SOLIDUS)) { + if (this.eat(LATIN_CAPITAL_LETTER_B) || this.eat(LATIN_SMALL_LETTER_B)) { + return true + } + this.pos = start + } + return false + } + _eatLookaheadAssertion(unicode) { + const start = this.pos + if (this.eat(LEFT_PARENTHESIS)) { + if (this.eat(QUESTION_MARK) && (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK))) { + this.disjunction(unicode) + if (!this.eat(RIGHT_PARENTHESIS)) { + this.raise("Unterminated group") + } + return true + } + this.pos = start + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-QuantifiableAssertion + eatQuantifiableAssertion() { + return this._eatLookaheadAssertion(false) + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier + eatQuantifier(unicode, noError = false) { + if 
(this.eatQuantifierPrefix(unicode, noError)) { + this.eat(QUESTION_MARK) + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix + eatQuantifierPrefix(unicode, noError) { + return ( + this.eat(ASTERISK) || + this.eat(PLUS_SIGN) || + this.eat(QUESTION_MARK) || + this._eatBracedQuantifier(unicode, noError) + ) + } + _eatBracedQuantifier(unicode, noError) { + const start = this.pos + if (this.eat(LEFT_CURLY_BRACKET)) { + let i = this.pos, min = 0, max = -1 + if (this.eatDecimalDigits()) { + min = this._parseDecimalInt(i, this.pos) + if (this.eat(COMMA)) { + i = this.pos + if (this.eatDecimalDigits()) { + max = this._parseDecimalInt(i, this.pos) + } + } + if (this.eat(RIGHT_CURLY_BRACKET)) { + // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term + if (max !== -1 && max < min && !noError) { + this.raise("numbers out of order in {} quantifier") + } + return true + } + } + if (unicode && !noError) { + this.raise("Incomplete quantifier") + } + this.pos = start + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-Atom + eatAtom(unicode) { + return ( + this.eatPatternCharacters() || + this.eat(FULL_STOP) || + this._eatReverseSolidusAtomEscape(unicode) || + this.eatCharacterClass(unicode) || + this._eatCapturingOrUncapturingGroup(unicode) + ) + } + _eatReverseSolidusAtomEscape(unicode) { + const start = this.pos + if (this.eat(REVERSE_SOLIDUS)) { + if (this.eatAtomEscape(unicode)) { + return true + } + this.pos = start + } + return false + } + _eatCapturingOrUncapturingGroup(unicode) { + if (this.eat(LEFT_PARENTHESIS)) { + const uncaptured = this.eat(QUESTION_MARK) + if (uncaptured && !this.eat(COLON)) { + this.raise("Invalid group") + } + this.disjunction(unicode) + if (!this.eat(RIGHT_PARENTHESIS)) { + this.raise("Unterminated group") + } + if (!uncaptured) { + this.numCapturingParens += 1 + } + return true + } + return false + } + + // 
https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom + eatExtendedAtom() { + return ( + this.eat(FULL_STOP) || + this._eatReverseSolidusAtomEscape(false) || + this.eatCharacterClass(false) || + this._eatCapturingOrUncapturingGroup(false) || + this.eatInvalidBracedQuantifier() || + this.eatExtendedPatternCharacter() + ) + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier + eatInvalidBracedQuantifier() { + if (this._eatBracedQuantifier(false, true)) { + this.raise("Nothing to repeat") + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter + eatSyntaxCharacter() { + if (isSyntaxCharacter(this.current())) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter + // But eat eager. + eatPatternCharacters() { + const start = this.pos + let ch = 0 + while ((ch = this.current()) !== -1 && !isSyntaxCharacter(ch)) { + this.advance() + } + return this.pos !== start + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedPatternCharacter + eatExtendedPatternCharacter() { + const ch = this.current() + if ( + ch !== -1 && + ch !== CIRCUMFLEX_ACCENT && + ch !== DOLLAR_SIGN && + ch !== FULL_STOP && + ch !== ASTERISK && + ch !== PLUS_SIGN && + ch !== QUESTION_MARK && + ch !== LEFT_PARENTHESIS && + ch !== RIGHT_PARENTHESIS && + ch !== LEFT_SQUARE_BRACKET && + ch !== VERTICAL_LINE + ) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape + eatAtomEscape(unicode) { + const start = this.pos + if (this.eatDecimalEscape()) { + const n = this._parseDecimalInt(start, this.pos) + if (unicode) { + // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape + if (n > this.maxBackReference) { + this.maxBackReference = n + } + return true + } + if (n <= this.numCapturingParens) { + 
return true + } + this.pos = start + } + if (this.eatCharacterClassEscape(unicode) || this.eatCharacterEscape(unicode)) { + return true + } + if (unicode) { + // Make the same message as V8. + if (this.current() === LATIN_SMALL_LETTER_C) { + this.raise("Invalid unicode escape") + } + this.raise("Invalid escape") + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape + eatCharacterEscape(unicode) { + return ( + this.eatControlEscape() || + this._eatCControlLetter() || + this._eatZero() || + this.eatHexEscapeSequence(unicode) || + this.eatRegExpUnicodeEscapeSequence(unicode) || + (!unicode && this.eatLegacyOctalEscapeSequence()) || + this.eatIdentityEscape(unicode) + ) + } + _eatCControlLetter() { + const start = this.pos + if (this.eat(LATIN_SMALL_LETTER_C)) { + if (this.eatControlLetter()) { + return true + } + this.pos = start + } + return false + } + _eatZero() { + if (this.current() === DIGIT_ZERO && !isDecimalDigit(this.lookahead())) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape + eatControlEscape() { + if (isControlEscape(this.current())) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter + eatControlLetter() { + if (isControlLetter(this.current())) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence + eatRegExpUnicodeEscapeSequence(unicode) { + const start = this.pos + + if (this.eat(LATIN_SMALL_LETTER_U)) { + if (this._eatFixedHexDigits(4)) { + const code = this._parseHexInt(this.pos - 4, this.pos) + if (unicode && code >= 0xD800 && code <= 0xDBFF) { + const leadSurrogateEnd = this.pos + if (this.eat(REVERSE_SOLIDUS) && this.eat(LATIN_SMALL_LETTER_U) && this._eatFixedHexDigits(4)) { + const codeT = this._parseHexInt(this.pos - 4, this.pos) + if (codeT >= 
0xDC00 && codeT <= 0xDFFF) { + return true + } + } + this.pos = leadSurrogateEnd + } + return true + } + if ( + unicode && + this.eat(LEFT_CURLY_BRACKET) && + this.eatHexDigits() && + this.eat(RIGHT_CURLY_BRACKET) && + isValidUnicode(this._parseHexInt(start + 2, this.pos - 1)) + ) { + return true + } + if (unicode) { + this.raise("Invalid unicode escape") + } + this.pos = start + } + + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape + eatIdentityEscape(unicode) { + if (unicode) { + return ( + this.eatSyntaxCharacter() || + this.eat(SOLIDUS) + ) + } + + if (this.current() !== LATIN_SMALL_LETTER_C) { + this.advance() + return true + } + + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape + eatDecimalEscape() { + let ch = this.current() + if (ch >= DIGIT_ONE && ch <= DIGIT_NINE) { + do { + this.advance() + } while ((ch = this.current()) >= DIGIT_ZERO && ch <= DIGIT_NINE) + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape + eatCharacterClassEscape() { + if (isCharacterClassEscape(this.current())) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass + eatCharacterClass(unicode) { + if (this.eat(LEFT_SQUARE_BRACKET)) { + this.eat(CIRCUMFLEX_ACCENT) + this.classRanges(unicode) + if (this.eat(RIGHT_SQUARE_BRACKET)) { + return true + } + // Unreachable since it threw "unterminated regular expression" error before. 
+ this.raise("Unterminated character class") + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges + // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges + // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash + classRanges(unicode) { + for (; ;) { + const leftStart = this.pos + if (this.eatClassAtom(unicode)) { + const leftEnd = this.pos + if (this.eat(HYPHEN_MINUS)) { + const rightStart = this.pos + if (this.eatClassAtom(unicode)) { + const rightEnd = this.pos + const left = this._parseClassAtom(leftStart, leftEnd, unicode, false) + const right = this._parseClassAtom(rightStart, rightEnd, unicode, true) + if (unicode && (left === -1 || right === -1)) { + this.raise("Invalid character class") + } + if (left !== -1 && right !== -1 && left > right) { + this.raise("Range out of order in character class") + } + } + } + } else { + break + } + } + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom + // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash + eatClassAtom(unicode) { + const start = this.pos + + if (this.eat(REVERSE_SOLIDUS)) { + if (this.eatClassEscape(unicode)) { + return true + } + if (unicode) { + // Make the same message as V8. 
+ const ch = this.current() + if (ch === LATIN_SMALL_LETTER_C || isOctalDigit(ch)) { + this.raise("Invalid class escape") + } + this.raise("Invalid escape") + } + this.pos = start + } + + const ch = this.current() + if (ch !== RIGHT_SQUARE_BRACKET) { + this.advance() + return true + } + + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape + eatClassEscape(unicode) { + return ( + this.eat(LATIN_SMALL_LETTER_B) || + (unicode && this.eat(HYPHEN_MINUS)) || + (!unicode && this._eatCClassControlLetter(unicode)) || + this.eatCharacterClassEscape() || + this.eatCharacterEscape(unicode) + ) + } + _eatCClassControlLetter() { + const start = this.pos + if (this.eat(LATIN_SMALL_LETTER_C)) { + if (this.eatClassControlLetter()) { + return true + } + this.pos = start + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter + eatClassControlLetter() { + const ch = this.current() + if (isDecimalDigit(ch) || ch === LOW_LINE) { + this.advance() + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence + eatHexEscapeSequence(unicode) { + const start = this.pos + + if (this.eat(LATIN_SMALL_LETTER_X)) { + if (this._eatFixedHexDigits(2)) { + return true + } + if (unicode) { + this.raise("Invalid escape") + } + this.pos = start + } + + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits + eatDecimalDigits() { + const start = this.pos + while (isDecimalDigit(this.current())) { + this.advance() + } + return this.pos !== start + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits + eatHexDigits() { + const start = this.pos + while (isHexDigit(this.current())) { + this.advance() + } + return this.pos !== start + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence + eatLegacyOctalEscapeSequence() { + const ch = this.current() + if 
(this.eatOctalDigit()) { + if (this.eatOctalDigit() && ch <= DIGIT_THREE) { + this.eatOctalDigit() + } + return true + } + return false + } + + // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit + eatOctalDigit() { + if (isOctalDigit(this.current())) { + this.advance() + return true + } + return false + } + + _eatFixedHexDigits(length) { + const start = this.pos + for (let i = 0; i < length; ++i) { + if (!isHexDigit(this.current())) { + this.pos = start + return false + } + this.advance() + } + return true + } + + _parseDecimalInt(start, end) { + return parseInt(this.pattern.slice(start, end), 10) + } + + _parseHexInt(start, end) { + return parseInt(this.pattern.slice(start, end), 16) + } + + _parseOctalInt(start, end) { + return parseInt(this.pattern.slice(start, end), 8) + } + + _parseClassAtom(start, end, unicode, isRight) { + const ch1 = this._getOneElementCharSetAt(start, unicode, isRight) + if (ch1 === REVERSE_SOLIDUS) { + const ch2 = this._getOneElementCharSetAt(start + 1, unicode, isRight) + switch (ch2) { + case LATIN_SMALL_LETTER_B: + return BACKSPACE + + // CharacterClassEscape + case LATIN_SMALL_LETTER_D: + case LATIN_CAPITAL_LETTER_D: + case LATIN_SMALL_LETTER_S: + case LATIN_CAPITAL_LETTER_S: + case LATIN_SMALL_LETTER_W: + case LATIN_CAPITAL_LETTER_W: + return -1 // Those are not single character. 
+ + // CharacterEscape + case LATIN_SMALL_LETTER_T: + return CHARACTER_TABULATION + case LATIN_SMALL_LETTER_N: + return LINE_FEED + case LATIN_SMALL_LETTER_V: + return LINE_TABULATION + case LATIN_SMALL_LETTER_F: + return FORM_FEED + case LATIN_SMALL_LETTER_R: + return CARRIAGE_RETURN + case LATIN_SMALL_LETTER_C: + if (end - start === 3) { + return this.codePointAt(start + 2) % 32 + } + return LATIN_SMALL_LETTER_C + case LATIN_SMALL_LETTER_X: + if (end - start === 4) { + return this._parseHexInt(start + 2, end) + } + return LATIN_SMALL_LETTER_X + case LATIN_SMALL_LETTER_U: + if (end - start >= 5 && this.codePointAt(start + 2) === LEFT_CURLY_BRACKET) { + return this._parseHexInt(start + 3, end - 1) + } + if (end - start === 6) { + return this._parseHexInt(start + 2, end) + } + if (end - start === 12) { + const lead = this._parseHexInt(start + 2, start + 6) + const trail = this._parseHexInt(start + 8, start + 12) + return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 + } + return LATIN_SMALL_LETTER_U + default: + if (!unicode && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { + return this._parseOctalInt(start + 1, end) + } + return ch2 + } + } + return ch1 + } + // https://www.ecma-international.org/ecma-262/8.0/#sec-notation + _getOneElementCharSetAt(i, unicode, isRight) { + const ch = this.codePointAt(i) + if (unicode || ch <= 0xFFFF) { + return ch + } + return this.pattern.charCodeAt(isRight ? 
i : i + 1) + } +} diff --git a/src/state.js b/src/state.js index b90403f96..583ed7b3f 100644 --- a/src/state.js +++ b/src/state.js @@ -89,6 +89,9 @@ export class Parser { // Scope tracking for duplicate variable names (see scope.js) this.scopeStack = [] this.enterFunctionScope() + + // Lazy initialization + this.regexpValidator = null } // DEPRECATED Kept for backwards compatibility until 3.0 in case a plugin uses them diff --git a/src/tokenize.js b/src/tokenize.js index 71dd8403c..ef3302fe7 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -3,6 +3,7 @@ import {types as tt, keywords as keywordTypes} from "./tokentype" import {Parser} from "./state" import {SourceLocation} from "./locutil" import {lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace} from "./whitespace" +import {RegExpValidator} from "./regexp" // Object type used to represent tokens. Note that normally, tokens // simply exist as properties on the parser object. This is only @@ -25,9 +26,6 @@ export class Token { const pp = Parser.prototype -// Are we running under Rhino? -const isRhino = typeof Packages == "object" && Object.prototype.toString.call(Packages) == "[object JavaPackage]" - // Move to the next token pp.next = function() { @@ -369,22 +367,6 @@ pp.finishOp = function(type, size) { return this.finishToken(type, str) } -// Parse a regular expression. Some context-awareness is necessary, -// since a '/' inside a '[]' set does not end the expression. 
- -function tryCreateRegexp(src, flags, throwErrorAt, parser) { - try { - return new RegExp(src, flags) - } catch (e) { - if (throwErrorAt !== undefined) { - if (e instanceof SyntaxError) parser.raise(throwErrorAt, "Error parsing regular expression: " + e.message) - throw e - } - } -} - -const regexpUnicodeSupport = !!tryCreateRegexp("\uffff", "u") - pp.readRegexp = function() { let escaped, inClass, start = this.pos for (;;) { @@ -399,55 +381,27 @@ pp.readRegexp = function() { } else escaped = false ++this.pos } - let content = this.input.slice(start, this.pos) + let pattern = this.input.slice(start, this.pos) ++this.pos let flagsStart = this.pos - let mods = this.readWord1() + let flags = this.readWord1() if (this.containsEsc) this.unexpected(flagsStart) - let tmp = content, tmpFlags = "" - if (mods) { - let validFlags = "gim" - if (this.options.ecmaVersion >= 6) validFlags += "uy" - if (this.options.ecmaVersion >= 9) validFlags += "s" - for (let i = 0; i < mods.length; i++) { - let mod = mods.charAt(i) - if (validFlags.indexOf(mod) == -1) this.raise(start, "Invalid regular expression flag") - if (mods.indexOf(mod, i + 1) > -1) this.raise(start, "Duplicate regular expression flag") - } - if (mods.indexOf("u") >= 0) { - if (regexpUnicodeSupport) { - tmpFlags = "u" - } else { - // Replace each astral symbol and every Unicode escape sequence that - // possibly represents an astral symbol or a paired surrogate with a - // single ASCII symbol to avoid throwing on regular expressions that - // are only valid in combination with the `/u` flag. - // Note: replacing with the ASCII symbol `x` might cause false - // negatives in unlikely scenarios. For example, `[\u{61}-b]` is a - // perfectly valid pattern that is equivalent to `[a-b]`, but it would - // be replaced by `[x-b]` which throws an error. 
- tmp = tmp.replace(/\\u\{([0-9a-fA-F]+)\}/g, (_match, code, offset) => { - code = Number("0x" + code) - if (code > 0x10FFFF) this.raise(start + offset + 3, "Code point out of bounds") - return "x" - }) - tmp = tmp.replace(/\\u([a-fA-F0-9]{4})|[\uD800-\uDBFF][\uDC00-\uDFFF]/g, "x") - tmpFlags = tmpFlags.replace("u", "") - } - } - } - // Detect invalid regular expressions. + // Validate pattern + const validator = this.regexpValidator || (this.regexpValidator = new RegExpValidator(this)) + validator.validateFlags(start, flags) + validator.validatePattern(start, pattern, flags.indexOf("u") !== -1) + + // Create Literal#value property value. let value = null - // Rhino's regular expression parser is flaky and throws uncatchable exceptions, - // so don't do detection if we are running under Rhino - if (!isRhino) { - tryCreateRegexp(tmp, tmpFlags, start, this) - // Get a regular expression object for this pattern-flag pair, or `null` in - // case the current environment doesn't support the flags it uses. - value = tryCreateRegexp(content, mods) + try { + value = new RegExp(pattern, flags) + } catch (e) { + // ESTree requires null if it failed to instantiate RegExp object. + // https://github.com/estree/estree/blob/a27003adf4fd7bfad44de9cef372a2eacd527b1c/es5.md#regexpliteral } - return this.finishToken(tt.regexp, {pattern: content, flags: mods, value: value}) + + return this.finishToken(tt.regexp, {pattern, flags, value}) } // Read an integer in the given radix. 
Return null if zero digits diff --git a/test/run.js b/test/run.js index 91ad384c7..3a94d85ea 100644 --- a/test/run.js +++ b/test/run.js @@ -12,6 +12,7 @@ require("./tests-directive.js"); require("./tests-rest-spread-properties.js"); require("./tests-async-iteration.js"); + require("./tests-regexp.js"); acorn = require("../dist/acorn") require("../dist/acorn_loose") } else { diff --git a/test/tests-regexp.js b/test/tests-regexp.js new file mode 100644 index 000000000..6b9b2656d --- /dev/null +++ b/test/tests-regexp.js @@ -0,0 +1,1347 @@ +if (typeof exports != "undefined") { + var test = require("./driver.js").test + var testFail = require("./driver.js").testFail +} + +test("/foo/", {}, { ecmaVersion: 5 }) +test("/foo/", {}, { ecmaVersion: 2015 }) +testFail("/foo/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/foo/u", {}, { ecmaVersion: 2015 }) +test("/foo|bar/", {}, { ecmaVersion: 5 }) +test("/foo|bar/", {}, { ecmaVersion: 2015 }) +testFail("/foo|bar/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/foo|bar/u", {}, { ecmaVersion: 2015 }) +test("/||||/", {}, { ecmaVersion: 5 }) +test("/||||/", {}, { ecmaVersion: 2015 }) +testFail("/||||/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/||||/u", {}, { ecmaVersion: 2015 }) +test("/^|$|\\b|\\B/", {}, { ecmaVersion: 5 }) +test("/^|$|\\b|\\B/", {}, { ecmaVersion: 2015 }) +testFail("/^|$|\\b|\\B/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^|$|\\b|\\B/u", {}, { ecmaVersion: 2015 }) +testFail("/(/", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(/", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(/u", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?/", "Invalid regular expression: /(?/: Invalid group 
(1:1)", { ecmaVersion: 5 }) +testFail("/(?/", "Invalid regular expression: /(?/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?/u", "Invalid regular expression: /(?/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?=/", "Invalid regular expression: /(?=/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?=/", "Invalid regular expression: /(?=/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?=/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=/u", "Invalid regular expression: /(?=/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?=)/", {}, { ecmaVersion: 5 }) +test("/(?=)/", {}, { ecmaVersion: 2015 }) +testFail("/(?=)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(?=)/u", {}, { ecmaVersion: 2015 }) +testFail("/(?=foo/", "Invalid regular expression: /(?=foo/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?=foo/", "Invalid regular expression: /(?=foo/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?=foo/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=foo/u", "Invalid regular expression: /(?=foo/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?=foo)/", {}, { ecmaVersion: 5 }) +test("/(?=foo)/", {}, { ecmaVersion: 2015 }) +testFail("/(?=foo)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(?=foo)/u", {}, { ecmaVersion: 2015 }) +testFail("/(?!/", "Invalid regular expression: /(?!/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?!/", "Invalid regular expression: /(?!/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?!/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?!/u", "Invalid regular expression: /(?!/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?!)/", {}, { ecmaVersion: 5 }) +test("/(?!)/", {}, { 
ecmaVersion: 2015 }) +testFail("/(?!)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(?!)/u", {}, { ecmaVersion: 2015 }) +testFail("/(?!foo/", "Invalid regular expression: /(?!foo/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?!foo/", "Invalid regular expression: /(?!foo/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?!foo/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?!foo/u", "Invalid regular expression: /(?!foo/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?!foo)/", {}, { ecmaVersion: 5 }) +test("/(?!foo)/", {}, { ecmaVersion: 2015 }) +testFail("/(?!foo)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(?!foo)/u", {}, { ecmaVersion: 2015 }) +test("/(?=a)*/", {}, { ecmaVersion: 5 }) +test("/(?=a)*/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a)*/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a)*/u", "Invalid regular expression: /(?=a)*/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a)+/", {}, { ecmaVersion: 5 }) +test("/(?=a)+/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a)+/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a)+/u", "Invalid regular expression: /(?=a)+/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a)?/", {}, { ecmaVersion: 5 }) +test("/(?=a)?/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a)?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a)?/u", "Invalid regular expression: /(?=a)?/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){/", {}, { ecmaVersion: 5 }) +test("/(?=a){/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a){/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){/u", "Invalid regular expression: /(?=a){/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){}/", {}, { ecmaVersion: 5 }) +test("/(?=a){}/", {}, { ecmaVersion: 
2015 }) +testFail("/(?=a){}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){}/u", "Invalid regular expression: /(?=a){}/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){a}/", {}, { ecmaVersion: 5 }) +test("/(?=a){a}/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a){a}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){a}/u", "Invalid regular expression: /(?=a){a}/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){1}/", {}, { ecmaVersion: 5 }) +test("/(?=a){1}/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a){1}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){1}/u", "Invalid regular expression: /(?=a){1}/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){1,}/", {}, { ecmaVersion: 5 }) +test("/(?=a){1,}/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a){1,}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){1,}/u", "Invalid regular expression: /(?=a){1,}/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/(?=a){1,2}/", {}, { ecmaVersion: 5 }) +test("/(?=a){1,2}/", {}, { ecmaVersion: 2015 }) +testFail("/(?=a){1,2}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?=a){1,2}/u", "Invalid regular expression: /(?=a){1,2}/: Invalid quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a*/", {}, { ecmaVersion: 5 }) +test("/a*/", {}, { ecmaVersion: 2015 }) +testFail("/a*/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a*/u", {}, { ecmaVersion: 2015 }) +test("/a+/", {}, { ecmaVersion: 5 }) +test("/a+/", {}, { ecmaVersion: 2015 }) +testFail("/a+/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a+/u", {}, { ecmaVersion: 2015 }) +test("/a?/", {}, { ecmaVersion: 5 }) +test("/a?/", {}, { ecmaVersion: 2015 }) +testFail("/a?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a?/u", {}, { ecmaVersion: 2015 
}) +test("/a{/", {}, { ecmaVersion: 5 }) +test("/a{/", {}, { ecmaVersion: 2015 }) +testFail("/a{/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{/u", "Invalid regular expression: /a{/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{}/", {}, { ecmaVersion: 5 }) +test("/a{}/", {}, { ecmaVersion: 2015 }) +testFail("/a{}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{}/u", "Invalid regular expression: /a{}/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{a}/", {}, { ecmaVersion: 5 }) +test("/a{a}/", {}, { ecmaVersion: 2015 }) +testFail("/a{a}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{a}/u", "Invalid regular expression: /a{a}/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1}/", {}, { ecmaVersion: 5 }) +test("/a{1}/", {}, { ecmaVersion: 2015 }) +testFail("/a{1}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1}/u", {}, { ecmaVersion: 2015 }) +test("/a{1/", {}, { ecmaVersion: 5 }) +test("/a{1/", {}, { ecmaVersion: 2015 }) +testFail("/a{1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1/u", "Invalid regular expression: /a{1/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1,}/", {}, { ecmaVersion: 5 }) +test("/a{1,}/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1,}/u", {}, { ecmaVersion: 2015 }) +test("/a{1,/", {}, { ecmaVersion: 5 }) +test("/a{1,/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1,/u", "Invalid regular expression: /a{1,/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1,2}/", {}, { ecmaVersion: 5 }) +test("/a{1,2}/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,2}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1,2}/u", {}, { ecmaVersion: 2015 
}) +test("/a{1,2/", {}, { ecmaVersion: 5 }) +test("/a{1,2/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,2/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1,2/u", "Invalid regular expression: /a{1,2/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/a{2,1}/", "Invalid regular expression: /a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1}/", "Invalid regular expression: /a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/a{2,1}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1}/u", "Invalid regular expression: /a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{2,1/", {}, { ecmaVersion: 5 }) +test("/a{2,1/", {}, { ecmaVersion: 2015 }) +testFail("/a{2,1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1/u", "Invalid regular expression: /a{2,1/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/(a{2,1}/", "Invalid regular expression: /(a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 5 }) +testFail("/(a{2,1}/", "Invalid regular expression: /(a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/(a{2,1}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(a{2,1}/u", "Invalid regular expression: /(a{2,1}/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a*?/", {}, { ecmaVersion: 5 }) +test("/a*?/", {}, { ecmaVersion: 2015 }) +testFail("/a*?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a*?/u", {}, { ecmaVersion: 2015 }) +test("/a+?/", {}, { ecmaVersion: 5 }) +test("/a+?/", {}, { ecmaVersion: 2015 }) +testFail("/a+?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a+?/u", {}, { ecmaVersion: 2015 }) +test("/a??/", {}, { ecmaVersion: 5 }) +test("/a??/", {}, { ecmaVersion: 
2015 }) +testFail("/a??/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a??/u", {}, { ecmaVersion: 2015 }) +test("/a{?/", {}, { ecmaVersion: 5 }) +test("/a{?/", {}, { ecmaVersion: 2015 }) +testFail("/a{?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{?/u", "Invalid regular expression: /a{?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{}?/", {}, { ecmaVersion: 5 }) +test("/a{}?/", {}, { ecmaVersion: 2015 }) +testFail("/a{}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{}?/u", "Invalid regular expression: /a{}?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{a}?/", {}, { ecmaVersion: 5 }) +test("/a{a}?/", {}, { ecmaVersion: 2015 }) +testFail("/a{a}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{a}?/u", "Invalid regular expression: /a{a}?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1}?/", {}, { ecmaVersion: 5 }) +test("/a{1}?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1}?/u", {}, { ecmaVersion: 2015 }) +test("/a{1?/", {}, { ecmaVersion: 5 }) +test("/a{1?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1?/u", "Invalid regular expression: /a{1?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1,}?/", {}, { ecmaVersion: 5 }) +test("/a{1,}?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1,}?/u", {}, { ecmaVersion: 2015 }) +test("/a{1,?/", {}, { ecmaVersion: 5 }) +test("/a{1,?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1,?/u", "Invalid regular expression: /a{1,?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{1,2}?/", {}, { ecmaVersion: 5 }) 
+test("/a{1,2}?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,2}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/a{1,2}?/u", {}, { ecmaVersion: 2015 }) +test("/a{1,2?/", {}, { ecmaVersion: 5 }) +test("/a{1,2?/", {}, { ecmaVersion: 2015 }) +testFail("/a{1,2?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{1,2?/u", "Invalid regular expression: /a{1,2?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/a{2,1}?/", "Invalid regular expression: /a{2,1}?/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1}?/", "Invalid regular expression: /a{2,1}?/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +testFail("/a{2,1}?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1}?/u", "Invalid regular expression: /a{2,1}?/: numbers out of order in {} quantifier (1:1)", { ecmaVersion: 2015 }) +test("/a{2,1?/", {}, { ecmaVersion: 5 }) +test("/a{2,1?/", {}, { ecmaVersion: 2015 }) +testFail("/a{2,1?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/a{2,1?/u", "Invalid regular expression: /a{2,1?/: Incomplete quantifier (1:1)", { ecmaVersion: 2015 }) +test("/👍🚀❇️/", {}, { ecmaVersion: 5 }) +test("/👍🚀❇️/", {}, { ecmaVersion: 2015 }) +testFail("/👍🚀❇️/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/👍🚀❇️/u", {}, { ecmaVersion: 2015 }) +test("/^/", {}, { ecmaVersion: 5 }) +test("/^/", {}, { ecmaVersion: 2015 }) +testFail("/^/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^/u", {}, { ecmaVersion: 2015 }) +test("/$/", {}, { ecmaVersion: 5 }) +test("/$/", {}, { ecmaVersion: 2015 }) +testFail("/$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/$/u", {}, { ecmaVersion: 2015 }) +test("/./", {}, { ecmaVersion: 5 }) +test("/./", {}, { ecmaVersion: 2015 }) +testFail("/./u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) 
+test("/./u", {}, { ecmaVersion: 2015 }) +testFail("/(*)/", "Invalid regular expression: /(*)/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/(*)/", "Invalid regular expression: /(*)/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/(*)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(*)/u", "Invalid regular expression: /(*)/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/+/", "Invalid regular expression: /+/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/+/", "Invalid regular expression: /+/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/+/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/+/u", "Invalid regular expression: /+/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/?/", "Invalid regular expression: /?/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/?/", "Invalid regular expression: /?/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/?/u", "Invalid regular expression: /?/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/(/", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(/", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(/u", "Invalid regular expression: /(/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/)/", "Invalid regular expression: /)/: Unmatched ')' (1:1)", { ecmaVersion: 5 }) +testFail("/)/", "Invalid regular expression: /)/: Unmatched ')' (1:1)", { ecmaVersion: 2015 }) +testFail("/)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/)/u", "Invalid regular expression: /)/: Unmatched ')' (1:1)", { ecmaVersion: 2015 }) +testFail("/[/", "Unterminated regular expression (1:1)", { 
ecmaVersion: 5 }) +testFail("/[/", "Unterminated regular expression (1:1)", { ecmaVersion: 2015 }) +testFail("/[/u", "Unterminated regular expression (1:1)", { ecmaVersion: 5 }) +testFail("/[/u", "Unterminated regular expression (1:1)", { ecmaVersion: 2015 }) +test("/]/", {}, { ecmaVersion: 5 }) +test("/]/", {}, { ecmaVersion: 2015 }) +testFail("/]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/]/u", "Invalid regular expression: /]/: Lone quantifier brackets (1:1)", { ecmaVersion: 2015 }) +test("/{/", {}, { ecmaVersion: 5 }) +test("/{/", {}, { ecmaVersion: 2015 }) +testFail("/{/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/{/u", "Invalid regular expression: /{/: Lone quantifier brackets (1:1)", { ecmaVersion: 2015 }) +test("/}/", {}, { ecmaVersion: 5 }) +test("/}/", {}, { ecmaVersion: 2015 }) +testFail("/}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/}/u", "Invalid regular expression: /}/: Lone quantifier brackets (1:1)", { ecmaVersion: 2015 }) +test("/|/", {}, { ecmaVersion: 5 }) +test("/|/", {}, { ecmaVersion: 2015 }) +testFail("/|/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/|/u", {}, { ecmaVersion: 2015 }) +testFail("/^*/", "Invalid regular expression: /^*/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/^*/", "Invalid regular expression: /^*/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/^*/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/^*/u", "Invalid regular expression: /^*/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/$*/", "Invalid regular expression: /$*/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/$*/", "Invalid regular expression: /$*/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/$*/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/$*/u", "Invalid regular expression: /$*/: Nothing to repeat (1:1)", { 
ecmaVersion: 2015 }) +test("/${1,2/", {}, { ecmaVersion: 5 }) +test("/${1,2/", {}, { ecmaVersion: 2015 }) +testFail("/${1,2/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/${1,2/u", "Invalid regular expression: /${1,2/: Lone quantifier brackets (1:1)", { ecmaVersion: 2015 }) +testFail("/${1,2}/", "Invalid regular expression: /${1,2}/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/${1,2}/", "Invalid regular expression: /${1,2}/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/${1,2}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/${1,2}/u", "Invalid regular expression: /${1,2}/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/${2,1}/", "Invalid regular expression: /${2,1}/: Nothing to repeat (1:1)", { ecmaVersion: 5 }) +testFail("/${2,1}/", "Invalid regular expression: /${2,1}/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +testFail("/${2,1}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/${2,1}/u", "Invalid regular expression: /${2,1}/: Nothing to repeat (1:1)", { ecmaVersion: 2015 }) +test("/\\1/", {}, { ecmaVersion: 5 }) +test("/\\1/", {}, { ecmaVersion: 2015 }) +testFail("/\\1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\1/u", "Invalid regular expression: /\\1/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/(a)\\1/", {}, { ecmaVersion: 5 }) +test("/(a)\\1/", {}, { ecmaVersion: 2015 }) +testFail("/(a)\\1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(a)\\1/u", {}, { ecmaVersion: 2015 }) +test("/\\1(a)/", {}, { ecmaVersion: 5 }) +test("/\\1(a)/", {}, { ecmaVersion: 2015 }) +testFail("/\\1(a)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\1(a)/u", {}, { ecmaVersion: 2015 }) +testFail("/\\2(a)(/", "Invalid regular expression: /\\2(a)(/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/\\2(a)(/", "Invalid regular expression: /\\2(a)(/: 
Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/\\2(a)(/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\2(a)(/u", "Invalid regular expression: /\\2(a)(/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?:a)\\1/", {}, { ecmaVersion: 5 }) +test("/(?:a)\\1/", {}, { ecmaVersion: 2015 }) +testFail("/(?:a)\\1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?:a)\\1/u", "Invalid regular expression: /(?:a)\\1/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/(a)\\2/", {}, { ecmaVersion: 5 }) +test("/(a)\\2/", {}, { ecmaVersion: 2015 }) +testFail("/(a)\\2/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(a)\\2/u", "Invalid regular expression: /(a)\\2/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/(?:a)\\2/", {}, { ecmaVersion: 5 }) +test("/(?:a)\\2/", {}, { ecmaVersion: 2015 }) +testFail("/(?:a)\\2/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?:a)\\2/u", "Invalid regular expression: /(?:a)\\2/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\10/", {}, { ecmaVersion: 5 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\10/", {}, { ecmaVersion: 2015 }) +testFail("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\10/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\10/u", {}, { ecmaVersion: 2015 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/", {}, { ecmaVersion: 5 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/", {}, { ecmaVersion: 2015 }) +testFail("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/u", "Invalid regular expression: /(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/", {}, { ecmaVersion: 5 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/", {}, { ecmaVersion: 2015 
}) +testFail("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11/u", {}, { ecmaVersion: 2015 }) +testFail("/(?/", "Invalid regular expression: /(?/: Invalid group (1:1)", { ecmaVersion: 5 }) +testFail("/(?/", "Invalid regular expression: /(?/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?/u", "Invalid regular expression: /(?/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?a/", "Invalid regular expression: /(?a/: Invalid group (1:1)", { ecmaVersion: 5 }) +testFail("/(?a/", "Invalid regular expression: /(?a/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?a/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?a/u", "Invalid regular expression: /(?a/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?a)/", "Invalid regular expression: /(?a)/: Invalid group (1:1)", { ecmaVersion: 5 }) +testFail("/(?a)/", "Invalid regular expression: /(?a)/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?a)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?a)/u", "Invalid regular expression: /(?a)/: Invalid group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?:/", "Invalid regular expression: /(?:/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?:/", "Invalid regular expression: /(?:/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?:/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(?:/u", "Invalid regular expression: /(?:/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?:a/", "Invalid regular expression: /(?:a/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(?:a/", "Invalid regular expression: /(?:a/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(?:a/u", "Invalid regular expression flag (1:1)", 
{ ecmaVersion: 5 }) +testFail("/(?:a/u", "Invalid regular expression: /(?:a/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/(?:a)/", {}, { ecmaVersion: 5 }) +test("/(?:a)/", {}, { ecmaVersion: 2015 }) +testFail("/(?:a)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/(?:a)/u", {}, { ecmaVersion: 2015 }) +testFail("/(:a/", "Invalid regular expression: /(:a/: Unterminated group (1:1)", { ecmaVersion: 5 }) +testFail("/(:a/", "Invalid regular expression: /(:a/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +testFail("/(:a/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/(:a/u", "Invalid regular expression: /(:a/: Unterminated group (1:1)", { ecmaVersion: 2015 }) +test("/\\d/", {}, { ecmaVersion: 5 }) +test("/\\d/", {}, { ecmaVersion: 2015 }) +testFail("/\\d/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\d/u", {}, { ecmaVersion: 2015 }) +test("/\\D/", {}, { ecmaVersion: 5 }) +test("/\\D/", {}, { ecmaVersion: 2015 }) +testFail("/\\D/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\D/u", {}, { ecmaVersion: 2015 }) +test("/\\s/", {}, { ecmaVersion: 5 }) +test("/\\s/", {}, { ecmaVersion: 2015 }) +testFail("/\\s/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\s/u", {}, { ecmaVersion: 2015 }) +test("/\\S/", {}, { ecmaVersion: 5 }) +test("/\\S/", {}, { ecmaVersion: 2015 }) +testFail("/\\S/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\S/u", {}, { ecmaVersion: 2015 }) +test("/\\w/", {}, { ecmaVersion: 5 }) +test("/\\w/", {}, { ecmaVersion: 2015 }) +testFail("/\\w/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\w/u", {}, { ecmaVersion: 2015 }) +test("/\\W/", {}, { ecmaVersion: 5 }) +test("/\\W/", {}, { ecmaVersion: 2015 }) +testFail("/\\W/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\W/u", {}, { ecmaVersion: 2015 }) +test("/\\f/", {}, { ecmaVersion: 
5 }) +test("/\\f/", {}, { ecmaVersion: 2015 }) +testFail("/\\f/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\f/u", {}, { ecmaVersion: 2015 }) +test("/\\n/", {}, { ecmaVersion: 5 }) +test("/\\n/", {}, { ecmaVersion: 2015 }) +testFail("/\\n/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\n/u", {}, { ecmaVersion: 2015 }) +test("/\\r/", {}, { ecmaVersion: 5 }) +test("/\\r/", {}, { ecmaVersion: 2015 }) +testFail("/\\r/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\r/u", {}, { ecmaVersion: 2015 }) +test("/\\t/", {}, { ecmaVersion: 5 }) +test("/\\t/", {}, { ecmaVersion: 2015 }) +testFail("/\\t/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\t/u", {}, { ecmaVersion: 2015 }) +test("/\\v/", {}, { ecmaVersion: 5 }) +test("/\\v/", {}, { ecmaVersion: 2015 }) +testFail("/\\v/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\v/u", {}, { ecmaVersion: 2015 }) +test("/\\cA/", {}, { ecmaVersion: 5 }) +test("/\\cA/", {}, { ecmaVersion: 2015 }) +testFail("/\\cA/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\cA/u", {}, { ecmaVersion: 2015 }) +test("/\\cz/", {}, { ecmaVersion: 5 }) +test("/\\cz/", {}, { ecmaVersion: 2015 }) +testFail("/\\cz/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\cz/u", {}, { ecmaVersion: 2015 }) +test("/\\c1/", {}, { ecmaVersion: 5 }) +test("/\\c1/", {}, { ecmaVersion: 2015 }) +testFail("/\\c1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\c1/u", "Invalid regular expression: /\\c1/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\c/", {}, { ecmaVersion: 5 }) +test("/\\c/", {}, { ecmaVersion: 2015 }) +testFail("/\\c/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\c/u", "Invalid regular expression: /\\c/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\0/", {}, { ecmaVersion: 5 }) 
+test("/\\0/", {}, { ecmaVersion: 2015 }) +testFail("/\\0/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\0/u", {}, { ecmaVersion: 2015 }) +test("/\\u/", {}, { ecmaVersion: 5 }) +test("/\\u/", {}, { ecmaVersion: 2015 }) +testFail("/\\u/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u/u", "Invalid regular expression: /\\u/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u1/", {}, { ecmaVersion: 5 }) +test("/\\u1/", {}, { ecmaVersion: 2015 }) +testFail("/\\u1/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u1/u", "Invalid regular expression: /\\u1/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u12/", {}, { ecmaVersion: 5 }) +test("/\\u12/", {}, { ecmaVersion: 2015 }) +testFail("/\\u12/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u12/u", "Invalid regular expression: /\\u12/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u123/", {}, { ecmaVersion: 5 }) +test("/\\u123/", {}, { ecmaVersion: 2015 }) +testFail("/\\u123/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u123/u", "Invalid regular expression: /\\u123/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u1234/", {}, { ecmaVersion: 5 }) +test("/\\u1234/", {}, { ecmaVersion: 2015 }) +testFail("/\\u1234/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u1234/u", {}, { ecmaVersion: 2015 }) +test("/\\u12345/", {}, { ecmaVersion: 5 }) +test("/\\u12345/", {}, { ecmaVersion: 2015 }) +testFail("/\\u12345/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u12345/u", {}, { ecmaVersion: 2015 }) +test("/\\u{/", {}, { ecmaVersion: 5 }) +test("/\\u{/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u{/u", "Invalid regular expression: /\\u{/: Invalid unicode escape (1:1)", { ecmaVersion: 
2015 }) +test("/\\u{z/", {}, { ecmaVersion: 5 }) +test("/\\u{z/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{z/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u{z/u", "Invalid regular expression: /\\u{z/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u{a}/", {}, { ecmaVersion: 5 }) +test("/\\u{a}/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{a}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u{a}/u", {}, { ecmaVersion: 2015 }) +test("/\\u{20/", {}, { ecmaVersion: 5 }) +test("/\\u{20/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{20/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u{20/u", "Invalid regular expression: /\\u{20/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u{20}/", {}, { ecmaVersion: 5 }) +test("/\\u{20}/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{20}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u{20}/u", {}, { ecmaVersion: 2015 }) +test("/\\u{10FFFF}/", {}, { ecmaVersion: 5 }) +test("/\\u{10FFFF}/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{10FFFF}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u{10FFFF}/u", {}, { ecmaVersion: 2015 }) +test("/\\u{110000}/", {}, { ecmaVersion: 5 }) +test("/\\u{110000}/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{110000}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\u{110000}/u", "Invalid regular expression: /\\u{110000}/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/\\u{00000001}/", {}, { ecmaVersion: 5 }) +test("/\\u{00000001}/", {}, { ecmaVersion: 2015 }) +testFail("/\\u{00000001}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\u{00000001}/u", {}, { ecmaVersion: 2015 }) +test("/\\377/", {}, { ecmaVersion: 5 }) +test("/\\377/", {}, { ecmaVersion: 2015 }) +testFail("/\\377/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\377/u", 
"Invalid regular expression: /\\377/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/\\400/", {}, { ecmaVersion: 5 }) +test("/\\400/", {}, { ecmaVersion: 2015 }) +testFail("/\\400/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\400/u", "Invalid regular expression: /\\400/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/\\^/", {}, { ecmaVersion: 5 }) +test("/\\^/", {}, { ecmaVersion: 2015 }) +testFail("/\\^/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\^/u", {}, { ecmaVersion: 2015 }) +test("/\\$/", {}, { ecmaVersion: 5 }) +test("/\\$/", {}, { ecmaVersion: 2015 }) +testFail("/\\$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\$/u", {}, { ecmaVersion: 2015 }) +test("/\\./", {}, { ecmaVersion: 5 }) +test("/\\./", {}, { ecmaVersion: 2015 }) +testFail("/\\./u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\./u", {}, { ecmaVersion: 2015 }) +test("/\\+/", {}, { ecmaVersion: 5 }) +test("/\\+/", {}, { ecmaVersion: 2015 }) +testFail("/\\+/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\+/u", {}, { ecmaVersion: 2015 }) +test("/\\?/", {}, { ecmaVersion: 5 }) +test("/\\?/", {}, { ecmaVersion: 2015 }) +testFail("/\\?/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\?/u", {}, { ecmaVersion: 2015 }) +test("/\\(/", {}, { ecmaVersion: 5 }) +test("/\\(/", {}, { ecmaVersion: 2015 }) +testFail("/\\(/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\(/u", {}, { ecmaVersion: 2015 }) +test("/\\)/", {}, { ecmaVersion: 5 }) +test("/\\)/", {}, { ecmaVersion: 2015 }) +testFail("/\\)/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\)/u", {}, { ecmaVersion: 2015 }) +test("/\\[/", {}, { ecmaVersion: 5 }) +test("/\\[/", {}, { ecmaVersion: 2015 }) +testFail("/\\[/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\[/u", {}, { ecmaVersion: 2015 }) 
+test("/\\]/", {}, { ecmaVersion: 5 }) +test("/\\]/", {}, { ecmaVersion: 2015 }) +testFail("/\\]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\]/u", {}, { ecmaVersion: 2015 }) +test("/\\{/", {}, { ecmaVersion: 5 }) +test("/\\{/", {}, { ecmaVersion: 2015 }) +testFail("/\\{/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\{/u", {}, { ecmaVersion: 2015 }) +test("/\\}/", {}, { ecmaVersion: 5 }) +test("/\\}/", {}, { ecmaVersion: 2015 }) +testFail("/\\}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\}/u", {}, { ecmaVersion: 2015 }) +test("/\\|/", {}, { ecmaVersion: 5 }) +test("/\\|/", {}, { ecmaVersion: 2015 }) +testFail("/\\|/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\|/u", {}, { ecmaVersion: 2015 }) +test("/\\//", {}, { ecmaVersion: 5 }) +test("/\\//", {}, { ecmaVersion: 2015 }) +testFail("/\\//u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\//u", {}, { ecmaVersion: 2015 }) +test("/\\a/", {}, { ecmaVersion: 5 }) +test("/\\a/", {}, { ecmaVersion: 2015 }) +testFail("/\\a/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/\\a/u", "Invalid regular expression: /\\a/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/\\s/", {}, { ecmaVersion: 5 }) +test("/\\s/", {}, { ecmaVersion: 2015 }) +testFail("/\\s/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/\\s/u", {}, { ecmaVersion: 2015 }) +test("/[]/", {}, { ecmaVersion: 5 }) +test("/[]/", {}, { ecmaVersion: 2015 }) +testFail("/[]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[]/u", {}, { ecmaVersion: 2015 }) +test("/[^-a-b-]/", {}, { ecmaVersion: 5 }) +test("/[^-a-b-]/", {}, { ecmaVersion: 2015 }) +testFail("/[^-a-b-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[^-a-b-]/u", {}, { ecmaVersion: 2015 }) +test("/[-]/", {}, { ecmaVersion: 5 }) +test("/[-]/", {}, { ecmaVersion: 2015 
}) +testFail("/[-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[-]/u", {}, { ecmaVersion: 2015 }) +test("/[a]/", {}, { ecmaVersion: 5 }) +test("/[a]/", {}, { ecmaVersion: 2015 }) +testFail("/[a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[a]/u", {}, { ecmaVersion: 2015 }) +test("/[--]/", {}, { ecmaVersion: 5 }) +test("/[--]/", {}, { ecmaVersion: 2015 }) +testFail("/[--]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[--]/u", {}, { ecmaVersion: 2015 }) +test("/[-a]/", {}, { ecmaVersion: 5 }) +test("/[-a]/", {}, { ecmaVersion: 2015 }) +testFail("/[-a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[-a]/u", {}, { ecmaVersion: 2015 }) +test("/[-a-]/", {}, { ecmaVersion: 5 }) +test("/[-a-]/", {}, { ecmaVersion: 2015 }) +testFail("/[-a-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[-a-]/u", {}, { ecmaVersion: 2015 }) +test("/[a-]/", {}, { ecmaVersion: 5 }) +test("/[a-]/", {}, { ecmaVersion: 2015 }) +testFail("/[a-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[a-]/u", {}, { ecmaVersion: 2015 }) +test("/[a-b]/", {}, { ecmaVersion: 5 }) +test("/[a-b]/", {}, { ecmaVersion: 2015 }) +testFail("/[a-b]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[a-b]/u", {}, { ecmaVersion: 2015 }) +test("/[-a-b-]/", {}, { ecmaVersion: 5 }) +test("/[-a-b-]/", {}, { ecmaVersion: 2015 }) +testFail("/[-a-b-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[-a-b-]/u", {}, { ecmaVersion: 2015 }) +test("/[---]/", {}, { ecmaVersion: 5 }) +test("/[---]/", {}, { ecmaVersion: 2015 }) +testFail("/[---]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[---]/u", {}, { ecmaVersion: 2015 }) +testFail("/[b-a]/", "Invalid regular expression: /[b-a]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[b-a]/", "Invalid regular expression: 
/[b-a]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[b-a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[b-a]/u", "Invalid regular expression: /[b-a]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +test("/[a-b--/]/", {}, { ecmaVersion: 5 }) +test("/[a-b--/]/", {}, { ecmaVersion: 2015 }) +testFail("/[a-b--/]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[a-b--/]/u", {}, { ecmaVersion: 2015 }) +testFail("/[a-b--+]/", "Invalid regular expression: /[a-b--+]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[a-b--+]/", "Invalid regular expression: /[a-b--+]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[a-b--+]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[a-b--+]/u", "Invalid regular expression: /[a-b--+]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\b-\\n]/", {}, { ecmaVersion: 5 }) +test("/[\\b-\\n]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\b-\\n]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\b-\\n]/u", {}, { ecmaVersion: 2015 }) +test("/[b\\-a]/", {}, { ecmaVersion: 5 }) +test("/[b\\-a]/", {}, { ecmaVersion: 2015 }) +testFail("/[b\\-a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[b\\-a]/u", {}, { ecmaVersion: 2015 }) +test("/[\\d]/", {}, { ecmaVersion: 5 }) +test("/[\\d]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\d]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\d]/u", {}, { ecmaVersion: 2015 }) +test("/[\\D]/", {}, { ecmaVersion: 5 }) +test("/[\\D]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\D]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\D]/u", {}, { ecmaVersion: 2015 }) +test("/[\\s]/", {}, { ecmaVersion: 5 }) +test("/[\\s]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\s]/u", "Invalid 
regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\s]/u", {}, { ecmaVersion: 2015 }) +test("/[\\S]/", {}, { ecmaVersion: 5 }) +test("/[\\S]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\S]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\S]/u", {}, { ecmaVersion: 2015 }) +test("/[\\w]/", {}, { ecmaVersion: 5 }) +test("/[\\w]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\w]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\w]/u", {}, { ecmaVersion: 2015 }) +test("/[\\W]/", {}, { ecmaVersion: 5 }) +test("/[\\W]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\W]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\W]/u", {}, { ecmaVersion: 2015 }) +test("/[\\d]/", {}, { ecmaVersion: 5 }) +test("/[\\d]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\d]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\d]/u", {}, { ecmaVersion: 2015 }) +test("/[\\D]/", {}, { ecmaVersion: 5 }) +test("/[\\D]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\D]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\D]/u", {}, { ecmaVersion: 2015 }) +test("/[\\s]/", {}, { ecmaVersion: 5 }) +test("/[\\s]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\s]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\s]/u", {}, { ecmaVersion: 2015 }) +test("/[\\S]/", {}, { ecmaVersion: 5 }) +test("/[\\S]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\S]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\S]/u", {}, { ecmaVersion: 2015 }) +test("/[\\w]/", {}, { ecmaVersion: 5 }) +test("/[\\w]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\w]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\w]/u", {}, { ecmaVersion: 2015 }) +test("/[\\W]/", {}, { ecmaVersion: 5 }) +test("/[\\W]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\W]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\W]/u", {}, { 
ecmaVersion: 2015 }) +test("/[\\f]/", {}, { ecmaVersion: 5 }) +test("/[\\f]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\f]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\f]/u", {}, { ecmaVersion: 2015 }) +test("/[\\n]/", {}, { ecmaVersion: 5 }) +test("/[\\n]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\n]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\n]/u", {}, { ecmaVersion: 2015 }) +test("/[\\r]/", {}, { ecmaVersion: 5 }) +test("/[\\r]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\r]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\r]/u", {}, { ecmaVersion: 2015 }) +test("/[\\t]/", {}, { ecmaVersion: 5 }) +test("/[\\t]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\t]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\t]/u", {}, { ecmaVersion: 2015 }) +test("/[\\v]/", {}, { ecmaVersion: 5 }) +test("/[\\v]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\v]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\v]/u", {}, { ecmaVersion: 2015 }) +test("/[\\cA]/", {}, { ecmaVersion: 5 }) +test("/[\\cA]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\cA]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\cA]/u", {}, { ecmaVersion: 2015 }) +test("/[\\cz]/", {}, { ecmaVersion: 5 }) +test("/[\\cz]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\cz]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\cz]/u", {}, { ecmaVersion: 2015 }) +test("/[\\c1]/", {}, { ecmaVersion: 5 }) +test("/[\\c1]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\c1]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c1]/u", "Invalid regular expression: /[\\c1]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\c]/", {}, { ecmaVersion: 5 }) +test("/[\\c]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\c]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c]/u", 
"Invalid regular expression: /[\\c]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\0]/", {}, { ecmaVersion: 5 }) +test("/[\\0]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\0]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\0]/u", {}, { ecmaVersion: 2015 }) +test("/[\\x]/", {}, { ecmaVersion: 5 }) +test("/[\\x]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\x]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\x]/u", "Invalid regular expression: /[\\x]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\xz]/", {}, { ecmaVersion: 5 }) +test("/[\\xz]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\xz]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\xz]/u", "Invalid regular expression: /[\\xz]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\x1]/", {}, { ecmaVersion: 5 }) +test("/[\\x1]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\x1]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\x1]/u", "Invalid regular expression: /[\\x1]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\x12]/", {}, { ecmaVersion: 5 }) +test("/[\\x12]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\x12]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\x12]/u", {}, { ecmaVersion: 2015 }) +test("/[\\x123]/", {}, { ecmaVersion: 5 }) +test("/[\\x123]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\x123]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\x123]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u]/", {}, { ecmaVersion: 5 }) +test("/[\\u]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u]/u", "Invalid regular expression: /[\\u]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u1]/", {}, { ecmaVersion: 5 }) +test("/[\\u1]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u1]/u", "Invalid regular 
expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u1]/u", "Invalid regular expression: /[\\u1]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u12]/", {}, { ecmaVersion: 5 }) +test("/[\\u12]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u12]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u12]/u", "Invalid regular expression: /[\\u12]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u123]/", {}, { ecmaVersion: 5 }) +test("/[\\u123]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u123]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u123]/u", "Invalid regular expression: /[\\u123]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u1234]/", {}, { ecmaVersion: 5 }) +test("/[\\u1234]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u1234]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u1234]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u12345]/", {}, { ecmaVersion: 5 }) +test("/[\\u12345]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u12345]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u12345]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u{]/", {}, { ecmaVersion: 5 }) +test("/[\\u{]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{]/u", "Invalid regular expression: /[\\u{]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u{z]/", {}, { ecmaVersion: 5 }) +test("/[\\u{z]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{z]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{z]/u", "Invalid regular expression: /[\\u{z]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u{a}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{a}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{a}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u{a}]/u", {}, { 
ecmaVersion: 2015 }) +test("/[\\u{20]/", {}, { ecmaVersion: 5 }) +test("/[\\u{20]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{20]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{20]/u", "Invalid regular expression: /[\\u{20]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u{20}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{20}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{20}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u{20}]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u{10FFFF}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{10FFFF}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{10FFFF}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u{10FFFF}]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u{110000}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{110000}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{110000}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{110000}]/u", "Invalid regular expression: /[\\u{110000}]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\u{00000001}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{00000001}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{00000001}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u{00000001}]/u", {}, { ecmaVersion: 2015 }) +test("/[\\77]/", {}, { ecmaVersion: 5 }) +test("/[\\77]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\77]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\77]/u", "Invalid regular expression: /[\\77]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\377]/", {}, { ecmaVersion: 5 }) +test("/[\\377]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\377]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\377]/u", "Invalid regular expression: /[\\377]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\400]/", {}, { ecmaVersion: 5 }) 
+test("/[\\400]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\400]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\400]/u", "Invalid regular expression: /[\\400]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\^]/", {}, { ecmaVersion: 5 }) +test("/[\\^]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\^]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\^]/u", {}, { ecmaVersion: 2015 }) +test("/[\\$]/", {}, { ecmaVersion: 5 }) +test("/[\\$]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\$]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\$]/u", {}, { ecmaVersion: 2015 }) +test("/[\\.]/", {}, { ecmaVersion: 5 }) +test("/[\\.]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\.]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\.]/u", {}, { ecmaVersion: 2015 }) +test("/[\\+]/", {}, { ecmaVersion: 5 }) +test("/[\\+]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\+]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\+]/u", {}, { ecmaVersion: 2015 }) +test("/[\\?]/", {}, { ecmaVersion: 5 }) +test("/[\\?]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\?]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\?]/u", {}, { ecmaVersion: 2015 }) +test("/[\\(]/", {}, { ecmaVersion: 5 }) +test("/[\\(]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\(]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\(]/u", {}, { ecmaVersion: 2015 }) +test("/[\\)]/", {}, { ecmaVersion: 5 }) +test("/[\\)]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\)]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\)]/u", {}, { ecmaVersion: 2015 }) +test("/[\\[]/", {}, { ecmaVersion: 5 }) +test("/[\\[]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\[]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\[]/u", {}, { ecmaVersion: 2015 }) +test("/[\\]]/", {}, { ecmaVersion: 5 }) 
+test("/[\\]]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\]]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\]]/u", {}, { ecmaVersion: 2015 }) +test("/[\\{]/", {}, { ecmaVersion: 5 }) +test("/[\\{]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\{]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\{]/u", {}, { ecmaVersion: 2015 }) +test("/[\\}]/", {}, { ecmaVersion: 5 }) +test("/[\\}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\}]/u", {}, { ecmaVersion: 2015 }) +test("/[\\|]/", {}, { ecmaVersion: 5 }) +test("/[\\|]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\|]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\|]/u", {}, { ecmaVersion: 2015 }) +test("/[\\/]/", {}, { ecmaVersion: 5 }) +test("/[\\/]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\/]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\/]/u", {}, { ecmaVersion: 2015 }) +test("/[\\a]/", {}, { ecmaVersion: 5 }) +test("/[\\a]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\a]/u", "Invalid regular expression: /[\\a]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\s]/", {}, { ecmaVersion: 5 }) +test("/[\\s]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\s]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\s]/u", {}, { ecmaVersion: 2015 }) +test("/[\\d-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\d-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\d-\\uFFFF]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\d-\\uFFFF]/u", "Invalid regular expression: /[\\d-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\D-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\D-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\D-\\uFFFF]/u", "Invalid regular expression flag 
(1:1)", { ecmaVersion: 5 }) +testFail("/[\\D-\\uFFFF]/u", "Invalid regular expression: /[\\D-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\s-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\s-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\s-\\uFFFF]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\s-\\uFFFF]/u", "Invalid regular expression: /[\\s-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\S-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\S-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\S-\\uFFFF]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\S-\\uFFFF]/u", "Invalid regular expression: /[\\S-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\w-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\w-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\w-\\uFFFF]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\w-\\uFFFF]/u", "Invalid regular expression: /[\\w-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\W-\\uFFFF]/", {}, { ecmaVersion: 5 }) +test("/[\\W-\\uFFFF]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\W-\\uFFFF]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\W-\\uFFFF]/u", "Invalid regular expression: /[\\W-\\uFFFF]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\d]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\d]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\d]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\d]/u", "Invalid regular expression: /[\\u0000-\\d]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\D]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\D]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\D]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\D]/u", 
"Invalid regular expression: /[\\u0000-\\D]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\s]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\s]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\s]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\s]/u", "Invalid regular expression: /[\\u0000-\\s]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\S]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\S]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\S]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\S]/u", "Invalid regular expression: /[\\u0000-\\S]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\w]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\w]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\w]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\w]/u", "Invalid regular expression: /[\\u0000-\\w]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\W]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\W]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\W]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0000-\\W]/u", "Invalid regular expression: /[\\u0000-\\W]/: Invalid character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-\\u0001]/", {}, { ecmaVersion: 5 }) +test("/[\\u0000-\\u0001]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\u0001]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u0000-\\u0001]/u", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0001-\\u0000]/", "Invalid regular expression: /[\\u0001-\\u0000]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0001-\\u0000]/", "Invalid regular expression: /[\\u0001-\\u0000]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\u0001-\\u0000]/u", "Invalid 
regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u0001-\\u0000]/u", "Invalid regular expression: /[\\u0001-\\u0000]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\u{1}-\\u{2}]/", "Invalid regular expression: /[\\u{1}-\\u{2}]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{1}-\\u{2}]/", "Invalid regular expression: /[\\u{1}-\\u{2}]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\u{1}-\\u{2}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\u{1}-\\u{2}]/u", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{2}-\\u{1}]/", "Invalid regular expression: /[\\u{2}-\\u{1}]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{2}-\\u{1}]/", "Invalid regular expression: /[\\u{2}-\\u{1}]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\u{2}-\\u{1}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{2}-\\u{1}]/u", "Invalid regular expression: /[\\u{2}-\\u{1}]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u{2-\\u{1}]/", {}, { ecmaVersion: 5 }) +test("/[\\u{2-\\u{1}]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\u{2-\\u{1}]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\u{2-\\u{1}]/u", "Invalid regular expression: /[\\u{2-\\u{1}]/: Invalid unicode escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\a-\\z]/", {}, { ecmaVersion: 5 }) +test("/[\\a-\\z]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\a-\\z]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\a-\\z]/u", "Invalid regular expression: /[\\a-\\z]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\z-\\a]/", "Invalid regular expression: /[\\z-\\a]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\z-\\a]/", "Invalid regular expression: 
/[\\z-\\a]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\z-\\a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\z-\\a]/u", "Invalid regular expression: /[\\z-\\a]/: Invalid escape (1:1)", { ecmaVersion: 2015 }) +test("/[0-9--/]/", {}, { ecmaVersion: 5 }) +test("/[0-9--/]/", {}, { ecmaVersion: 2015 }) +testFail("/[0-9--/]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[0-9--/]/u", {}, { ecmaVersion: 2015 }) +testFail("/[0-9--+]/", "Invalid regular expression: /[0-9--+]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[0-9--+]/", "Invalid regular expression: /[0-9--+]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[0-9--+]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[0-9--+]/u", "Invalid regular expression: /[0-9--+]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\c-a]/", "Invalid regular expression: /[\\c-a]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c-a]/", "Invalid regular expression: /[\\c-a]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\c-a]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c-a]/u", "Invalid regular expression: /[\\c-a]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\c0-]/", {}, { ecmaVersion: 5 }) +test("/[\\c0-]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\c0-]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c0-]/u", "Invalid regular expression: /[\\c0-]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/[\\c_]/", {}, { ecmaVersion: 5 }) +test("/[\\c_]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\c_]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\c_]/u", "Invalid regular expression: /[\\c_]/: Invalid class escape 
(1:1)", { ecmaVersion: 2015 }) +testFail("/[🌷-🌸]/", "Invalid regular expression: /[🌷-🌸]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[🌷-🌸]/", "Invalid regular expression: /[🌷-🌸]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[🌷-🌸]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[🌷-🌸]/u", {}, { ecmaVersion: 2015 }) +testFail("/[🌸-🌷]/", "Invalid regular expression: /[🌸-🌷]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[🌸-🌷]/", "Invalid regular expression: /[🌸-🌷]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[🌸-🌷]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[🌸-🌷]/u", "Invalid regular expression: /[🌸-🌷]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\uD834\\uDF06-\\uD834\\uDF08a-z]/", "Invalid regular expression: /[\\uD834\\uDF06-\\uD834\\uDF08a-z]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) +testFail("/[\\uD834\\uDF06-\\uD834\\uDF08a-z]/", "Invalid regular expression: /[\\uD834\\uDF06-\\uD834\\uDF08a-z]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\uD834\\uDF06-\\uD834\\uDF08a-z]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/[\\uD834\\uDF06-\\uD834\\uDF08a-z]/u", {}, { ecmaVersion: 2015 }) +test("/^[0-9]*$/", {}, { ecmaVersion: 5 }) +test("/^[0-9]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[0-9]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[0-9]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[0-9]+$/", {}, { ecmaVersion: 5 }) +test("/^[0-9]+$/", {}, { ecmaVersion: 2015 }) +testFail("/^[0-9]+$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[0-9]+$/u", {}, { ecmaVersion: 2015 }) +test("/^[a-zA-Z]*$/", {}, { ecmaVersion: 5 }) +test("/^[a-zA-Z]*$/", {}, { ecmaVersion: 2015 }) 
+testFail("/^[a-zA-Z]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[a-zA-Z]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[a-zA-Z]+$/", {}, { ecmaVersion: 5 }) +test("/^[a-zA-Z]+$/", {}, { ecmaVersion: 2015 }) +testFail("/^[a-zA-Z]+$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[a-zA-Z]+$/u", {}, { ecmaVersion: 2015 }) +test("/^[0-9a-zA-Z]*$/", {}, { ecmaVersion: 5 }) +test("/^[0-9a-zA-Z]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[0-9a-zA-Z]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[0-9a-zA-Z]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[a-zA-Z0-9!-/:-@\\[-`{-~]*$/", {}, { ecmaVersion: 5 }) +test("/^[a-zA-Z0-9!-/:-@\\[-`{-~]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[a-zA-Z0-9!-/:-@\\[-`{-~]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[a-zA-Z0-9!-/:-@\\[-`{-~]*$/u", {}, { ecmaVersion: 2015 }) +test("/^([a-zA-Z0-9]{8,})$/", {}, { ecmaVersion: 5 }) +test("/^([a-zA-Z0-9]{8,})$/", {}, { ecmaVersion: 2015 }) +testFail("/^([a-zA-Z0-9]{8,})$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^([a-zA-Z0-9]{8,})$/u", {}, { ecmaVersion: 2015 }) +test("/^([a-zA-Z0-9]{6,8})$/", {}, { ecmaVersion: 5 }) +test("/^([a-zA-Z0-9]{6,8})$/", {}, { ecmaVersion: 2015 }) +testFail("/^([a-zA-Z0-9]{6,8})$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^([a-zA-Z0-9]{6,8})$/u", {}, { ecmaVersion: 2015 }) +test("/^([0-9]{0,8})$/", {}, { ecmaVersion: 5 }) +test("/^([0-9]{0,8})$/", {}, { ecmaVersion: 2015 }) +testFail("/^([0-9]{0,8})$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^([0-9]{0,8})$/u", {}, { ecmaVersion: 2015 }) +test("/^[0-9]{8}$/", {}, { ecmaVersion: 5 }) +test("/^[0-9]{8}$/", {}, { ecmaVersion: 2015 }) +testFail("/^[0-9]{8}$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[0-9]{8}$/u", {}, { ecmaVersion: 2015 }) +test("/^https?:\\/\\//", {}, { ecmaVersion: 
5 }) +test("/^https?:\\/\\//", {}, { ecmaVersion: 2015 }) +testFail("/^https?:\\/\\//u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^https?:\\/\\//u", {}, { ecmaVersion: 2015 }) +test("/^\\d{3}-\\d{4}$/", {}, { ecmaVersion: 5 }) +test("/^\\d{3}-\\d{4}$/", {}, { ecmaVersion: 2015 }) +testFail("/^\\d{3}-\\d{4}$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^\\d{3}-\\d{4}$/u", {}, { ecmaVersion: 2015 }) +test("/^\\d{1,3}(.\\d{1,3}){3}$/", {}, { ecmaVersion: 5 }) +test("/^\\d{1,3}(.\\d{1,3}){3}$/", {}, { ecmaVersion: 2015 }) +testFail("/^\\d{1,3}(.\\d{1,3}){3}$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^\\d{1,3}(.\\d{1,3}){3}$/u", {}, { ecmaVersion: 2015 }) +test("/^([1-9][0-9]*|0)(\\.[0-9]+)?$/", {}, { ecmaVersion: 5 }) +test("/^([1-9][0-9]*|0)(\\.[0-9]+)?$/", {}, { ecmaVersion: 2015 }) +testFail("/^([1-9][0-9]*|0)(\\.[0-9]+)?$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^([1-9][0-9]*|0)(\\.[0-9]+)?$/u", {}, { ecmaVersion: 2015 }) +test("/^-?([1-9][0-9]*|0)(\\.[0-9]+)?$/", {}, { ecmaVersion: 5 }) +test("/^-?([1-9][0-9]*|0)(\\.[0-9]+)?$/", {}, { ecmaVersion: 2015 }) +testFail("/^-?([1-9][0-9]*|0)(\\.[0-9]+)?$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^-?([1-9][0-9]*|0)(\\.[0-9]+)?$/u", {}, { ecmaVersion: 2015 }) +test("/^[ぁ-んー]*$/", {}, { ecmaVersion: 5 }) +test("/^[ぁ-んー]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[ぁ-んー]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[ぁ-んー]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[ァ-ンヴー]*$/", {}, { ecmaVersion: 5 }) +test("/^[ァ-ンヴー]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[ァ-ンヴー]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[ァ-ンヴー]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[ァ-ン゙゚\\-]*$/", {}, { ecmaVersion: 5 }) +test("/^[ァ-ン゙゚\\-]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[ァ-ン゙゚\\-]*$/u", "Invalid regular expression flag 
(1:1)", { ecmaVersion: 5 }) +test("/^[ァ-ン゙゚\\-]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[^\\x20-\\x7e]*$/", {}, { ecmaVersion: 5 }) +test("/^[^\\x20-\\x7e]*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[^\\x20-\\x7e]*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[^\\x20-\\x7e]*$/u", {}, { ecmaVersion: 2015 }) +test("/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$/", {}, { ecmaVersion: 5 }) +test("/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$/", {}, { ecmaVersion: 2015 }) +testFail("/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$/u", {}, { ecmaVersion: 2015 }) +test("/^((4\\d{3})|(5[1-5]\\d{2})|(6011))([- ])?\\d{4}([- ])?\\d{4}([- ])?\\d{4}|3[4,7]\\d{13}$/", {}, { ecmaVersion: 5 }) +test("/^((4\\d{3})|(5[1-5]\\d{2})|(6011))([- ])?\\d{4}([- ])?\\d{4}([- ])?\\d{4}|3[4,7]\\d{13}$/", {}, { ecmaVersion: 2015 }) +testFail("/^((4\\d{3})|(5[1-5]\\d{2})|(6011))([- ])?\\d{4}([- ])?\\d{4}([- ])?\\d{4}|3[4,7]\\d{13}$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^((4\\d{3})|(5[1-5]\\d{2})|(6011))([- ])?\\d{4}([- ])?\\d{4}([- ])?\\d{4}|3[4,7]\\d{13}$/u", {}, { ecmaVersion: 2015 }) +test("/^\\s*|\\s*$/", {}, { ecmaVersion: 5 }) +test("/^\\s*|\\s*$/", {}, { ecmaVersion: 2015 }) +testFail("/^\\s*|\\s*$/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +test("/^\\s*|\\s*$/u", {}, { ecmaVersion: 2015 }) +test("/[\\d][\\12-\\14]{1,}[^\\d]/", {}, { ecmaVersion: 5 }) +test("/[\\d][\\12-\\14]{1,}[^\\d]/", {}, { ecmaVersion: 2015 }) +testFail("/[\\d][\\12-\\14]{1,}[^\\d]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) +testFail("/[\\d][\\12-\\14]{1,}[^\\d]/u", "Invalid regular expression: /[\\d][\\12-\\14]{1,}[^\\d]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) + +/* +// This is 
a test case generator. +// The tests check whether the results are the same as those of V8's native RegExp. + +function getErrorMessage(pattern, flags) { + try { + new RegExp(pattern, flags) + return undefined + } catch (err) { + return err.message + } +} + +const patterns = [ + ["foo"], + ["foo|bar"], + ["||||"], + ["^|$|\\b|\\B"], + ["("], + ["(?"], + ["(?="], + ["(?=)"], + ["(?=foo"], + ["(?=foo)"], + ["(?!"], + ["(?!)"], + ["(?!foo"], + ["(?!foo)"], + ["(?=a)*"], + ["(?=a)+"], + ["(?=a)?"], + ["(?=a){"], + ["(?=a){}"], + ["(?=a){a}"], + ["(?=a){1}"], + ["(?=a){1,}"], + ["(?=a){1,2}"], + ["a*"], + ["a+"], + ["a?"], + ["a{"], + ["a{}"], + ["a{a}"], + ["a{1}"], + ["a{1"], + ["a{1,}"], + ["a{1,"], + ["a{1,2}"], + ["a{1,2"], + ["a{2,1}"], + ["a{2,1"], + ["(a{2,1}"], + ["a*?"], + ["a+?"], + ["a??"], + ["a{?"], + ["a{}?"], + ["a{a}?"], + ["a{1}?"], + ["a{1?"], + ["a{1,}?"], + ["a{1,?"], + ["a{1,2}?"], + ["a{1,2?"], + ["a{2,1}?"], + ["a{2,1?"], + ["👍🚀❇️"], + ["^"], + ["$"], + ["."], + ["(*)"], + ["+"], + ["?"], + ["("], + [")"], + ["[", "Unterminated regular expression", "Unterminated regular expression"], + ["]"], + ["{"], + ["}"], + ["|"], + ["^*"], + ["$*"], + ["${1,2"], + ["${1,2}"], + ["${2,1}"], + ["\\1"], + ["(a)\\1"], + ["\\1(a)"], + ["\\2(a)("], + ["(?:a)\\1"], + ["(a)\\2"], + ["(?:a)\\2"], + ["(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\10"], + ["(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11"], + ["(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)\\11"], + ["(?"], + ["(?a"], + ["(?a)"], + ["(?:"], + ["(?:a"], + ["(?:a)"], + ["(:a"], + ["\\d"], + ["\\D"], + ["\\s"], + ["\\S"], + ["\\w"], + ["\\W"], + ["\\f"], + ["\\n"], + ["\\r"], + ["\\t"], + ["\\v"], + ["\\cA"], + ["\\cz"], + ["\\c1"], + ["\\c"], + ["\\0"], + ["\\u"], + ["\\u1"], + ["\\u12"], + ["\\u123"], + ["\\u1234"], + ["\\u12345"], + ["\\u{"], + ["\\u{z"], + ["\\u{a}"], + ["\\u{20"], + ["\\u{20}"], + ["\\u{10FFFF}"], + ["\\u{110000}"], + ["\\u{00000001}"], + ["\\377"], + ["\\400"], + ["\\^"], + ["\\$"], + ["\\."], + ["\\+"], + ["\\?"], + ["\\("], + ["\\)"], + 
["\\["], + ["\\]"], + ["\\{"], + ["\\}"], + ["\\|"], + ["\\/"], + ["\\a"], + ["\\s"], + ["[]"], + ["[^-a-b-]"], + ["[-]"], + ["[a]"], + ["[--]"], + ["[-a]"], + ["[-a-]"], + ["[a-]"], + ["[a-b]"], + ["[-a-b-]"], + ["[---]"], + ["[b-a]"], + ["[a-b--/]"], + ["[a-b--+]"], + ["[\\b-\\n]"], + ["[b\\-a]"], + ["[\\d]"], + ["[\\D]"], + ["[\\s]"], + ["[\\S]"], + ["[\\w]"], + ["[\\W]"], + ["[\\d]"], + ["[\\D]"], + ["[\\s]"], + ["[\\S]"], + ["[\\w]"], + ["[\\W]"], + ["[\\f]"], + ["[\\n]"], + ["[\\r]"], + ["[\\t]"], + ["[\\v]"], + ["[\\cA]"], + ["[\\cz]"], + ["[\\c1]"], + ["[\\c]"], + ["[\\0]"], + ["[\\x]"], + ["[\\xz]"], + ["[\\x1]"], + ["[\\x12]"], + ["[\\x123]"], + ["[\\u]"], + ["[\\u1]"], + ["[\\u12]"], + ["[\\u123]"], + ["[\\u1234]"], + ["[\\u12345]"], + ["[\\u{]"], + ["[\\u{z]"], + ["[\\u{a}]"], + ["[\\u{20]"], + ["[\\u{20}]"], + ["[\\u{10FFFF}]"], + ["[\\u{110000}]"], + ["[\\u{00000001}]"], + ["[\\77]"], + ["[\\377]"], + ["[\\400]"], + ["[\\^]"], + ["[\\$]"], + ["[\\.]"], + ["[\\+]"], + ["[\\?]"], + ["[\\(]"], + ["[\\)]"], + ["[\\[]"], + ["[\\]]"], + ["[\\{]"], + ["[\\}]"], + ["[\\|]"], + ["[\\/]"], + ["[\\a]"], + ["[\\s]"], + ["[\\d-\\uFFFF]"], + ["[\\D-\\uFFFF]"], + ["[\\s-\\uFFFF]"], + ["[\\S-\\uFFFF]"], + ["[\\w-\\uFFFF]"], + ["[\\W-\\uFFFF]"], + ["[\\u0000-\\d]"], + ["[\\u0000-\\D]"], + ["[\\u0000-\\s]"], + ["[\\u0000-\\S]"], + ["[\\u0000-\\w]"], + ["[\\u0000-\\W]"], + ["[\\u0000-\\u0001]"], + ["[\\u0001-\\u0000]"], + ["[\\u{1}-\\u{2}]"], + ["[\\u{2}-\\u{1}]"], + ["[\\u{2-\\u{1}]"], + ["[\\a-\\z]"], + ["[\\z-\\a]"], + ["[0-9--/]"], + ["[0-9--+]"], + ["[\\c-a]"], + ["[\\c0-\u001F]"], + ["[\\c_]"], + ["[🌷-🌸]"], + ["[🌸-🌷]"], + ["[\\uD834\\uDF06-\\uD834\\uDF08a-z]"], + ["^[0-9]*$"], + ["^[0-9]+$"], + ["^[a-zA-Z]*$"], + ["^[a-zA-Z]+$"], + ["^[0-9a-zA-Z]*$"], + ["^[a-zA-Z0-9!-/:-@\\[-`{-~]*$"], + ["^([a-zA-Z0-9]{8,})$"], + ["^([a-zA-Z0-9]{6,8})$"], + ["^([0-9]{0,8})$"], + ["^[0-9]{8}$"], + ["^https?:\\/\\/"], + ["^\\d{3}-\\d{4}$"], + ["^\\d{1,3}(\.\\d{1,3}){3}$"], + 
["^([1-9][0-9]*|0)(\\.[0-9]+)?$"], + ["^-?([1-9][0-9]*|0)(\\.[0-9]+)?$"], + ["^[ぁ-んー]*$"], + ["^[ァ-ンヴー]*$"], + ["^[ァ-ン゙゚\\-]*$"], + ["^[^\\x20-\\x7e]*$"], + ["^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$"], + ["^((4\\d{3})|(5[1-5]\\d{2})|(6011))([- ])?\\d{4}([- ])?\\d{4}([- ])?\\d{4}|3[4,7]\\d{13}$"], + ["^\\s*|\\s*$"], + ["[\\d][\\12-\\14]{1,}[^\\d]"] +] + +const tests = [] +for (const [pattern, message, messageU] of patterns) { + // Without u flag + let msg = message || getErrorMessage(pattern, "") + if (msg === undefined) { + tests.push(`test("/${pattern.replace(/\\/g, "\\\\")}/", {}, { ecmaVersion: 5 })`) + tests.push(`test("/${pattern.replace(/\\/g, "\\\\")}/", {}, { ecmaVersion: 2015 })`) + } else { + tests.push(`testFail("/${pattern.replace(/\\/g, "\\\\")}/", "${msg.replace(/\\/g, "\\\\")} (1:1)", { ecmaVersion: 5 })`) + tests.push(`testFail("/${pattern.replace(/\\/g, "\\\\")}/", "${msg.replace(/\\/g, "\\\\")} (1:1)", { ecmaVersion: 2015 })`) + } + + // With u flag + msg = messageU || getErrorMessage(pattern, "u") + tests.push(`testFail("/${pattern.replace(/\\/g, "\\\\")}/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 })`) + if (msg === undefined) { + tests.push(`test("/${pattern.replace(/\\/g, "\\\\")}/u", {}, { ecmaVersion: 2015 })`) + } else { + tests.push(`testFail("/${pattern.replace(/\\/g, "\\\\")}/u", "${msg.replace(/\\/g, "\\\\")} (1:1)", { ecmaVersion: 2015 })`) + } +} + +require("fs").writeFileSync("a.txt", tests.join("\n")) + +*/ From 9170ac7d411d1be1829c21c9087de1c7c5a37024 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sun, 11 Feb 2018 15:42:45 +0900 Subject: [PATCH 02/18] refactor validator --- src/regexp.js | 223 +++++++++++++++++++++++++++----------------------- 1 file changed, 119 insertions(+), 104 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 01f7a2f57..29cc0d43a 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -52,73 +52,6 @@ const LEFT_CURLY_BRACKET = 0x7B // { const 
VERTICAL_LINE = 0x7C // | const RIGHT_CURLY_BRACKET = 0x7D // } -function isSyntaxCharacter(ch) { - return ( - ch === CIRCUMFLEX_ACCENT || - ch === DOLLAR_SIGN || - ch === REVERSE_SOLIDUS || - ch === FULL_STOP || - ch === ASTERISK || - ch === PLUS_SIGN || - ch === QUESTION_MARK || - ch === LEFT_PARENTHESIS || - ch === RIGHT_PARENTHESIS || - ch === LEFT_SQUARE_BRACKET || - ch === RIGHT_SQUARE_BRACKET || - ch === LEFT_CURLY_BRACKET || - ch === RIGHT_CURLY_BRACKET || - ch === VERTICAL_LINE - ) -} - -function isControlEscape(ch) { - return ( - ch === LATIN_SMALL_LETTER_F || - ch === LATIN_SMALL_LETTER_N || - ch === LATIN_SMALL_LETTER_R || - ch === LATIN_SMALL_LETTER_T || - ch === LATIN_SMALL_LETTER_V - ) -} - -function isControlLetter(ch) { - return ( - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_Z) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) - ) -} - -function isCharacterClassEscape(ch) { - return ( - ch === LATIN_SMALL_LETTER_D || - ch === LATIN_CAPITAL_LETTER_D || - ch === LATIN_SMALL_LETTER_S || - ch === LATIN_CAPITAL_LETTER_S || - ch === LATIN_SMALL_LETTER_W || - ch === LATIN_CAPITAL_LETTER_W - ) -} - -function isValidUnicode(ch) { - return ch >= 0 && ch <= 0x10FFFF -} - -function isDecimalDigit(ch) { - return ch >= DIGIT_ZERO && ch <= DIGIT_NINE -} - -function isHexDigit(ch) { - return ( - (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) - ) -} - -function isOctalDigit(ch) { - return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN -} - export class RegExpValidator { /** * Initialize this validator. @@ -135,6 +68,10 @@ export class RegExpValidator { this.maxBackReference = 0 } + // --------------------------------------------------------------------------- + // Public + // --------------------------------------------------------------------------- + /** * Validate the flags part of a given RegExpLiteral. 
* @@ -189,6 +126,10 @@ export class RegExpValidator { } } + // --------------------------------------------------------------------------- + // Helpers + // --------------------------------------------------------------------------- + raise(message) { this.parser.raise(this.start, `Invalid regular expression: /${this.pattern}/: ${message}`) } @@ -233,13 +174,29 @@ export class RegExpValidator { } eat(ch) { - if (this.codePointAt(this.pos) === ch) { + if (this.current() === ch) { this.advance() return true } return false } + parseDecimalInt(start, end) { + return parseInt(this.pattern.slice(start, end), 10) + } + + parseHexInt(start, end) { + return parseInt(this.pattern.slice(start, end), 16) + } + + parseOctalInt(start, end) { + return parseInt(this.pattern.slice(start, end), 8) + } + + // --------------------------------------------------------------------------- + // Productions + // --------------------------------------------------------------------------- + // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction disjunction(unicode) { this.alternative(unicode) @@ -351,11 +308,11 @@ export class RegExpValidator { if (this.eat(LEFT_CURLY_BRACKET)) { let i = this.pos, min = 0, max = -1 if (this.eatDecimalDigits()) { - min = this._parseDecimalInt(i, this.pos) + min = this.parseDecimalInt(i, this.pos) if (this.eat(COMMA)) { i = this.pos if (this.eatDecimalDigits()) { - max = this._parseDecimalInt(i, this.pos) + max = this.parseDecimalInt(i, this.pos) } } if (this.eat(RIGHT_CURLY_BRACKET)) { @@ -434,19 +391,37 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter eatSyntaxCharacter() { - if (isSyntaxCharacter(this.current())) { + if (this._isSyntaxCharacter(this.current())) { this.advance() return true } return false } + _isSyntaxCharacter(ch) { + return ( + ch === CIRCUMFLEX_ACCENT || + ch === DOLLAR_SIGN || + ch === REVERSE_SOLIDUS || + ch === FULL_STOP || + ch === ASTERISK || + ch === PLUS_SIGN || 
+ ch === QUESTION_MARK || + ch === LEFT_PARENTHESIS || + ch === RIGHT_PARENTHESIS || + ch === LEFT_SQUARE_BRACKET || + ch === RIGHT_SQUARE_BRACKET || + ch === LEFT_CURLY_BRACKET || + ch === RIGHT_CURLY_BRACKET || + ch === VERTICAL_LINE + ) + } // https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter // But eat eager. eatPatternCharacters() { const start = this.pos let ch = 0 - while ((ch = this.current()) !== -1 && !isSyntaxCharacter(ch)) { + while ((ch = this.current()) !== -1 && !this._isSyntaxCharacter(ch)) { this.advance() } return this.pos !== start @@ -478,7 +453,7 @@ export class RegExpValidator { eatAtomEscape(unicode) { const start = this.pos if (this.eatDecimalEscape()) { - const n = this._parseDecimalInt(start, this.pos) + const n = this.parseDecimalInt(start, this.pos) if (unicode) { // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape if (n > this.maxBackReference) { @@ -527,7 +502,7 @@ export class RegExpValidator { return false } _eatZero() { - if (this.current() === DIGIT_ZERO && !isDecimalDigit(this.lookahead())) { + if (this.current() === DIGIT_ZERO && !this._isDecimalDigit(this.lookahead())) { this.advance() return true } @@ -536,21 +511,36 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape eatControlEscape() { - if (isControlEscape(this.current())) { + if (this._isControlEscape(this.current())) { this.advance() return true } return false } + _isControlEscape(ch) { + return ( + ch === LATIN_SMALL_LETTER_F || + ch === LATIN_SMALL_LETTER_N || + ch === LATIN_SMALL_LETTER_R || + ch === LATIN_SMALL_LETTER_T || + ch === LATIN_SMALL_LETTER_V + ) + } // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter eatControlLetter() { - if (isControlLetter(this.current())) { + if (this._isControlLetter(this.current())) { this.advance() return true } return false } + _isControlLetter(ch) { + return ( + (ch >= LATIN_CAPITAL_LETTER_A && ch <= 
LATIN_CAPITAL_LETTER_Z) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) + ) + } // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence eatRegExpUnicodeEscapeSequence(unicode) { @@ -558,11 +548,11 @@ export class RegExpValidator { if (this.eat(LATIN_SMALL_LETTER_U)) { if (this._eatFixedHexDigits(4)) { - const code = this._parseHexInt(this.pos - 4, this.pos) + const code = this.parseHexInt(this.pos - 4, this.pos) if (unicode && code >= 0xD800 && code <= 0xDBFF) { const leadSurrogateEnd = this.pos if (this.eat(REVERSE_SOLIDUS) && this.eat(LATIN_SMALL_LETTER_U) && this._eatFixedHexDigits(4)) { - const codeT = this._parseHexInt(this.pos - 4, this.pos) + const codeT = this.parseHexInt(this.pos - 4, this.pos) if (codeT >= 0xDC00 && codeT <= 0xDFFF) { return true } @@ -576,7 +566,7 @@ export class RegExpValidator { this.eat(LEFT_CURLY_BRACKET) && this.eatHexDigits() && this.eat(RIGHT_CURLY_BRACKET) && - isValidUnicode(this._parseHexInt(start + 2, this.pos - 1)) + this._isValidUnicode(this.parseHexInt(start + 2, this.pos - 1)) ) { return true } @@ -588,6 +578,9 @@ export class RegExpValidator { return false } + _isValidUnicode(ch) { + return ch >= 0 && ch <= 0x10FFFF + } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape eatIdentityEscape(unicode) { @@ -620,12 +613,22 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape eatCharacterClassEscape() { - if (isCharacterClassEscape(this.current())) { + if (this._isCharacterClassEscape(this.current())) { this.advance() return true } return false } + _isCharacterClassEscape(ch) { + return ( + ch === LATIN_SMALL_LETTER_D || + ch === LATIN_CAPITAL_LETTER_D || + ch === LATIN_SMALL_LETTER_S || + ch === LATIN_CAPITAL_LETTER_S || + ch === LATIN_SMALL_LETTER_W || + ch === LATIN_CAPITAL_LETTER_W + ) + } // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass eatCharacterClass(unicode) { @@ 
-681,7 +684,7 @@ export class RegExpValidator { if (unicode) { // Make the same message as V8. const ch = this.current() - if (ch === LATIN_SMALL_LETTER_C || isOctalDigit(ch)) { + if (ch === LATIN_SMALL_LETTER_C || this._isOctalDigit(ch)) { this.raise("Invalid class escape") } this.raise("Invalid escape") @@ -722,7 +725,7 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter eatClassControlLetter() { const ch = this.current() - if (isDecimalDigit(ch) || ch === LOW_LINE) { + if (this._isDecimalDigit(ch) || ch === LOW_LINE) { this.advance() return true } @@ -749,22 +752,33 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits eatDecimalDigits() { const start = this.pos - while (isDecimalDigit(this.current())) { + while (this._isDecimalDigit(this.current())) { this.advance() } return this.pos !== start } + _isDecimalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_NINE + } // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits eatHexDigits() { const start = this.pos - while (isHexDigit(this.current())) { + while (this._isHexDigit(this.current())) { this.advance() } return this.pos !== start } + _isHexDigit(ch) { + return ( + (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || + (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) + ) + } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence + // Allows only 0-377(octal) i.e. 0-255(decimal). 
eatLegacyOctalEscapeSequence() { const ch = this.current() if (this.eatOctalDigit()) { @@ -778,17 +792,23 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit eatOctalDigit() { - if (isOctalDigit(this.current())) { + if (this._isOctalDigit(this.current())) { this.advance() return true } return false } + _isOctalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN + } + // https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits + // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigit + // And HexDigit HexDigit in https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence _eatFixedHexDigits(length) { const start = this.pos for (let i = 0; i < length; ++i) { - if (!isHexDigit(this.current())) { + if (!this._isHexDigit(this.current())) { this.pos = start return false } @@ -797,18 +817,10 @@ export class RegExpValidator { return true } - _parseDecimalInt(start, end) { - return parseInt(this.pattern.slice(start, end), 10) - } - - _parseHexInt(start, end) { - return parseInt(this.pattern.slice(start, end), 16) - } - - _parseOctalInt(start, end) { - return parseInt(this.pattern.slice(start, end), 8) - } - + // https://www.ecma-international.org/ecma-262/8.0/#sec-classatom + // https://www.ecma-international.org/ecma-262/8.0/#sec-classatomnodash + // https://www.ecma-international.org/ecma-262/8.0/#sec-classescape + // Get the value of characters to validate class ranges (e.g., [a-z]).
_parseClassAtom(start, end, unicode, isRight) { const ch1 = this._getOneElementCharSetAt(start, unicode, isRight) if (ch1 === REVERSE_SOLIDUS) { @@ -844,25 +856,25 @@ export class RegExpValidator { return LATIN_SMALL_LETTER_C case LATIN_SMALL_LETTER_X: if (end - start === 4) { - return this._parseHexInt(start + 2, end) + return this.parseHexInt(start + 2, end) } return LATIN_SMALL_LETTER_X case LATIN_SMALL_LETTER_U: if (end - start >= 5 && this.codePointAt(start + 2) === LEFT_CURLY_BRACKET) { - return this._parseHexInt(start + 3, end - 1) + return this.parseHexInt(start + 3, end - 1) } if (end - start === 6) { - return this._parseHexInt(start + 2, end) + return this.parseHexInt(start + 2, end) } if (end - start === 12) { - const lead = this._parseHexInt(start + 2, start + 6) - const trail = this._parseHexInt(start + 8, start + 12) + const lead = this.parseHexInt(start + 2, start + 6) + const trail = this.parseHexInt(start + 8, start + 12) return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 } return LATIN_SMALL_LETTER_U default: if (!unicode && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { - return this._parseOctalInt(start + 1, end) + return this.parseOctalInt(start + 1, end) } return ch2 } @@ -875,6 +887,9 @@ export class RegExpValidator { if (unicode || ch <= 0xFFFF) { return ch } + // This is a surrogate pair and no `u` flag, so returns a code point. + // If the right of `-` then returns the lead surrogate. + // If the left of `-` then returns the trail surrogate. return this.pattern.charCodeAt(isRight ? 
i : i + 1) } } From 7b0444010ecf07e331f57061e940ac2e28155bfa Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sun, 11 Feb 2018 16:08:36 +0900 Subject: [PATCH 03/18] =?UTF-8?q?rename=20this.pattern=20=E2=86=92=20this.?= =?UTF-8?q?source?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/regexp.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 29cc0d43a..2430dcace 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -61,7 +61,7 @@ export class RegExpValidator { this.parser = parser this.ecmaVersion = parser.options.ecmaVersion this.validFlags = `gim${this.ecmaVersion >= 6 ? "uy" : ""}${this.ecmaVersion >= 9 ? "s" : ""}` - this.pattern = "" + this.source = "" this.start = 0 this.pos = 0 this.numCapturingParens = 0 @@ -105,13 +105,13 @@ export class RegExpValidator { */ validatePattern(start, pattern, unicode) { this.start = start | 0 - this.pattern = pattern + "" + this.source = pattern + "" this.pos = 0 this.numCapturingParens = 0 this.maxBackReference = 0 this.disjunction(unicode) - if (this.pos !== this.pattern.length) { + if (this.pos !== this.source.length) { // Make the same messages as V8. if (this.eat(RIGHT_PARENTHESIS)) { this.raise("Unmatched ')'") @@ -131,12 +131,12 @@ export class RegExpValidator { // --------------------------------------------------------------------------- raise(message) { - this.parser.raise(this.start, `Invalid regular expression: /${this.pattern}/: ${message}`) + this.parser.raise(this.start, `Invalid regular expression: /${this.source}/: ${message}`) } // Node.js 0.12/0.10 don't support String.prototype.codePointAt(). 
codePointAt(i) { - const s = this.pattern + const s = this.source const l = s.length if (i >= l) { return -1 @@ -149,7 +149,7 @@ export class RegExpValidator { } nextIndex(i) { - const s = this.pattern + const s = this.source const l = s.length if (i >= l) { return l @@ -182,15 +182,15 @@ export class RegExpValidator { } parseDecimalInt(start, end) { - return parseInt(this.pattern.slice(start, end), 10) + return parseInt(this.source.slice(start, end), 10) } parseHexInt(start, end) { - return parseInt(this.pattern.slice(start, end), 16) + return parseInt(this.source.slice(start, end), 16) } parseOctalInt(start, end) { - return parseInt(this.pattern.slice(start, end), 8) + return parseInt(this.source.slice(start, end), 8) } // --------------------------------------------------------------------------- @@ -215,7 +215,7 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative alternative(unicode) { - while (this.pos < this.pattern.length && this.eatTerm(unicode)) + while (this.pos < this.source.length && this.eatTerm(unicode)) ; } @@ -890,6 +890,6 @@ export class RegExpValidator { // This is a surrogate pair and no `u` flag, so returns a code point. // If the right of `-` then returns the lead surrogate. // If the left of `-` then returns the trail surrogate. - return this.pattern.charCodeAt(isRight ? i : i + 1) + return this.source.charCodeAt(isRight ? 
i : i + 1) } } From 6fb13e05a5b071f62f1f1c0fac89070c3ccd0be9 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sun, 11 Feb 2018 22:00:31 +0900 Subject: [PATCH 04/18] add RegExp named capture groups --- bin/run_test262.js | 1 - src/regexp.js | 338 ++++++++++++++++++++++++++++---------- test/run.js | 1 + test/tests-regexp-2018.js | 69 ++++++++ 4 files changed, 323 insertions(+), 86 deletions(-) create mode 100644 test/tests-regexp-2018.js diff --git a/bin/run_test262.js b/bin/run_test262.js index 880ffa229..e0e60a882 100644 --- a/bin/run_test262.js +++ b/bin/run_test262.js @@ -10,7 +10,6 @@ const unsupportedFeatures = [ "class-fields-public", "optional-catch-binding", "regexp-lookbehind", - "regexp-named-groups", "regexp-unicode-property-escapes" ]; diff --git a/src/regexp.js b/src/regexp.js index 2430dcace..2f43b2bad 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -1,3 +1,5 @@ +import {isIdentifierStart, isIdentifierChar} from "./identifier.js" + const BACKSPACE = 0x08 const CHARACTER_TABULATION = 0x09 const LINE_FEED = 0x0A @@ -20,7 +22,9 @@ const DIGIT_THREE = 0x33 // 3 const DIGIT_SEVEN = 0x37 // 7 const DIGIT_NINE = 0x39 // 9 const COLON = 0x3A // : +const LESS_THAN_SIGN = 0x3C // < const EQUALS_SIGN = 0x3D // = +const GREATER_THAN_SIGN = 0x3E // > const QUESTION_MARK = 0x3F // ? 
const LATIN_CAPITAL_LETTER_A = 0x41 // A const LATIN_CAPITAL_LETTER_B = 0x42 // B @@ -35,6 +39,7 @@ const LATIN_SMALL_LETTER_B = 0x62 // b const LATIN_SMALL_LETTER_C = 0x63 // c const LATIN_SMALL_LETTER_D = 0x64 // d const LATIN_SMALL_LETTER_F = 0x66 // f +const LATIN_SMALL_LETTER_K = 0x6B // k const LATIN_SMALL_LETTER_N = 0x6E // n const LATIN_SMALL_LETTER_R = 0x72 // r const LATIN_SMALL_LETTER_S = 0x73 // s @@ -51,6 +56,8 @@ const CIRCUMFLEX_ACCENT = 0x5E // ^ const LEFT_CURLY_BRACKET = 0x7B // { const VERTICAL_LINE = 0x7C // | const RIGHT_CURLY_BRACKET = 0x7D // } +const ZERO_WIDTH_NON_JOINER = 0x200C +const ZERO_WIDTH_JOINER = 0x200D export class RegExpValidator { /** @@ -66,6 +73,9 @@ export class RegExpValidator { this.pos = 0 this.numCapturingParens = 0 this.maxBackReference = 0 + this.lastGroupName = "" + this.groupNames = [] + this.backReferenceNames = [] } // --------------------------------------------------------------------------- @@ -106,23 +116,15 @@ export class RegExpValidator { validatePattern(start, pattern, unicode) { this.start = start | 0 this.source = pattern + "" - this.pos = 0 - this.numCapturingParens = 0 - this.maxBackReference = 0 - - this.disjunction(unicode) - if (this.pos !== this.source.length) { - // Make the same messages as V8. - if (this.eat(RIGHT_PARENTHESIS)) { - this.raise("Unmatched ')'") - } - if (this.eat(RIGHT_SQUARE_BRACKET) || this.eat(RIGHT_CURLY_BRACKET)) { - this.raise("Lone quantifier brackets") - } - } - // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape - if (this.maxBackReference > this.numCapturingParens) { - this.raise("Invalid escape") + this.pattern(unicode, unicode && this.ecmaVersion >= 9) + + // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of + // parsing contains a |GroupName|, reparse with the goal symbol + // |Pattern[~U, +N]| and use this result instead. 
Throw a *SyntaxError* + // exception if _P_ did not conform to the grammar, if any elements of _P_ + // were not matched by the parse, or if any Early Error conditions exist. + if (!unicode && this.ecmaVersion >= 9 && this.groupNames.length > 0) { + this.pattern(false, true) } } @@ -193,15 +195,52 @@ export class RegExpValidator { return parseInt(this.source.slice(start, end), 8) } + codePointToString(ch) { + if (ch <= 0xFFFF) { + return String.fromCharCode(ch) + } + ch -= 0x10000 + return String.fromCharCode((ch >> 10) + 0xD800, (ch & 0x03FF) + 0xDC00) + } + // --------------------------------------------------------------------------- // Productions // --------------------------------------------------------------------------- + // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern + pattern(unicode, namedGroups) { + this.pos = 0 + this.numCapturingParens = 0 + this.maxBackReference = 0 + this.groupNames.length = 0 + this.backReferenceNames.length = 0 + + this.disjunction(unicode, namedGroups) + + if (this.pos !== this.source.length) { + // Make the same messages as V8. + if (this.eat(RIGHT_PARENTHESIS)) { + this.raise("Unmatched ')'") + } + if (this.eat(RIGHT_SQUARE_BRACKET) || this.eat(RIGHT_CURLY_BRACKET)) { + this.raise("Lone quantifier brackets") + } + } + if (this.maxBackReference > this.numCapturingParens) { + this.raise("Invalid escape") + } + for (const name of this.backReferenceNames) { + if (this.groupNames.indexOf(name) === -1) { + this.raise("Invalid named capture referenced") + } + } + } + // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction - disjunction(unicode) { - this.alternative(unicode) + disjunction(unicode, namedGroups) { + this.alternative(unicode, namedGroups) while (this.eat(VERTICAL_LINE)) { - this.alternative(unicode) + this.alternative(unicode, namedGroups) } // Make the same message as V8. 
@@ -214,16 +253,16 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative - alternative(unicode) { - while (this.pos < this.source.length && this.eatTerm(unicode)) + alternative(unicode, namedGroups) { + while (this.pos < this.source.length && this.eatTerm(unicode, namedGroups)) ; } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term - eatTerm(unicode) { + eatTerm(unicode, namedGroups) { const start = this.pos - if (this.eatQuantifiableAssertion()) { + if (this.eatQuantifiableAssertion(namedGroups)) { if (this.eatQuantifier(unicode)) { // Make the same message as V8. if (unicode) { @@ -234,11 +273,11 @@ export class RegExpValidator { this.pos = start } - if (this.eatAssertion(unicode)) { + if (this.eatAssertion(unicode, namedGroups)) { return true } - if (unicode ? this.eatAtom(true) : this.eatExtendedAtom()) { + if (unicode ? this.eatAtom(true, namedGroups) : this.eatExtendedAtom(namedGroups)) { this.eatQuantifier(unicode) return true } @@ -247,12 +286,12 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion - eatAssertion(unicode) { + eatAssertion(unicode, namedGroups) { return ( this.eat(CIRCUMFLEX_ACCENT) || this.eat(DOLLAR_SIGN) || this._eatWordBoundary() || - this._eatLookaheadAssertion(unicode) + this._eatLookaheadAssertion(unicode, namedGroups) ) } _eatWordBoundary() { @@ -265,11 +304,11 @@ export class RegExpValidator { } return false } - _eatLookaheadAssertion(unicode) { + _eatLookaheadAssertion(unicode, namedGroups) { const start = this.pos if (this.eat(LEFT_PARENTHESIS)) { if (this.eat(QUESTION_MARK) && (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK))) { - this.disjunction(unicode) + this.disjunction(unicode, namedGroups) if (!this.eat(RIGHT_PARENTHESIS)) { this.raise("Unterminated group") } @@ -281,8 +320,8 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-QuantifiableAssertion 
- eatQuantifiableAssertion() { - return this._eatLookaheadAssertion(false) + eatQuantifiableAssertion(namedGroups) { + return this._eatLookaheadAssertion(false, namedGroups) } // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier @@ -332,50 +371,65 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Atom - eatAtom(unicode) { + eatAtom(unicode, namedGroups) { return ( this.eatPatternCharacters() || this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape(unicode) || - this.eatCharacterClass(unicode) || - this._eatCapturingOrUncapturingGroup(unicode) + this._eatReverseSolidusAtomEscape(unicode, namedGroups) || + this.eatCharacterClass(unicode, namedGroups) || + this._eatUncapturingGroup(unicode, namedGroups) || + this._eatCapturingGroup(unicode, namedGroups) ) } - _eatReverseSolidusAtomEscape(unicode) { + _eatReverseSolidusAtomEscape(unicode, namedGroups) { const start = this.pos if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatAtomEscape(unicode)) { + if (this.eatAtomEscape(unicode, namedGroups)) { return true } this.pos = start } return false } - _eatCapturingOrUncapturingGroup(unicode) { + _eatUncapturingGroup(unicode, namedGroups) { + const start = this.pos if (this.eat(LEFT_PARENTHESIS)) { - const uncaptured = this.eat(QUESTION_MARK) - if (uncaptured && !this.eat(COLON)) { - this.raise("Invalid group") - } - this.disjunction(unicode) - if (!this.eat(RIGHT_PARENTHESIS)) { + if (this.eat(QUESTION_MARK) && this.eat(COLON)) { + this.disjunction(unicode, namedGroups) + if (this.eat(RIGHT_PARENTHESIS)) { + return true + } this.raise("Unterminated group") } - if (!uncaptured) { + this.pos = start + } + return false + } + _eatCapturingGroup(unicode, namedGroups) { + if (this.eat(LEFT_PARENTHESIS)) { + if (this.ecmaVersion >= 9) { + this.groupSpecifier(unicode) + } else if (this.current() === QUESTION_MARK) { + this.raise("Invalid group") + } + this.disjunction(unicode, namedGroups) + if (this.eat(RIGHT_PARENTHESIS)) 
{ this.numCapturingParens += 1 + return true } - return true + this.raise("Unterminated group") } return false } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom - eatExtendedAtom() { + eatExtendedAtom(namedGroups) { return ( this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape(false) || - this.eatCharacterClass(false) || - this._eatCapturingOrUncapturingGroup(false) || + this._eatReverseSolidusAtomEscape(false, namedGroups) || + this.eatCharacterClass(false, namedGroups) || + this._eatUncapturingGroup(false, namedGroups) || + this._eatCapturingGroup(false, namedGroups) || this.eatInvalidBracedQuantifier() || this.eatExtendedPatternCharacter() ) @@ -449,8 +503,117 @@ export class RegExpValidator { return false } + // GroupSpecifier[U] :: + // [empty] + // `?` GroupName[?U] + groupSpecifier(unicode) { + if (this.eat(QUESTION_MARK)) { + if (this.eatGroupName(unicode)) { + if (this.groupNames.indexOf(this.lastGroupName) !== -1) { + this.raise("Duplicate capture group name") + } + this.groupNames.push(this.lastGroupName) + return + } + this.raise("Invalid group") + } + } + + // GroupName[U] :: + // `<` RegExpIdentifierName[?U] `>` + // RegExpIdentifierName[U] :: + // RegExpIdentifierStart[?U] + // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] + // Note: this updates `this.lastGroupName` property with the eaten name. + eatGroupName(unicode) { + this.lastGroupName = "" + if (this.eat(LESS_THAN_SIGN)) { + if (this.eatRegExpIdentifierStart(unicode)) { + while (this.eatRegExpIdentifierPart(unicode)) + ; + if (this.eat(GREATER_THAN_SIGN)) { + return true + } + } + this.raise("Invalid capture group name") + } + return false + } + + // RegExpIdentifierStart[U] :: + // UnicodeIDStart + // `$` + // `_` + // `\` RegExpUnicodeEscapeSequence[?U] + // Note: this appends the eaten character to `this.lastGroupName` property. 
+ eatRegExpIdentifierStart(unicode) { + const start = this.pos + let ch = this.current() + this.advance() + + if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence(unicode)) { + ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) + } + if (this._isRegExpIdentifierStart(ch)) { + this.lastGroupName += this.codePointToString(ch) + return true + } + + this.pos = start + return false + } + _isRegExpIdentifierStart(ch) { + return isIdentifierStart(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE + } + + // RegExpIdentifierPart[U] :: + // UnicodeIDContinue + // `$` + // `_` + // `\` RegExpUnicodeEscapeSequence[?U] + // <ZWNJ> + // <ZWJ> + // Note: this appends the eaten character to `this.lastGroupName` property. + eatRegExpIdentifierPart(unicode) { + const start = this.pos + let ch = this.current() + this.advance() + + if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence(unicode)) { + ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) + } + if (this._isRegExpIdentifierPart(ch)) { + this.lastGroupName += this.codePointToString(ch) + return true + } + + this.pos = start + return false + } + _isRegExpIdentifierPart(ch) { + return isIdentifierChar(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE || ch === ZERO_WIDTH_NON_JOINER || ch === ZERO_WIDTH_JOINER + } + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape - eatAtomEscape(unicode) { + eatAtomEscape(unicode, namedGroups) { + if ( + this._eatBackReference(unicode) || + this.eatCharacterClassEscape(unicode) || + this.eatCharacterEscape(unicode, namedGroups) || + (namedGroups && this._eatKGroupName(unicode)) + ) { + return true + } + if (unicode) { + // Make the same message as V8.
+ if (this.current() === LATIN_SMALL_LETTER_C) { + this.raise("Invalid unicode escape") + } + this.raise("Invalid escape") + } + return false + } + _eatBackReference(unicode) { const start = this.pos if (this.eatDecimalEscape()) { const n = this.parseDecimalInt(start, this.pos) @@ -466,21 +629,21 @@ export class RegExpValidator { } this.pos = start } - if (this.eatCharacterClassEscape(unicode) || this.eatCharacterEscape(unicode)) { - return true - } - if (unicode) { - // Make the same message as V8. - if (this.current() === LATIN_SMALL_LETTER_C) { - this.raise("Invalid unicode escape") + return false + } + _eatKGroupName(unicode) { + if (this.eat(LATIN_SMALL_LETTER_K)) { + if (this.eatGroupName(unicode)) { + this.backReferenceNames.push(this.lastGroupName) + return true } - this.raise("Invalid escape") + this.raise("Invalid named reference") } return false } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape - eatCharacterEscape(unicode) { + eatCharacterEscape(unicode, namedGroups) { return ( this.eatControlEscape() || this._eatCControlLetter() || @@ -488,7 +651,7 @@ export class RegExpValidator { this.eatHexEscapeSequence(unicode) || this.eatRegExpUnicodeEscapeSequence(unicode) || (!unicode && this.eatLegacyOctalEscapeSequence()) || - this.eatIdentityEscape(unicode) + this.eatIdentityEscape(unicode, namedGroups) ) } _eatCControlLetter() { @@ -581,9 +744,24 @@ export class RegExpValidator { _isValidUnicode(ch) { return ch >= 0 && ch <= 0x10FFFF } + _parseRegExpUnicodeEscapeSequence(start, end) { + start += 1 // skip `u` + if (end - start >= 3 && this.codePointAt(start) === LEFT_CURLY_BRACKET) { + return this.parseHexInt(start + 1, end - 1) + } + if (end - start === 4) { + return this.parseHexInt(start, end) + } + if (end - start === 10) { + const lead = this.parseHexInt(start, start + 4) + const trail = this.parseHexInt(end - 4, end) + return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 + } + return LATIN_SMALL_LETTER_U + } // 
https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape - eatIdentityEscape(unicode) { + eatIdentityEscape(unicode, namedGroups) { if (unicode) { return ( this.eatSyntaxCharacter() || @@ -591,7 +769,8 @@ export class RegExpValidator { ) } - if (this.current() !== LATIN_SMALL_LETTER_C) { + const ch = this.current() + if (ch !== LATIN_SMALL_LETTER_C && (!namedGroups || ch !== LATIN_SMALL_LETTER_K)) { this.advance() return true } @@ -631,10 +810,10 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass - eatCharacterClass(unicode) { + eatCharacterClass(unicode, namedGroups) { if (this.eat(LEFT_SQUARE_BRACKET)) { this.eat(CIRCUMFLEX_ACCENT) - this.classRanges(unicode) + this.classRanges(unicode, namedGroups) if (this.eat(RIGHT_SQUARE_BRACKET)) { return true } @@ -647,14 +826,14 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash - classRanges(unicode) { + classRanges(unicode, namedGroups) { for (; ;) { const leftStart = this.pos - if (this.eatClassAtom(unicode)) { + if (this.eatClassAtom(unicode, namedGroups)) { const leftEnd = this.pos if (this.eat(HYPHEN_MINUS)) { const rightStart = this.pos - if (this.eatClassAtom(unicode)) { + if (this.eatClassAtom(unicode, namedGroups)) { const rightEnd = this.pos const left = this._parseClassAtom(leftStart, leftEnd, unicode, false) const right = this._parseClassAtom(rightStart, rightEnd, unicode, true) @@ -674,11 +853,11 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash - eatClassAtom(unicode) { + eatClassAtom(unicode, namedGroups) { const start = this.pos if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatClassEscape(unicode)) { + 
if (this.eatClassEscape(unicode, namedGroups)) { return true } if (unicode) { @@ -702,13 +881,13 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape - eatClassEscape(unicode) { + eatClassEscape(unicode, namedGroups) { return ( this.eat(LATIN_SMALL_LETTER_B) || (unicode && this.eat(HYPHEN_MINUS)) || (!unicode && this._eatCClassControlLetter(unicode)) || this.eatCharacterClassEscape() || - this.eatCharacterEscape(unicode) + this.eatCharacterEscape(unicode, namedGroups) ) } _eatCClassControlLetter() { @@ -860,18 +1039,7 @@ export class RegExpValidator { } return LATIN_SMALL_LETTER_X case LATIN_SMALL_LETTER_U: - if (end - start >= 5 && this.codePointAt(start + 2) === LEFT_CURLY_BRACKET) { - return this.parseHexInt(start + 3, end - 1) - } - if (end - start === 6) { - return this.parseHexInt(start + 2, end) - } - if (end - start === 12) { - const lead = this.parseHexInt(start + 2, start + 6) - const trail = this.parseHexInt(start + 8, start + 12) - return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 - } - return LATIN_SMALL_LETTER_U + return this._parseRegExpUnicodeEscapeSequence(start + 1, end) default: if (!unicode && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { return this.parseOctalInt(start + 1, end) diff --git a/test/run.js b/test/run.js index 3a94d85ea..025f6503d 100644 --- a/test/run.js +++ b/test/run.js @@ -13,6 +13,7 @@ require("./tests-rest-spread-properties.js"); require("./tests-async-iteration.js"); require("./tests-regexp.js"); + require("./tests-regexp-2018.js"); acorn = require("../dist/acorn") require("../dist/acorn_loose") } else { diff --git a/test/tests-regexp-2018.js b/test/tests-regexp-2018.js new file mode 100644 index 000000000..7f9c9b60c --- /dev/null +++ b/test/tests-regexp-2018.js @@ -0,0 +1,69 @@ +if (typeof exports != "undefined") { + var test = require("./driver.js").test + var testFail = require("./driver.js").testFail +} + 
+//------------------------------------------------------------------------------ +// Named capture groups +//------------------------------------------------------------------------------ + +test("/(a)/", {}, { ecmaVersion: 2018 }) +test("/(?:a)/", {}, { ecmaVersion: 2018 }) +testFail("/(?a/", "Invalid regular expression: /(?a/: Invalid group (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)/", "Invalid regular expression: /(?a)/: Invalid group (1:1)", { ecmaVersion: 2018 }) +testFail("/(?)/", {}, { ecmaVersion: 2018 }) +test("/\\k/", {}, { ecmaVersion: 2017 }) +test("/\\k/", {}, { ecmaVersion: 2018 }) +testFail("/\\k/u", "Invalid regular expression: /\\k/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +testFail("/\\k/u", "Invalid regular expression: /\\k/: Invalid named reference (1:1)", { ecmaVersion: 2018 }) +test("/\\k/", {}, { ecmaVersion: 2017 }) +test("/\\k/", {}, { ecmaVersion: 2018 }) +testFail("/\\k/u", "Invalid regular expression: /\\k/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +testFail("/\\k/u", "Invalid regular expression: /\\k/: Invalid named capture referenced (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\ka)\\k/", "Invalid regular expression: /(?a)\\k/: Invalid group (1:1)", { ecmaVersion: 2017 }) +test("/(?a)\\k/", {}, { ecmaVersion: 2018 }) +testFail("/(?a)\\k/u", "Invalid regular expression: /(?a)\\k/: Invalid group (1:1)", { ecmaVersion: 2017 }) +test("/(?a)\\k/u", {}, { ecmaVersion: 2018 }) + +test("/(?a)\\1/", {}, { ecmaVersion: 2018 }) +test("/(?a)\\1/u", {}, { ecmaVersion: 2018 }) +test("/(?a)\\2/", {}, { ecmaVersion: 2018 }) +testFail("/(?a)\\2/u", "Invalid regular expression: /(?a)\\2/: Invalid escape (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)\\k/", "Invalid regular expression: /(?a)\\k/: Invalid named capture referenced (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)\\k/u", "Invalid regular expression: /(?a)\\k/: Invalid named capture referenced 
(1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)(?a)/", "Invalid regular expression: /(?a)(?a)/: Duplicate capture group name (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)(?a)/u", "Invalid regular expression: /(?a)(?a)/: Duplicate capture group name (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)(?<\\u{61}>a)/u", "Invalid regular expression: /(?a)(?<\\u{61}>a)/: Duplicate capture group name (1:1)", { ecmaVersion: 2018 }) +testFail("/(?a)(?<\\u0061>a)/u", "Invalid regular expression: /(?a)(?<\\u0061>a)/: Duplicate capture group name (1:1)", { ecmaVersion: 2018 }) +test("/(?a)(?a)/", {}, { ecmaVersion: 2018 }) +test("/(?a)(?a)/u", {}, { ecmaVersion: 2018 }) + +test("/\\k(?a)/", {}, { ecmaVersion: 2018 }) +test("/\\k(?a)/u", {}, { ecmaVersion: 2018 }) +test("/\\1(?a)/", {}, { ecmaVersion: 2018 }) +test("/\\1(?a)/u", {}, { ecmaVersion: 2018 }) + +test("/(?<$abc>a)\\k<$abc>/u", {}, { ecmaVersion: 2018 }) +test("/(?<あ>a)\\k<あ>/u", {}, { ecmaVersion: 2018 }) +test("/(?<𠮷>a)\\k<\\u{20bb7}>/u", {}, { ecmaVersion: 2018 }) +test("/(?<\\uD842\\uDFB7>a)\\k<\\u{20bb7}>/u", {}, { ecmaVersion: 2018 }) +test("/(?<\\u{20bb7}>a)\\k<\\uD842\\uDFB7>/u", {}, { ecmaVersion: 2018 }) +testFail("/(?<☀>a)\\k<☀>/u", "Invalid regular expression: /(?<☀>a)\\k<☀>/: Invalid capture group name (1:1)", { ecmaVersion: 2018 }) +testFail("/(?<\\u0020>a)\\k<\\u0020>/u", "Invalid regular expression: /(?<\\u0020>a)\\k<\\u0020>/: Invalid capture group name (1:1)", { ecmaVersion: 2018 }) +test("/(?a)\\k<\\u0061\\u0062\\u0063>/u", {}, { ecmaVersion: 2018 }) +test("/(?<\\u0061\\u0062\\u0063>a)\\k/u", {}, { ecmaVersion: 2018 }) +test("/(?<\\u0061\\u0062\\u0063>a)\\k<\\u{61}\\u{62}\\u{63}>/u", {}, { ecmaVersion: 2018 }) +testFail("/(?<\\u0061\\u0062\\u0063>a)\\k/u", "Invalid regular expression: /(?<\\u0061\\u0062\\u0063>a)\\k/: Invalid named capture referenced (1:1)", { ecmaVersion: 2018 }) +testFail("/(?<11>a)\\k<11>/u", "Invalid regular expression: /(?<11>a)\\k<11>/: Invalid capture group name (1:1)", { 
ecmaVersion: 2018 }) +test("/(?a)\\k/u", {}, { ecmaVersion: 2018 }) From 9fa439aece9f432842b52a8b982bc5fddaaa28ba Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 14:41:32 +0900 Subject: [PATCH 05/18] move parameters to fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unicode → this.switchU namedGroups → this.switchN This makes easy to enhance the validator by plugins --- src/regexp.js | 209 ++++++++++++++++++++++++++------------------------ 1 file changed, 107 insertions(+), 102 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 2f43b2bad..a572998b9 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -70,6 +70,8 @@ export class RegExpValidator { this.validFlags = `gim${this.ecmaVersion >= 6 ? "uy" : ""}${this.ecmaVersion >= 9 ? "s" : ""}` this.source = "" this.start = 0 + this.switchU = false + this.switchN = false this.pos = 0 this.numCapturingParens = 0 this.maxBackReference = 0 @@ -116,15 +118,18 @@ export class RegExpValidator { validatePattern(start, pattern, unicode) { this.start = start | 0 this.source = pattern + "" - this.pattern(unicode, unicode && this.ecmaVersion >= 9) + this.switchU = !!unicode && this.ecmaVersion >= 6 + this.switchN = !!unicode && this.ecmaVersion >= 9 + this.pattern() // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of // parsing contains a |GroupName|, reparse with the goal symbol // |Pattern[~U, +N]| and use this result instead. Throw a *SyntaxError* // exception if _P_ did not conform to the grammar, if any elements of _P_ // were not matched by the parse, or if any Early Error conditions exist. 
- if (!unicode && this.ecmaVersion >= 9 && this.groupNames.length > 0) { - this.pattern(false, true) + if (!this.switchN && this.ecmaVersion >= 9 && this.groupNames.length > 0) { + this.switchN = true + this.pattern() } } @@ -208,14 +213,14 @@ export class RegExpValidator { // --------------------------------------------------------------------------- // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern - pattern(unicode, namedGroups) { + pattern() { this.pos = 0 this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames.length = 0 this.backReferenceNames.length = 0 - this.disjunction(unicode, namedGroups) + this.disjunction() if (this.pos !== this.source.length) { // Make the same messages as V8. @@ -237,14 +242,14 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction - disjunction(unicode, namedGroups) { - this.alternative(unicode, namedGroups) + disjunction() { + this.alternative() while (this.eat(VERTICAL_LINE)) { - this.alternative(unicode, namedGroups) + this.alternative() } // Make the same message as V8. - if (this.eatQuantifier(unicode, true)) { + if (this.eatQuantifier(true)) { this.raise("Nothing to repeat") } if (this.eat(LEFT_CURLY_BRACKET)) { @@ -253,19 +258,19 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative - alternative(unicode, namedGroups) { - while (this.pos < this.source.length && this.eatTerm(unicode, namedGroups)) + alternative() { + while (this.pos < this.source.length && this.eatTerm()) ; } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term - eatTerm(unicode, namedGroups) { + eatTerm() { const start = this.pos - if (this.eatQuantifiableAssertion(namedGroups)) { - if (this.eatQuantifier(unicode)) { + if (this.eatQuantifiableAssertion()) { + if (this.eatQuantifier()) { // Make the same message as V8. 
- if (unicode) { + if (this.switchU) { this.raise("Invalid quantifier") } return true @@ -273,12 +278,12 @@ export class RegExpValidator { this.pos = start } - if (this.eatAssertion(unicode, namedGroups)) { + if (this.eatAssertion()) { return true } - if (unicode ? this.eatAtom(true, namedGroups) : this.eatExtendedAtom(namedGroups)) { - this.eatQuantifier(unicode) + if (this.switchU ? this.eatAtom() : this.eatExtendedAtom()) { + this.eatQuantifier() return true } @@ -286,12 +291,12 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion - eatAssertion(unicode, namedGroups) { + eatAssertion() { return ( this.eat(CIRCUMFLEX_ACCENT) || this.eat(DOLLAR_SIGN) || this._eatWordBoundary() || - this._eatLookaheadAssertion(unicode, namedGroups) + this._eatLookaheadAssertion() ) } _eatWordBoundary() { @@ -304,11 +309,11 @@ export class RegExpValidator { } return false } - _eatLookaheadAssertion(unicode, namedGroups) { + _eatLookaheadAssertion() { const start = this.pos if (this.eat(LEFT_PARENTHESIS)) { if (this.eat(QUESTION_MARK) && (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK))) { - this.disjunction(unicode, namedGroups) + this.disjunction() if (!this.eat(RIGHT_PARENTHESIS)) { this.raise("Unterminated group") } @@ -320,13 +325,13 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-QuantifiableAssertion - eatQuantifiableAssertion(namedGroups) { - return this._eatLookaheadAssertion(false, namedGroups) + eatQuantifiableAssertion() { + return this._eatLookaheadAssertion() } // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier - eatQuantifier(unicode, noError = false) { - if (this.eatQuantifierPrefix(unicode, noError)) { + eatQuantifier(noError = false) { + if (this.eatQuantifierPrefix(noError)) { this.eat(QUESTION_MARK) return true } @@ -334,15 +339,15 @@ export class RegExpValidator { } // 
https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix - eatQuantifierPrefix(unicode, noError) { + eatQuantifierPrefix(noError) { return ( this.eat(ASTERISK) || this.eat(PLUS_SIGN) || this.eat(QUESTION_MARK) || - this._eatBracedQuantifier(unicode, noError) + this._eatBracedQuantifier(noError) ) } - _eatBracedQuantifier(unicode, noError) { + _eatBracedQuantifier(noError) { const start = this.pos if (this.eat(LEFT_CURLY_BRACKET)) { let i = this.pos, min = 0, max = -1 @@ -362,7 +367,7 @@ export class RegExpValidator { return true } } - if (unicode && !noError) { + if (this.switchU && !noError) { this.raise("Incomplete quantifier") } this.pos = start @@ -371,31 +376,31 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Atom - eatAtom(unicode, namedGroups) { + eatAtom() { return ( this.eatPatternCharacters() || this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape(unicode, namedGroups) || - this.eatCharacterClass(unicode, namedGroups) || - this._eatUncapturingGroup(unicode, namedGroups) || - this._eatCapturingGroup(unicode, namedGroups) + this._eatReverseSolidusAtomEscape() || + this.eatCharacterClass() || + this._eatUncapturingGroup() || + this._eatCapturingGroup() ) } - _eatReverseSolidusAtomEscape(unicode, namedGroups) { + _eatReverseSolidusAtomEscape() { const start = this.pos if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatAtomEscape(unicode, namedGroups)) { + if (this.eatAtomEscape()) { return true } this.pos = start } return false } - _eatUncapturingGroup(unicode, namedGroups) { + _eatUncapturingGroup() { const start = this.pos if (this.eat(LEFT_PARENTHESIS)) { if (this.eat(QUESTION_MARK) && this.eat(COLON)) { - this.disjunction(unicode, namedGroups) + this.disjunction() if (this.eat(RIGHT_PARENTHESIS)) { return true } @@ -405,14 +410,14 @@ export class RegExpValidator { } return false } - _eatCapturingGroup(unicode, namedGroups) { + _eatCapturingGroup() { if (this.eat(LEFT_PARENTHESIS)) { if 
(this.ecmaVersion >= 9) { - this.groupSpecifier(unicode) + this.groupSpecifier() } else if (this.current() === QUESTION_MARK) { this.raise("Invalid group") } - this.disjunction(unicode, namedGroups) + this.disjunction() if (this.eat(RIGHT_PARENTHESIS)) { this.numCapturingParens += 1 return true @@ -423,13 +428,13 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom - eatExtendedAtom(namedGroups) { + eatExtendedAtom() { return ( this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape(false, namedGroups) || - this.eatCharacterClass(false, namedGroups) || - this._eatUncapturingGroup(false, namedGroups) || - this._eatCapturingGroup(false, namedGroups) || + this._eatReverseSolidusAtomEscape() || + this.eatCharacterClass() || + this._eatUncapturingGroup() || + this._eatCapturingGroup() || this.eatInvalidBracedQuantifier() || this.eatExtendedPatternCharacter() ) @@ -437,7 +442,7 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier eatInvalidBracedQuantifier() { - if (this._eatBracedQuantifier(false, true)) { + if (this._eatBracedQuantifier(true)) { this.raise("Nothing to repeat") } return false @@ -506,9 +511,9 @@ export class RegExpValidator { // GroupSpecifier[U] :: // [empty] // `?` GroupName[?U] - groupSpecifier(unicode) { + groupSpecifier() { if (this.eat(QUESTION_MARK)) { - if (this.eatGroupName(unicode)) { + if (this.eatGroupName()) { if (this.groupNames.indexOf(this.lastGroupName) !== -1) { this.raise("Duplicate capture group name") } @@ -525,11 +530,11 @@ export class RegExpValidator { // RegExpIdentifierStart[?U] // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] // Note: this updates `this.lastGroupName` property with the eaten name. 
- eatGroupName(unicode) { + eatGroupName() { this.lastGroupName = "" if (this.eat(LESS_THAN_SIGN)) { - if (this.eatRegExpIdentifierStart(unicode)) { - while (this.eatRegExpIdentifierPart(unicode)) + if (this.eatRegExpIdentifierStart()) { + while (this.eatRegExpIdentifierPart()) ; if (this.eat(GREATER_THAN_SIGN)) { return true @@ -546,12 +551,12 @@ export class RegExpValidator { // `_` // `\` RegExpUnicodeEscapeSequence[?U] // Note: this appends the eaten character to `this.lastGroupName` property. - eatRegExpIdentifierStart(unicode) { + eatRegExpIdentifierStart() { const start = this.pos let ch = this.current() this.advance() - if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence(unicode)) { + if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) } if (this._isRegExpIdentifierStart(ch)) { @@ -574,12 +579,12 @@ export class RegExpValidator { // // // Note: this appends the eaten character to `this.lastGroupName` property. 
- eatRegExpIdentifierPart(unicode) { + eatRegExpIdentifierPart() { const start = this.pos let ch = this.current() this.advance() - if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence(unicode)) { + if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) } if (this._isRegExpIdentifierPart(ch)) { @@ -595,16 +600,16 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape - eatAtomEscape(unicode, namedGroups) { + eatAtomEscape() { if ( - this._eatBackReference(unicode) || - this.eatCharacterClassEscape(unicode) || - this.eatCharacterEscape(unicode, namedGroups) || - (namedGroups && this._eatKGroupName(unicode)) + this._eatBackReference() || + this.eatCharacterClassEscape() || + this.eatCharacterEscape() || + (this.switchN && this._eatKGroupName()) ) { return true } - if (unicode) { + if (this.switchU) { // Make the same message as V8. if (this.current() === LATIN_SMALL_LETTER_C) { this.raise("Invalid unicode escape") @@ -613,11 +618,11 @@ export class RegExpValidator { } return false } - _eatBackReference(unicode) { + _eatBackReference() { const start = this.pos if (this.eatDecimalEscape()) { const n = this.parseDecimalInt(start, this.pos) - if (unicode) { + if (this.switchU) { // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape if (n > this.maxBackReference) { this.maxBackReference = n @@ -631,9 +636,9 @@ export class RegExpValidator { } return false } - _eatKGroupName(unicode) { + _eatKGroupName() { if (this.eat(LATIN_SMALL_LETTER_K)) { - if (this.eatGroupName(unicode)) { + if (this.eatGroupName()) { this.backReferenceNames.push(this.lastGroupName) return true } @@ -643,15 +648,15 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape - eatCharacterEscape(unicode, namedGroups) { + eatCharacterEscape() { return ( 
this.eatControlEscape() || this._eatCControlLetter() || this._eatZero() || - this.eatHexEscapeSequence(unicode) || - this.eatRegExpUnicodeEscapeSequence(unicode) || - (!unicode && this.eatLegacyOctalEscapeSequence()) || - this.eatIdentityEscape(unicode, namedGroups) + this.eatHexEscapeSequence() || + this.eatRegExpUnicodeEscapeSequence() || + (!this.switchU && this.eatLegacyOctalEscapeSequence()) || + this.eatIdentityEscape() ) } _eatCControlLetter() { @@ -706,13 +711,13 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence - eatRegExpUnicodeEscapeSequence(unicode) { + eatRegExpUnicodeEscapeSequence() { const start = this.pos if (this.eat(LATIN_SMALL_LETTER_U)) { if (this._eatFixedHexDigits(4)) { const code = this.parseHexInt(this.pos - 4, this.pos) - if (unicode && code >= 0xD800 && code <= 0xDBFF) { + if (this.switchU && code >= 0xD800 && code <= 0xDBFF) { const leadSurrogateEnd = this.pos if (this.eat(REVERSE_SOLIDUS) && this.eat(LATIN_SMALL_LETTER_U) && this._eatFixedHexDigits(4)) { const codeT = this.parseHexInt(this.pos - 4, this.pos) @@ -725,7 +730,7 @@ export class RegExpValidator { return true } if ( - unicode && + this.switchU && this.eat(LEFT_CURLY_BRACKET) && this.eatHexDigits() && this.eat(RIGHT_CURLY_BRACKET) && @@ -733,7 +738,7 @@ export class RegExpValidator { ) { return true } - if (unicode) { + if (this.switchU) { this.raise("Invalid unicode escape") } this.pos = start @@ -761,8 +766,8 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape - eatIdentityEscape(unicode, namedGroups) { - if (unicode) { + eatIdentityEscape() { + if (this.switchU) { return ( this.eatSyntaxCharacter() || this.eat(SOLIDUS) @@ -770,7 +775,7 @@ export class RegExpValidator { } const ch = this.current() - if (ch !== LATIN_SMALL_LETTER_C && (!namedGroups || ch !== LATIN_SMALL_LETTER_K)) { + if (ch !== LATIN_SMALL_LETTER_C && (!this.switchN || ch !== 
LATIN_SMALL_LETTER_K)) { this.advance() return true } @@ -810,10 +815,10 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass - eatCharacterClass(unicode, namedGroups) { + eatCharacterClass() { if (this.eat(LEFT_SQUARE_BRACKET)) { this.eat(CIRCUMFLEX_ACCENT) - this.classRanges(unicode, namedGroups) + this.classRanges() if (this.eat(RIGHT_SQUARE_BRACKET)) { return true } @@ -826,18 +831,18 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash - classRanges(unicode, namedGroups) { + classRanges() { for (; ;) { const leftStart = this.pos - if (this.eatClassAtom(unicode, namedGroups)) { + if (this.eatClassAtom()) { const leftEnd = this.pos if (this.eat(HYPHEN_MINUS)) { const rightStart = this.pos - if (this.eatClassAtom(unicode, namedGroups)) { + if (this.eatClassAtom()) { const rightEnd = this.pos - const left = this._parseClassAtom(leftStart, leftEnd, unicode, false) - const right = this._parseClassAtom(rightStart, rightEnd, unicode, true) - if (unicode && (left === -1 || right === -1)) { + const left = this._parseClassAtom(leftStart, leftEnd, false) + const right = this._parseClassAtom(rightStart, rightEnd, true) + if (this.switchU && (left === -1 || right === -1)) { this.raise("Invalid character class") } if (left !== -1 && right !== -1 && left > right) { @@ -853,14 +858,14 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash - eatClassAtom(unicode, namedGroups) { + eatClassAtom() { const start = this.pos if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatClassEscape(unicode, namedGroups)) { + if (this.eatClassEscape()) { return true } - if (unicode) { + if (this.switchU) { // Make the same 
message as V8. const ch = this.current() if (ch === LATIN_SMALL_LETTER_C || this._isOctalDigit(ch)) { @@ -881,13 +886,13 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape - eatClassEscape(unicode, namedGroups) { + eatClassEscape() { return ( this.eat(LATIN_SMALL_LETTER_B) || - (unicode && this.eat(HYPHEN_MINUS)) || - (!unicode && this._eatCClassControlLetter(unicode)) || + (this.switchU && this.eat(HYPHEN_MINUS)) || + (!this.switchU && this._eatCClassControlLetter()) || this.eatCharacterClassEscape() || - this.eatCharacterEscape(unicode, namedGroups) + this.eatCharacterEscape() ) } _eatCClassControlLetter() { @@ -912,14 +917,14 @@ export class RegExpValidator { } // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence - eatHexEscapeSequence(unicode) { + eatHexEscapeSequence() { const start = this.pos if (this.eat(LATIN_SMALL_LETTER_X)) { if (this._eatFixedHexDigits(2)) { return true } - if (unicode) { + if (this.switchU) { this.raise("Invalid escape") } this.pos = start @@ -1000,10 +1005,10 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#sec-classatomnodash // https://www.ecma-international.org/ecma-262/8.0/#sec-classescape // Get the value of chracters to validate class ranges (e.g., [a-z]). 
- _parseClassAtom(start, end, unicode, isRight) { - const ch1 = this._getOneElementCharSetAt(start, unicode, isRight) + _parseClassAtom(start, end, isRight) { + const ch1 = this._getOneElementCharSetAt(start, isRight) if (ch1 === REVERSE_SOLIDUS) { - const ch2 = this._getOneElementCharSetAt(start + 1, unicode, isRight) + const ch2 = this._getOneElementCharSetAt(start + 1, isRight) switch (ch2) { case LATIN_SMALL_LETTER_B: return BACKSPACE @@ -1041,7 +1046,7 @@ export class RegExpValidator { case LATIN_SMALL_LETTER_U: return this._parseRegExpUnicodeEscapeSequence(start + 1, end) default: - if (!unicode && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { + if (!this.switchU && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { return this.parseOctalInt(start + 1, end) } return ch2 @@ -1050,9 +1055,9 @@ export class RegExpValidator { return ch1 } // https://www.ecma-international.org/ecma-262/8.0/#sec-notation - _getOneElementCharSetAt(i, unicode, isRight) { + _getOneElementCharSetAt(i, isRight) { const ch = this.codePointAt(i) - if (unicode || ch <= 0xFFFF) { + if (this.switchU || ch <= 0xFFFF) { return ch } // This is a surrogate pair and no `u` flag, so returns a code point. 
From f7d0ef86b9605a0cb2c984becb87f8e4d231778e Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 16:32:53 +0900 Subject: [PATCH 06/18] add RegExp Unicode property escapes --- bin/run_test262.js | 3 +- src/regexp.js | 105 +++++++- src/unicode-property-data.js | 463 +++++++++++++++++++++++++++++++++++ test/tests-regexp-2018.js | 33 +++ 4 files changed, 591 insertions(+), 13 deletions(-) create mode 100644 src/unicode-property-data.js diff --git a/bin/run_test262.js b/bin/run_test262.js index e0e60a882..7ab5153af 100644 --- a/bin/run_test262.js +++ b/bin/run_test262.js @@ -9,8 +9,7 @@ const unsupportedFeatures = [ "class-fields-private", "class-fields-public", "optional-catch-binding", - "regexp-lookbehind", - "regexp-unicode-property-escapes" + "regexp-lookbehind" ]; run( diff --git a/src/regexp.js b/src/regexp.js index a572998b9..604ae17b7 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -1,4 +1,5 @@ import {isIdentifierStart, isIdentifierChar} from "./identifier.js" +import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js" const BACKSPACE = 0x08 const CHARACTER_TABULATION = 0x09 @@ -30,6 +31,7 @@ const LATIN_CAPITAL_LETTER_A = 0x41 // A const LATIN_CAPITAL_LETTER_B = 0x42 // B const LATIN_CAPITAL_LETTER_D = 0x44 // D const LATIN_CAPITAL_LETTER_F = 0x46 // F +const LATIN_CAPITAL_LETTER_P = 0x50 // P const LATIN_CAPITAL_LETTER_S = 0x53 // S const LATIN_CAPITAL_LETTER_W = 0x57 // W const LATIN_CAPITAL_LETTER_Z = 0x5A // Z @@ -41,6 +43,7 @@ const LATIN_SMALL_LETTER_D = 0x64 // d const LATIN_SMALL_LETTER_F = 0x66 // f const LATIN_SMALL_LETTER_K = 0x6B // k const LATIN_SMALL_LETTER_N = 0x6E // n +const LATIN_SMALL_LETTER_P = 0x70 // p const LATIN_SMALL_LETTER_R = 0x72 // r const LATIN_SMALL_LETTER_S = 0x73 // s const LATIN_SMALL_LETTER_T = 0x74 // t @@ -73,9 +76,9 @@ export class RegExpValidator { this.switchU = false this.switchN = false this.pos = 0 + this.lastStringValue = "" this.numCapturingParens = 0 this.maxBackReference = 0 - 
this.lastGroupName = "" this.groupNames = [] this.backReferenceNames = [] } @@ -514,10 +517,10 @@ export class RegExpValidator { groupSpecifier() { if (this.eat(QUESTION_MARK)) { if (this.eatGroupName()) { - if (this.groupNames.indexOf(this.lastGroupName) !== -1) { + if (this.groupNames.indexOf(this.lastStringValue) !== -1) { this.raise("Duplicate capture group name") } - this.groupNames.push(this.lastGroupName) + this.groupNames.push(this.lastStringValue) return } this.raise("Invalid group") @@ -529,9 +532,9 @@ export class RegExpValidator { // RegExpIdentifierName[U] :: // RegExpIdentifierStart[?U] // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] - // Note: this updates `this.lastGroupName` property with the eaten name. + // Note: this updates `this.lastStringValue` property with the eaten name. eatGroupName() { - this.lastGroupName = "" + this.lastStringValue = "" if (this.eat(LESS_THAN_SIGN)) { if (this.eatRegExpIdentifierStart()) { while (this.eatRegExpIdentifierPart()) @@ -550,7 +553,7 @@ export class RegExpValidator { // `$` // `_` // `\` RegExpUnicodeEscapeSequence[?U] - // Note: this appends the eaten character to `this.lastGroupName` property. + // Note: this appends the eaten character to `this.lastStringValue` property. eatRegExpIdentifierStart() { const start = this.pos let ch = this.current() @@ -560,7 +563,7 @@ export class RegExpValidator { ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) } if (this._isRegExpIdentifierStart(ch)) { - this.lastGroupName += this.codePointToString(ch) + this.lastStringValue += this.codePointToString(ch) return true } @@ -578,7 +581,7 @@ export class RegExpValidator { // `\` RegExpUnicodeEscapeSequence[?U] // // - // Note: this appends the eaten character to `this.lastGroupName` property. + // Note: this appends the eaten character to `this.lastStringValue` property. 
eatRegExpIdentifierPart() { const start = this.pos let ch = this.current() @@ -588,7 +591,7 @@ export class RegExpValidator { ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) } if (this._isRegExpIdentifierPart(ch)) { - this.lastGroupName += this.codePointToString(ch) + this.lastStringValue += this.codePointToString(ch) return true } @@ -639,7 +642,7 @@ export class RegExpValidator { _eatKGroupName() { if (this.eat(LATIN_SMALL_LETTER_K)) { if (this.eatGroupName()) { - this.backReferenceNames.push(this.lastGroupName) + this.backReferenceNames.push(this.lastStringValue) return true } this.raise("Invalid named reference") @@ -797,10 +800,18 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape eatCharacterClassEscape() { - if (this._isCharacterClassEscape(this.current())) { + const ch = this.current() + if (this._isCharacterClassEscape(ch)) { this.advance() return true } + if (this.switchU && this.ecmaVersion >= 9 && (ch === LATIN_CAPITAL_LETTER_P || ch === LATIN_SMALL_LETTER_P)) { + this.advance() + if (this.eat(LEFT_CURLY_BRACKET) && this.eatUnicodePropertyValueExpression() && this.eat(RIGHT_CURLY_BRACKET)) { + return true + } + this.raise("Invalid property name") + } return false } _isCharacterClassEscape(ch) { @@ -814,6 +825,76 @@ export class RegExpValidator { ) } + // UnicodePropertyValueExpression :: + // UnicodePropertyName `=` UnicodePropertyValue + // LoneUnicodePropertyNameOrValue + eatUnicodePropertyValueExpression() { + const start = this.pos + + if (this.eatUnicodePropertyName() && this.eat(EQUALS_SIGN)) { + const name = this.lastStringValue + if (this.eatUnicodePropertyValue()) { + const value = this.lastStringValue + this._validateUnicodePropertyNameAndValue(name, value) + return true + } + } + this.pos = start + + if (this.eatLoneUnicodePropertyNameOrValue()) { + const nameOrValue = this.lastStringValue + this._validateUnicodePropertyNameOrValue(nameOrValue) + return true + } + 
return false + } + _validateUnicodePropertyNameAndValue(name, value) { + if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) { + this.raise("Invalid property name") + } + } + _validateUnicodePropertyNameOrValue(nameOrValue) { + if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) { + this.raise("Invalid property name") + } + } + + // UnicodePropertyName :: + // UnicodePropertyNameCharacters + eatUnicodePropertyName() { + let ch = 0 + this.lastStringValue = "" + while (this._isUnicodePropertyNameCharacter(ch = this.current())) { + this.lastStringValue += this.codePointToString(ch) + this.advance() + } + return this.lastStringValue !== "" + } + _isUnicodePropertyNameCharacter(ch) { + return this._isControlLetter(ch) || ch === LOW_LINE + } + + // UnicodePropertyValue :: + // UnicodePropertyValueCharacters + eatUnicodePropertyValue() { + let ch = 0 + this.lastStringValue = "" + while (this._isUnicodePropertyValueCharacter(ch = this.current())) { + this.lastStringValue += this.codePointToString(ch) + this.advance() + } + return this.lastStringValue !== "" + } + _isUnicodePropertyValueCharacter(ch) { + return this._isUnicodePropertyNameCharacter(ch) || this._isDecimalDigit(ch) + } + + // LoneUnicodePropertyNameOrValue :: + // UnicodePropertyValueCharacters + eatLoneUnicodePropertyNameOrValue() { + return this.eatUnicodePropertyValue() + } + // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass eatCharacterClass() { if (this.eat(LEFT_SQUARE_BRACKET)) { @@ -1020,6 +1101,8 @@ export class RegExpValidator { case LATIN_CAPITAL_LETTER_S: case LATIN_SMALL_LETTER_W: case LATIN_CAPITAL_LETTER_W: + case LATIN_SMALL_LETTER_P: + case LATIN_CAPITAL_LETTER_P: return -1 // Those are not single character. 
// CharacterEscape diff --git a/src/unicode-property-data.js b/src/unicode-property-data.js new file mode 100644 index 000000000..d44f1a2c1 --- /dev/null +++ b/src/unicode-property-data.js @@ -0,0 +1,463 @@ +const data = { + "$LONE": [ + "ASCII", + "ASCII_Hex_Digit", + "AHex", + "Alphabetic", + "Alpha", + "Any", + "Assigned", + "Bidi_Control", + "Bidi_C", + "Bidi_Mirrored", + "Bidi_M", + "Case_Ignorable", + "CI", + "Cased", + "Changes_When_Casefolded", + "CWCF", + "Changes_When_Casemapped", + "CWCM", + "Changes_When_Lowercased", + "CWL", + "Changes_When_NFKC_Casefolded", + "CWKCF", + "Changes_When_Titlecased", + "CWT", + "Changes_When_Uppercased", + "CWU", + "Dash", + "Default_Ignorable_Code_Point", + "DI", + "Deprecated", + "Dep", + "Diacritic", + "Dia", + "Emoji", + "Emoji_Component", + "Emoji_Modifier", + "Emoji_Modifier_Base", + "Emoji_Presentation", + "Extender", + "Ext", + "Grapheme_Base", + "Gr_Base", + "Grapheme_Extend", + "Gr_Ext", + "Hex_Digit", + "Hex", + "IDS_Binary_Operator", + "IDSB", + "IDS_Trinary_Operator", + "IDST", + "ID_Continue", + "IDC", + "ID_Start", + "IDS", + "Ideographic", + "Ideo", + "Join_Control", + "Join_C", + "Logical_Order_Exception", + "LOE", + "Lowercase", + "Lower", + "Math", + "Noncharacter_Code_Point", + "NChar", + "Pattern_Syntax", + "Pat_Syn", + "Pattern_White_Space", + "Pat_WS", + "Quotation_Mark", + "QMark", + "Radical", + "Regional_Indicator", + "RI", + "Sentence_Terminal", + "STerm", + "Soft_Dotted", + "SD", + "Terminal_Punctuation", + "Term", + "Unified_Ideograph", + "UIdeo", + "Uppercase", + "Upper", + "Variation_Selector", + "VS", + "White_Space", + "space", + "XID_Continue", + "XIDC", + "XID_Start", + "XIDS" + ], + "General_Category": [ + "Cased_Letter", + "LC", + "Close_Punctuation", + "Pe", + "Connector_Punctuation", + "Pc", + "Control", + "Cc", + "cntrl", + "Currency_Symbol", + "Sc", + "Dash_Punctuation", + "Pd", + "Decimal_Number", + "Nd", + "digit", + "Enclosing_Mark", + "Me", + "Final_Punctuation", + "Pf", + 
"Format", + "Cf", + "Initial_Punctuation", + "Pi", + "Letter", + "L", + "Letter_Number", + "Nl", + "Line_Separator", + "Zl", + "Lowercase_Letter", + "Ll", + "Mark", + "M", + "Combining_Mark", + "Math_Symbol", + "Sm", + "Modifier_Letter", + "Lm", + "Modifier_Symbol", + "Sk", + "Nonspacing_Mark", + "Mn", + "Number", + "N", + "Open_Punctuation", + "Ps", + "Other", + "C", + "Other_Letter", + "Lo", + "Other_Number", + "No", + "Other_Punctuation", + "Po", + "Other_Symbol", + "So", + "Paragraph_Separator", + "Zp", + "Private_Use", + "Co", + "Punctuation", + "P", + "punct", + "Separator", + "Z", + "Space_Separator", + "Zs", + "Spacing_Mark", + "Mc", + "Surrogate", + "Cs", + "Symbol", + "S", + "Titlecase_Letter", + "Lt", + "Unassigned", + "Cn", + "Uppercase_Letter", + "Lu" + ], + "Script": [ + "Adlam", + "Adlm", + "Ahom", + "Anatolian_Hieroglyphs", + "Hluw", + "Arabic", + "Arab", + "Armenian", + "Armn", + "Avestan", + "Avst", + "Balinese", + "Bali", + "Bamum", + "Bamu", + "Bassa_Vah", + "Bass", + "Batak", + "Batk", + "Bengali", + "Beng", + "Bhaiksuki", + "Bhks", + "Bopomofo", + "Bopo", + "Brahmi", + "Brah", + "Braille", + "Brai", + "Buginese", + "Bugi", + "Buhid", + "Buhd", + "Canadian_Aboriginal", + "Cans", + "Carian", + "Cari", + "Caucasian_Albanian", + "Aghb", + "Chakma", + "Cakm", + "Cham", + "Cherokee", + "Cher", + "Common", + "Zyyy", + "Coptic", + "Copt", + "Qaac", + "Cuneiform", + "Xsux", + "Cypriot", + "Cprt", + "Cyrillic", + "Cyrl", + "Deseret", + "Dsrt", + "Devanagari", + "Deva", + "Duployan", + "Dupl", + "Egyptian_Hieroglyphs", + "Egyp", + "Elbasan", + "Elba", + "Ethiopic", + "Ethi", + "Georgian", + "Geor", + "Glagolitic", + "Glag", + "Gothic", + "Goth", + "Grantha", + "Gran", + "Greek", + "Grek", + "Gujarati", + "Gujr", + "Gurmukhi", + "Guru", + "Han", + "Hani", + "Hangul", + "Hang", + "Hanunoo", + "Hano", + "Hatran", + "Hatr", + "Hebrew", + "Hebr", + "Hiragana", + "Hira", + "Imperial_Aramaic", + "Armi", + "Inherited", + "Zinh", + "Qaai", + 
"Inscriptional_Pahlavi", + "Phli", + "Inscriptional_Parthian", + "Prti", + "Javanese", + "Java", + "Kaithi", + "Kthi", + "Kannada", + "Knda", + "Katakana", + "Kana", + "Kayah_Li", + "Kali", + "Kharoshthi", + "Khar", + "Khmer", + "Khmr", + "Khojki", + "Khoj", + "Khudawadi", + "Sind", + "Lao", + "Laoo", + "Latin", + "Latn", + "Lepcha", + "Lepc", + "Limbu", + "Limb", + "Linear_A", + "Lina", + "Linear_B", + "Linb", + "Lisu", + "Lycian", + "Lyci", + "Lydian", + "Lydi", + "Mahajani", + "Mahj", + "Malayalam", + "Mlym", + "Mandaic", + "Mand", + "Manichaean", + "Mani", + "Marchen", + "Marc", + "Masaram_Gondi", + "Gonm", + "Meetei_Mayek", + "Mtei", + "Mende_Kikakui", + "Mend", + "Meroitic_Cursive", + "Merc", + "Meroitic_Hieroglyphs", + "Mero", + "Miao", + "Plrd", + "Modi", + "Mongolian", + "Mong", + "Mro", + "Mroo", + "Multani", + "Mult", + "Myanmar", + "Mymr", + "Nabataean", + "Nbat", + "New_Tai_Lue", + "Talu", + "Newa", + "Nko", + "Nkoo", + "Nushu", + "Nshu", + "Ogham", + "Ogam", + "Ol_Chiki", + "Olck", + "Old_Hungarian", + "Hung", + "Old_Italic", + "Ital", + "Old_North_Arabian", + "Narb", + "Old_Permic", + "Perm", + "Old_Persian", + "Xpeo", + "Old_South_Arabian", + "Sarb", + "Old_Turkic", + "Orkh", + "Oriya", + "Orya", + "Osage", + "Osge", + "Osmanya", + "Osma", + "Pahawh_Hmong", + "Hmng", + "Palmyrene", + "Palm", + "Pau_Cin_Hau", + "Pauc", + "Phags_Pa", + "Phag", + "Phoenician", + "Phnx", + "Psalter_Pahlavi", + "Phlp", + "Rejang", + "Rjng", + "Runic", + "Runr", + "Samaritan", + "Samr", + "Saurashtra", + "Saur", + "Sharada", + "Shrd", + "Shavian", + "Shaw", + "Siddham", + "Sidd", + "SignWriting", + "Sgnw", + "Sinhala", + "Sinh", + "Sora_Sompeng", + "Sora", + "Soyombo", + "Soyo", + "Sundanese", + "Sund", + "Syloti_Nagri", + "Sylo", + "Syriac", + "Syrc", + "Tagalog", + "Tglg", + "Tagbanwa", + "Tagb", + "Tai_Le", + "Tale", + "Tai_Tham", + "Lana", + "Tai_Viet", + "Tavt", + "Takri", + "Takr", + "Tamil", + "Taml", + "Tangut", + "Tang", + "Telugu", + "Telu", + "Thaana", + 
"Thaa", + "Thai", + "Tibetan", + "Tibt", + "Tifinagh", + "Tfng", + "Tirhuta", + "Tirh", + "Ugaritic", + "Ugar", + "Vai", + "Vaii", + "Warang_Citi", + "Wara", + "Yi", + "Yiii", + "Zanabazar_Square", + "Zanb" + ] +} +Array.prototype.push.apply(data.$LONE, data.General_Category) +data.gc = data.General_Category +data.sc = data.Script_Extensions = data.scx = data.Script + +export default data diff --git a/test/tests-regexp-2018.js b/test/tests-regexp-2018.js index 7f9c9b60c..4107d4499 100644 --- a/test/tests-regexp-2018.js +++ b/test/tests-regexp-2018.js @@ -67,3 +67,36 @@ test("/(?<\\u0061\\u0062\\u0063>a)\\k<\\u{61}\\u{62}\\u{63}>/u", {}, { ecmaVersi testFail("/(?<\\u0061\\u0062\\u0063>a)\\k/u", "Invalid regular expression: /(?<\\u0061\\u0062\\u0063>a)\\k/: Invalid named capture referenced (1:1)", { ecmaVersion: 2018 }) testFail("/(?<11>a)\\k<11>/u", "Invalid regular expression: /(?<11>a)\\k<11>/: Invalid capture group name (1:1)", { ecmaVersion: 2018 }) test("/(?a)\\k/u", {}, { ecmaVersion: 2018 }) + +//------------------------------------------------------------------------------ +// Unicode property escapes +//------------------------------------------------------------------------------ + +test("/\\p/", {}, { ecmaVersion: 2017 }) +testFail("/\\p/u", "Invalid regular expression: /\\p/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +test("/\\p/", {}, { ecmaVersion: 2018 }) +testFail("/\\p/u", "Invalid regular expression: /\\p/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +test("/\\p{/", {}, { ecmaVersion: 2017 }) +testFail("/\\p{/u", "Invalid regular expression: /\\p{/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +test("/\\p{/", {}, { ecmaVersion: 2018 }) +testFail("/\\p{/u", "Invalid regular expression: /\\p{/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +test("/\\p{ASCII/", {}, { ecmaVersion: 2017 }) +testFail("/\\p{ASCII/u", "Invalid regular expression: /\\p{ASCII/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +test("/\\p{ASCII/", {}, { 
ecmaVersion: 2018 }) +testFail("/\\p{ASCII/u", "Invalid regular expression: /\\p{ASCII/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +test("/\\p{ASCII}/", {}, { ecmaVersion: 2017 }) +testFail("/\\p{ASCII}/u", "Invalid regular expression: /\\p{ASCII}/: Invalid escape (1:1)", { ecmaVersion: 2017 }) +test("/\\p{ASCII}/", {}, { ecmaVersion: 2018 }) +test("/\\p{ASCII}/u", {}, { ecmaVersion: 2018 }) + +test("/\\p{Emoji}/u", {}, { ecmaVersion: 2018 }) +testFail("/\\p{General_Category}/u", "Invalid regular expression: /\\p{General_Category}/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +testFail("/\\p{General_Category=}/u", "Invalid regular expression: /\\p{General_Category=}/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +testFail("/\\p{General_Category/u", "Invalid regular expression: /\\p{General_Category/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +testFail("/\\p{General_Category=/u", "Invalid regular expression: /\\p{General_Category=/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +testFail("/\\p{General_Category=Letter/u", "Invalid regular expression: /\\p{General_Category=Letter/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +test("/\\p{General_Category=Letter}/u", {}, { ecmaVersion: 2018 }) +testFail("/\\p{General_Category=Hiragana}/u", "Invalid regular expression: /\\p{General_Category=Hiragana}/: Invalid property name (1:1)", { ecmaVersion: 2018 }) +test("/\\p{Script=Hiragana}/u", {}, { ecmaVersion: 2018 }) +testFail("/[\\p{Script=Hiragana}-\\p{Script=Katakana}]/u", "Invalid regular expression: /[\\p{Script=Hiragana}-\\p{Script=Katakana}]/: Invalid character class (1:1)", { ecmaVersion: 2018 }) +test("/[\\p{Script=Hiragana}\\-\\p{Script=Katakana}]/u", {}, { ecmaVersion: 2018 }) From ff7de099ee48e7b5606ef56050f7751bff2604ec Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 18:55:02 +0900 Subject: [PATCH 07/18] refactor This commit changes the approach validating values. 
Previously, it called `parseXxx(start, end)` methods after eating a production. Now, each `eat` method sets `this.lastIntValue` while parsing, and `this.lastIntValue` is then used to validate values. --- src/regexp.js | 284 +++++++++++++++++++++----------------------------- 1 file changed, 121 insertions(+), 163 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 604ae17b7..06de61128 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -19,7 +19,6 @@ const FULL_STOP = 0x2E // . const SOLIDUS = 0x2F // / const DIGIT_ZERO = 0x30 // 0 const DIGIT_ONE = 0x31 // 1 -const DIGIT_THREE = 0x33 // 3 const DIGIT_SEVEN = 0x37 // 7 const DIGIT_NINE = 0x39 // 9 const COLON = 0x3A // : @@ -76,6 +75,7 @@ export class RegExpValidator { this.switchU = false this.switchN = false this.pos = 0 + this.lastIntValue = 0 this.lastStringValue = "" this.numCapturingParens = 0 this.maxBackReference = 0 @@ -191,24 +191,17 @@ export class RegExpValidator { return false } - parseDecimalInt(start, end) { - return parseInt(this.source.slice(start, end), 10) - } - - parseHexInt(start, end) { - return parseInt(this.source.slice(start, end), 16) - } - - parseOctalInt(start, end) { - return parseInt(this.source.slice(start, end), 8) - } - codePointToString(ch) { if (ch <= 0xFFFF) { return String.fromCharCode(ch) } - ch -= 0x10000 - return String.fromCharCode((ch >> 10) + 0xD800, (ch & 0x03FF) + 0xDC00) + return String.fromCharCode(this._getLeadSurrogate(ch), this._getTrailSurrogate(ch)) + } + _getLeadSurrogate(ch) { + return ((ch - 0x10000) >> 10) + 0xD800 + } + _getTrailSurrogate(ch) { + return ((ch - 0x10000) & 0x03FF) + 0xDC00 } // --------------------------------------------------------------------------- @@ -353,14 +346,11 @@ export class RegExpValidator { _eatBracedQuantifier(noError) { const start = this.pos if (this.eat(LEFT_CURLY_BRACKET)) { - let i = this.pos, min = 0, max = -1 + let min = 0, max = -1 if (this.eatDecimalDigits()) { - min = this.parseDecimalInt(i, this.pos) - if
(this.eat(COMMA)) { - i = this.pos - if (this.eatDecimalDigits()) { - max = this.parseDecimalInt(i, this.pos) - } + min = this.lastIntValue + if (this.eat(COMMA) && this.eatDecimalDigits()) { + max = this.lastIntValue } if (this.eat(RIGHT_CURLY_BRACKET)) { // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term @@ -453,7 +443,9 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter eatSyntaxCharacter() { - if (this._isSyntaxCharacter(this.current())) { + const ch = this.current() + if (this._isSyntaxCharacter(ch)) { + this.lastIntValue = ch this.advance() return true } @@ -560,7 +552,7 @@ export class RegExpValidator { this.advance() if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { - ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) + ch = this.lastIntValue } if (this._isRegExpIdentifierStart(ch)) { this.lastStringValue += this.codePointToString(ch) @@ -588,7 +580,7 @@ export class RegExpValidator { this.advance() if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { - ch = this._parseRegExpUnicodeEscapeSequence(start + 1, this.pos) + ch = this.lastIntValue } if (this._isRegExpIdentifierPart(ch)) { this.lastStringValue += this.codePointToString(ch) @@ -624,7 +616,7 @@ export class RegExpValidator { _eatBackReference() { const start = this.pos if (this.eatDecimalEscape()) { - const n = this.parseDecimalInt(start, this.pos) + const n = this.lastIntValue if (this.switchU) { // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape if (n > this.maxBackReference) { @@ -674,6 +666,7 @@ export class RegExpValidator { } _eatZero() { if (this.current() === DIGIT_ZERO && !this._isDecimalDigit(this.lookahead())) { + this.lastIntValue = 0 this.advance() return true } @@ -682,25 +675,40 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape eatControlEscape() { - if 
(this._isControlEscape(this.current())) { + const ch = this.current() + if (ch === LATIN_SMALL_LETTER_T) { + this.lastIntValue = CHARACTER_TABULATION + this.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_N) { + this.lastIntValue = LINE_FEED + this.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_V) { + this.lastIntValue = LINE_TABULATION + this.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_F) { + this.lastIntValue = FORM_FEED + this.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_R) { + this.lastIntValue = CARRIAGE_RETURN this.advance() return true } return false } - _isControlEscape(ch) { - return ( - ch === LATIN_SMALL_LETTER_F || - ch === LATIN_SMALL_LETTER_N || - ch === LATIN_SMALL_LETTER_R || - ch === LATIN_SMALL_LETTER_T || - ch === LATIN_SMALL_LETTER_V - ) - } // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter eatControlLetter() { - if (this._isControlLetter(this.current())) { + const ch = this.current() + if (this._isControlLetter(ch)) { + this.lastIntValue = ch % 0x20 this.advance() return true } @@ -719,16 +727,18 @@ export class RegExpValidator { if (this.eat(LATIN_SMALL_LETTER_U)) { if (this._eatFixedHexDigits(4)) { - const code = this.parseHexInt(this.pos - 4, this.pos) - if (this.switchU && code >= 0xD800 && code <= 0xDBFF) { + const lead = this.lastIntValue + if (this.switchU && lead >= 0xD800 && lead <= 0xDBFF) { const leadSurrogateEnd = this.pos if (this.eat(REVERSE_SOLIDUS) && this.eat(LATIN_SMALL_LETTER_U) && this._eatFixedHexDigits(4)) { - const codeT = this.parseHexInt(this.pos - 4, this.pos) - if (codeT >= 0xDC00 && codeT <= 0xDFFF) { + const trail = this.lastIntValue + if (trail >= 0xDC00 && trail <= 0xDFFF) { + this.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 return true } } this.pos = leadSurrogateEnd + this.lastIntValue = lead } return true } @@ -737,7 +747,7 @@ export class RegExpValidator { this.eat(LEFT_CURLY_BRACKET) && this.eatHexDigits() && 
this.eat(RIGHT_CURLY_BRACKET) && - this._isValidUnicode(this.parseHexInt(start + 2, this.pos - 1)) + this._isValidUnicode(this.lastIntValue) ) { return true } @@ -752,33 +762,23 @@ export class RegExpValidator { _isValidUnicode(ch) { return ch >= 0 && ch <= 0x10FFFF } - _parseRegExpUnicodeEscapeSequence(start, end) { - start += 1 // skip `u` - if (end - start >= 3 && this.codePointAt(start) === LEFT_CURLY_BRACKET) { - return this.parseHexInt(start + 1, end - 1) - } - if (end - start === 4) { - return this.parseHexInt(start, end) - } - if (end - start === 10) { - const lead = this.parseHexInt(start, start + 4) - const trail = this.parseHexInt(end - 4, end) - return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 - } - return LATIN_SMALL_LETTER_U - } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape eatIdentityEscape() { if (this.switchU) { - return ( - this.eatSyntaxCharacter() || - this.eat(SOLIDUS) - ) + if (this.eatSyntaxCharacter()) { + return true + } + if (this.eat(SOLIDUS)) { + this.lastIntValue = SOLIDUS + return true + } + return false } const ch = this.current() if (ch !== LATIN_SMALL_LETTER_C && (!this.switchN || ch !== LATIN_SMALL_LETTER_K)) { + this.lastIntValue = ch this.advance() return true } @@ -788,9 +788,11 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape eatDecimalEscape() { + this.lastIntValue = 0 let ch = this.current() if (ch >= DIGIT_ONE && ch <= DIGIT_NINE) { do { + this.lastIntValue = 10 * this.lastIntValue + (ch - DIGIT_ZERO) this.advance() } while ((ch = this.current()) >= DIGIT_ZERO && ch <= DIGIT_NINE) return true @@ -802,10 +804,12 @@ export class RegExpValidator { eatCharacterClassEscape() { const ch = this.current() if (this._isCharacterClassEscape(ch)) { + this.lastIntValue = -1 this.advance() return true } if (this.switchU && this.ecmaVersion >= 9 && (ch === LATIN_CAPITAL_LETTER_P || ch === LATIN_SMALL_LETTER_P)) { + this.lastIntValue = -1 
this.advance() if (this.eat(LEFT_CURLY_BRACKET) && this.eatUnicodePropertyValueExpression() && this.eat(RIGHT_CURLY_BRACKET)) { return true @@ -913,22 +917,16 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash classRanges() { - for (; ;) { - const leftStart = this.pos + for (;;) { if (this.eatClassAtom()) { - const leftEnd = this.pos - if (this.eat(HYPHEN_MINUS)) { - const rightStart = this.pos - if (this.eatClassAtom()) { - const rightEnd = this.pos - const left = this._parseClassAtom(leftStart, leftEnd, false) - const right = this._parseClassAtom(rightStart, rightEnd, true) - if (this.switchU && (left === -1 || right === -1)) { - this.raise("Invalid character class") - } - if (left !== -1 && right !== -1 && left > right) { - this.raise("Range out of order in character class") - } + const left = (this.switchU || this.lastIntValue <= 0xFFFF) ? this.lastIntValue : this._getTrailSurrogate(this.lastIntValue) + if (this.eat(HYPHEN_MINUS) && this.eatClassAtom()) { + const right = (this.switchU || this.lastIntValue <= 0xFFFF) ? 
this.lastIntValue : this._getLeadSurrogate(this.lastIntValue) + if (this.switchU && (left === -1 || right === -1)) { + this.raise("Invalid character class") + } + if (left !== -1 && right !== -1 && left > right) { + this.raise("Range out of order in character class") } } } else { @@ -959,6 +957,7 @@ export class RegExpValidator { const ch = this.current() if (ch !== RIGHT_SQUARE_BRACKET) { + this.lastIntValue = ch this.advance() return true } @@ -968,29 +967,29 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape eatClassEscape() { - return ( - this.eat(LATIN_SMALL_LETTER_B) || - (this.switchU && this.eat(HYPHEN_MINUS)) || - (!this.switchU && this._eatCClassControlLetter()) || - this.eatCharacterClassEscape() || - this.eatCharacterEscape() - ) - } - _eatCClassControlLetter() { const start = this.pos - if (this.eat(LATIN_SMALL_LETTER_C)) { + if (this.eat(LATIN_SMALL_LETTER_B)) { + this.lastIntValue = BACKSPACE + return true + } + if (this.switchU && this.eat(HYPHEN_MINUS)) { + this.lastIntValue = HYPHEN_MINUS + return true + } + if (!this.switchU && this.eat(LATIN_SMALL_LETTER_C)) { if (this.eatClassControlLetter()) { return true } this.pos = start } - return false + return this.eatCharacterClassEscape() || this.eatCharacterEscape() } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter eatClassControlLetter() { const ch = this.current() if (this._isDecimalDigit(ch) || ch === LOW_LINE) { + this.lastIntValue = ch % 0x20 this.advance() return true } @@ -1000,7 +999,6 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence eatHexEscapeSequence() { const start = this.pos - if (this.eat(LATIN_SMALL_LETTER_X)) { if (this._eatFixedHexDigits(2)) { return true @@ -1010,14 +1008,16 @@ export class RegExpValidator { } this.pos = start } - return false } // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits 
eatDecimalDigits() { const start = this.pos - while (this._isDecimalDigit(this.current())) { + let ch = 0 + this.lastIntValue = 0 + while (this._isDecimalDigit(ch = this.current())) { + this.lastIntValue = 10 * this.lastIntValue + (ch - DIGIT_ZERO) this.advance() } return this.pos !== start @@ -1029,7 +1029,10 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits eatHexDigits() { const start = this.pos - while (this._isHexDigit(this.current())) { + let ch = 0 + this.lastIntValue = 0 + while (this._isHexDigit(ch = this.current())) { + this.lastIntValue = 16 * this.lastIntValue + this._hexToInt(ch) this.advance() } return this.pos !== start @@ -1041,14 +1044,30 @@ export class RegExpValidator { (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) ) } + _hexToInt(ch) { + if (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) { + return 10 + (ch - LATIN_CAPITAL_LETTER_A) + } + if (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) { + return 10 + (ch - LATIN_SMALL_LETTER_A) + } + return ch - DIGIT_ZERO + } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence // Allows only 0-377(octal) i.e. 0-255(decimal). 
eatLegacyOctalEscapeSequence() { - const ch = this.current() if (this.eatOctalDigit()) { - if (this.eatOctalDigit() && ch <= DIGIT_THREE) { - this.eatOctalDigit() + const n1 = this.lastIntValue + if (this.eatOctalDigit()) { + const n2 = this.lastIntValue + if (n1 <= 3 && this.eatOctalDigit()) { + this.lastIntValue = n1 * 64 + n2 * 8 + this.lastIntValue + } else { + this.lastIntValue = n1 * 8 + n2 + } + } else { + this.lastIntValue = n1 } return true } @@ -1057,10 +1076,13 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit eatOctalDigit() { - if (this._isOctalDigit(this.current())) { + const ch = this.current() + if (this._isOctalDigit(ch)) { + this.lastIntValue = ch - DIGIT_ZERO this.advance() return true } + this.lastIntValue = 0 return false } _isOctalDigit(ch) { @@ -1072,80 +1094,16 @@ export class RegExpValidator { // And HexDigit HexDigit in https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence _eatFixedHexDigits(length) { const start = this.pos + this.lastIntValue = 0 for (let i = 0; i < length; ++i) { - if (!this._isHexDigit(this.current())) { + const ch = this.current() + if (!this._isHexDigit(ch)) { this.pos = start return false } + this.lastIntValue = 16 * this.lastIntValue + this._hexToInt(ch) this.advance() } return true } - - // https://www.ecma-international.org/ecma-262/8.0/#sec-classatom - // https://www.ecma-international.org/ecma-262/8.0/#sec-classatomnodash - // https://www.ecma-international.org/ecma-262/8.0/#sec-classescape - // Get the value of chracters to validate class ranges (e.g., [a-z]). 
- _parseClassAtom(start, end, isRight) { - const ch1 = this._getOneElementCharSetAt(start, isRight) - if (ch1 === REVERSE_SOLIDUS) { - const ch2 = this._getOneElementCharSetAt(start + 1, isRight) - switch (ch2) { - case LATIN_SMALL_LETTER_B: - return BACKSPACE - - // CharacterClassEscape - case LATIN_SMALL_LETTER_D: - case LATIN_CAPITAL_LETTER_D: - case LATIN_SMALL_LETTER_S: - case LATIN_CAPITAL_LETTER_S: - case LATIN_SMALL_LETTER_W: - case LATIN_CAPITAL_LETTER_W: - case LATIN_SMALL_LETTER_P: - case LATIN_CAPITAL_LETTER_P: - return -1 // Those are not single character. - - // CharacterEscape - case LATIN_SMALL_LETTER_T: - return CHARACTER_TABULATION - case LATIN_SMALL_LETTER_N: - return LINE_FEED - case LATIN_SMALL_LETTER_V: - return LINE_TABULATION - case LATIN_SMALL_LETTER_F: - return FORM_FEED - case LATIN_SMALL_LETTER_R: - return CARRIAGE_RETURN - case LATIN_SMALL_LETTER_C: - if (end - start === 3) { - return this.codePointAt(start + 2) % 32 - } - return LATIN_SMALL_LETTER_C - case LATIN_SMALL_LETTER_X: - if (end - start === 4) { - return this.parseHexInt(start + 2, end) - } - return LATIN_SMALL_LETTER_X - case LATIN_SMALL_LETTER_U: - return this._parseRegExpUnicodeEscapeSequence(start + 1, end) - default: - if (!this.switchU && ch2 >= DIGIT_ZERO && ch2 <= DIGIT_SEVEN) { - return this.parseOctalInt(start + 1, end) - } - return ch2 - } - } - return ch1 - } - // https://www.ecma-international.org/ecma-262/8.0/#sec-notation - _getOneElementCharSetAt(i, isRight) { - const ch = this.codePointAt(i) - if (this.switchU || ch <= 0xFFFF) { - return ch - } - // This is a surrogate pair and no `u` flag, so returns a code point. - // If the right of `-` then returns the lead surrogate. - // If the left of `-` then returns the trail surrogate. - return this.source.charCodeAt(isRight ? 
i : i + 1) - } } From eb752089aa8f80339b0bb3f89d674ef58a1abef8 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 20:03:32 +0900 Subject: [PATCH 08/18] add RegExp lookbehind assertions --- bin/run_test262.js | 3 +-- src/regexp.js | 38 +++++++++++++++---------------- test/tests-regexp-2018.js | 48 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/bin/run_test262.js b/bin/run_test262.js index 7ab5153af..2fda57893 100644 --- a/bin/run_test262.js +++ b/bin/run_test262.js @@ -8,8 +8,7 @@ const unsupportedFeatures = [ "class-fields", "class-fields-private", "class-fields-public", - "optional-catch-binding", - "regexp-lookbehind" + "optional-catch-binding" ]; run( diff --git a/src/regexp.js b/src/regexp.js index 06de61128..ca0c13c20 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -77,6 +77,7 @@ export class RegExpValidator { this.pos = 0 this.lastIntValue = 0 this.lastStringValue = "" + this.lastAssertionIsQuantifiable = false this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] @@ -211,6 +212,9 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern pattern() { this.pos = 0 + this.lastIntValue = 0 + this.lastStringValue = "" + this.lastAssertionIsQuantifiable = false this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames.length = 0 @@ -261,20 +265,16 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term eatTerm() { - const start = this.pos - - if (this.eatQuantifiableAssertion()) { - if (this.eatQuantifier()) { + if (this.eatAssertion()) { + // Handle `QuantifiableAssertion Quantifier` alternative. + // `this.lastAssertionIsQuantifiable` is true if the last eaten Assertion + // is a QuantifiableAssertion. + if (this.lastAssertionIsQuantifiable && this.eatQuantifier()) { // Make the same message as V8. 
if (this.switchU) { this.raise("Invalid quantifier") } - return true } - this.pos = start - } - - if (this.eatAssertion()) { return true } @@ -288,11 +288,12 @@ export class RegExpValidator { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion eatAssertion() { + this.lastAssertionIsQuantifiable = false return ( this.eat(CIRCUMFLEX_ACCENT) || this.eat(DOLLAR_SIGN) || this._eatWordBoundary() || - this._eatLookaheadAssertion() + this._eatLookaheadOrLookbehindAssertion() ) } _eatWordBoundary() { @@ -305,26 +306,25 @@ export class RegExpValidator { } return false } - _eatLookaheadAssertion() { + _eatLookaheadOrLookbehindAssertion() { const start = this.pos - if (this.eat(LEFT_PARENTHESIS)) { - if (this.eat(QUESTION_MARK) && (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK))) { + if (this.eat(LEFT_PARENTHESIS) && this.eat(QUESTION_MARK)) { + if (this.ecmaVersion >= 9) { + this.eat(LESS_THAN_SIGN) + } + if (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK)) { this.disjunction() if (!this.eat(RIGHT_PARENTHESIS)) { this.raise("Unterminated group") } + this.lastAssertionIsQuantifiable = true return true } - this.pos = start } + this.pos = start return false } - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-QuantifiableAssertion - eatQuantifiableAssertion() { - return this._eatLookaheadAssertion() - } - // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier eatQuantifier(noError = false) { if (this.eatQuantifierPrefix(noError)) { diff --git a/test/tests-regexp-2018.js b/test/tests-regexp-2018.js index 4107d4499..1add3a75e 100644 --- a/test/tests-regexp-2018.js +++ b/test/tests-regexp-2018.js @@ -100,3 +100,51 @@ testFail("/\\p{General_Category=Hiragana}/u", "Invalid regular expression: /\\p{ test("/\\p{Script=Hiragana}/u", {}, { ecmaVersion: 2018 }) testFail("/[\\p{Script=Hiragana}-\\p{Script=Katakana}]/u", "Invalid regular expression: /[\\p{Script=Hiragana}-\\p{Script=Katakana}]/: Invalid character class (1:1)", { 
ecmaVersion: 2018 }) test("/[\\p{Script=Hiragana}\\-\\p{Script=Katakana}]/u", {}, { ecmaVersion: 2018 }) + +//------------------------------------------------------------------------------ +// Lookbehind assertions +//------------------------------------------------------------------------------ + +testFail("/(?\\w){3})f/u", {}, { ecmaVersion: 2018 }) +test("/((?<=\\w{3}))f/u", {}, { ecmaVersion: 2018 }) +test("/(?(?<=\\w{3}))f/u", {}, { ecmaVersion: 2018 }) +test("/(?\\d){3})f/u", {}, { ecmaVersion: 2018 }) +test("/(?\\D){3})f|f/u", {}, { ecmaVersion: 2018 }) +test("/(?(?\\w){3})f/", {}, { ecmaVersion: 2018 }) +test("/((?<=\\w{3}))f/", {}, { ecmaVersion: 2018 }) +test("/(?(?<=\\w{3}))f/", {}, { ecmaVersion: 2018 }) +test("/(?\\d){3})f/", {}, { ecmaVersion: 2018 }) +test("/(?(?.)|(?.))/u", {}, { ecmaVersion: 2018 }) From 4164a1f491859e86f7dab75c427b99ff124de6cc Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 22:10:40 +0900 Subject: [PATCH 09/18] refactor: move methods to Parser.prototype --- src/regexp.js | 1668 ++++++++++++++++++++++++----------------------- src/state.js | 4 +- src/tokenize.js | 9 +- 3 files changed, 850 insertions(+), 831 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index ca0c13c20..acc6a853d 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -1,6 +1,9 @@ import {isIdentifierStart, isIdentifierChar} from "./identifier.js" +import {Parser} from "./state.js" import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js" +/* eslint no-invalid-this: error */ + const BACKSPACE = 0x08 const CHARACTER_TABULATION = 0x09 const LINE_FEED = 0x0A @@ -61,16 +64,14 @@ const RIGHT_CURLY_BRACKET = 0x7D // } const ZERO_WIDTH_NON_JOINER = 0x200C const ZERO_WIDTH_JOINER = 0x200D -export class RegExpValidator { - /** - * Initialize this validator. - * @param {Parser} parser The parser. 
- */ +const pp = Parser.prototype + +export class RegExpValidationState { constructor(parser) { this.parser = parser - this.ecmaVersion = parser.options.ecmaVersion - this.validFlags = `gim${this.ecmaVersion >= 6 ? "uy" : ""}${this.ecmaVersion >= 9 ? "s" : ""}` + this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}` this.source = "" + this.flags = "" this.start = 0 this.switchU = false this.switchN = false @@ -84,63 +85,15 @@ export class RegExpValidator { this.backReferenceNames = [] } - // --------------------------------------------------------------------------- - // Public - // --------------------------------------------------------------------------- - - /** - * Validate the flags part of a given RegExpLiteral. - * - * @param {number} start The index of the start location of the RegExp literal. - * @param {string} flags The flags part of the RegExpLiteral. - * @returns {void} - */ - validateFlags(start, flags) { - const validFlags = this.validFlags - for (let i = 0; i < flags.length; i++) { - const flag = flags.charAt(i) - if (validFlags.indexOf(flag) == -1) { - this.parser.raise(start, "Invalid regular expression flag") - } - if (flags.indexOf(flag, i + 1) > -1) { - this.parser.raise(start, "Duplicate regular expression flag") - } - } - } - - /** - * Validate the pattern part of a given RegExpLiteral. - * - * This is syntax: - * https://www.ecma-international.org/ecma-262/8.0/#sec-regular-expressions-patterns - * - * @param {number} start The index of the start location of the RegExp literal. - * @param {string} pattern The pattern part of the RegExpLiteral. - * @param {boolean} unicode `true` if the RegExp has `u` flag. 
- * @returns {void} - */ - validatePattern(start, pattern, unicode) { + reset(start, pattern, flags) { + const unicode = flags.indexOf("u") !== -1 this.start = start | 0 this.source = pattern + "" - this.switchU = !!unicode && this.ecmaVersion >= 6 - this.switchN = !!unicode && this.ecmaVersion >= 9 - this.pattern() - - // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of - // parsing contains a |GroupName|, reparse with the goal symbol - // |Pattern[~U, +N]| and use this result instead. Throw a *SyntaxError* - // exception if _P_ did not conform to the grammar, if any elements of _P_ - // were not matched by the parse, or if any Early Error conditions exist. - if (!this.switchN && this.ecmaVersion >= 9 && this.groupNames.length > 0) { - this.switchN = true - this.pattern() - } + this.flags = flags + this.switchU = unicode && this.parser.options.ecmaVersion >= 6 + this.switchN = unicode && this.parser.options.ecmaVersion >= 9 } - // --------------------------------------------------------------------------- - // Helpers - // --------------------------------------------------------------------------- - raise(message) { this.parser.raise(this.start, `Invalid regular expression: /${this.source}/: ${message}`) } @@ -191,919 +144,984 @@ export class RegExpValidator { } return false } +} - codePointToString(ch) { - if (ch <= 0xFFFF) { - return String.fromCharCode(ch) - } - return String.fromCharCode(this._getLeadSurrogate(ch), this._getTrailSurrogate(ch)) +function codePointToString(ch) { + if (ch <= 0xFFFF) { + return String.fromCharCode(ch) } - _getLeadSurrogate(ch) { - return ((ch - 0x10000) >> 10) + 0xD800 + return String.fromCharCode(getLeadSurrogate(ch), getTrailSurrogate(ch)) +} +function getLeadSurrogate(ch) { + return ((ch - 0x10000) >> 10) + 0xD800 +} +function getTrailSurrogate(ch) { + return ((ch - 0x10000) & 0x03FF) + 0xDC00 +} + +/** + * Validate the flags part of a given RegExpLiteral. 
+ * + * @param {RegExpValidationState} state The state to validate RegExp. + * @returns {void} + */ +pp.validateRegExpFlags = function(state) { + const validFlags = state.validFlags + const flags = state.flags + + for (let i = 0; i < flags.length; i++) { + const flag = flags.charAt(i) + if (validFlags.indexOf(flag) == -1) { + this.raise(state.start, "Invalid regular expression flag") + } + if (flags.indexOf(flag, i + 1) > -1) { + this.raise(state.start, "Duplicate regular expression flag") + } } - _getTrailSurrogate(ch) { - return ((ch - 0x10000) & 0x03FF) + 0xDC00 +} + +/** + * Validate the pattern part of a given RegExpLiteral. + * + * @param {RegExpValidationState} state The state to validate RegExp. + * @returns {void} + */ +pp.validateRegExpPattern = function(state) { + this.validateRegExp_pattern(state) + + // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of + // parsing contains a |GroupName|, reparse with the goal symbol + // |Pattern[~U, +N]| and use this result instead. Throw a *SyntaxError* + // exception if _P_ did not conform to the grammar, if any elements of _P_ + // were not matched by the parse, or if any Early Error conditions exist. 
+ if (!state.switchN && this.options.ecmaVersion >= 9 && state.groupNames.length > 0) { + state.switchN = true + this.validateRegExp_pattern(state) } +} - // --------------------------------------------------------------------------- - // Productions - // --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- - // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern - pattern() { - this.pos = 0 - this.lastIntValue = 0 - this.lastStringValue = "" - this.lastAssertionIsQuantifiable = false - this.numCapturingParens = 0 - this.maxBackReference = 0 - this.groupNames.length = 0 - this.backReferenceNames.length = 0 +// --------------------------------------------------------------------------- +// Productions +// --------------------------------------------------------------------------- - this.disjunction() +// https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern +pp.validateRegExp_pattern = function(state) { + state.pos = 0 + state.lastIntValue = 0 + state.lastStringValue = "" + state.lastAssertionIsQuantifiable = false + state.numCapturingParens = 0 + state.maxBackReference = 0 + state.groupNames.length = 0 + state.backReferenceNames.length = 0 - if (this.pos !== this.source.length) { - // Make the same messages as V8. - if (this.eat(RIGHT_PARENTHESIS)) { - this.raise("Unmatched ')'") - } - if (this.eat(RIGHT_SQUARE_BRACKET) || this.eat(RIGHT_CURLY_BRACKET)) { - this.raise("Lone quantifier brackets") - } - } - if (this.maxBackReference > this.numCapturingParens) { - this.raise("Invalid escape") + this.validateRegExp_disjunction(state) + + if (state.pos !== state.source.length) { + // Make the same messages as V8. 
+ if (state.eat(RIGHT_PARENTHESIS)) { + state.raise("Unmatched ')'") } - for (const name of this.backReferenceNames) { - if (this.groupNames.indexOf(name) === -1) { - this.raise("Invalid named capture referenced") - } + if (state.eat(RIGHT_SQUARE_BRACKET) || state.eat(RIGHT_CURLY_BRACKET)) { + state.raise("Lone quantifier brackets") } } - - // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction - disjunction() { - this.alternative() - while (this.eat(VERTICAL_LINE)) { - this.alternative() + if (state.maxBackReference > state.numCapturingParens) { + state.raise("Invalid escape") + } + for (const name of state.backReferenceNames) { + if (state.groupNames.indexOf(name) === -1) { + state.raise("Invalid named capture referenced") } + } +} - // Make the same message as V8. - if (this.eatQuantifier(true)) { - this.raise("Nothing to repeat") - } - if (this.eat(LEFT_CURLY_BRACKET)) { - this.raise("Lone quantifier brackets") - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction +pp.validateRegExp_disjunction = function(state) { + this.validateRegExp_alternative(state) + while (state.eat(VERTICAL_LINE)) { + this.validateRegExp_alternative(state) } - // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative - alternative() { - while (this.pos < this.source.length && this.eatTerm()) - ; + // Make the same message as V8. + if (this.validateRegExp_eatQuantifier(state, true)) { + state.raise("Nothing to repeat") } + if (state.eat(LEFT_CURLY_BRACKET)) { + state.raise("Lone quantifier brackets") + } +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term - eatTerm() { - if (this.eatAssertion()) { - // Handle `QuantifiableAssertion Quantifier` alternative. - // `this.lastAssertionIsQuantifiable` is true if the last eaten Assertion - // is a QuantifiableAssertion. - if (this.lastAssertionIsQuantifiable && this.eatQuantifier()) { - // Make the same message as V8. 
- if (this.switchU) { - this.raise("Invalid quantifier") - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative +pp.validateRegExp_alternative = function(state) { + while (state.pos < state.source.length && this.validateRegExp_eatTerm(state)) + ; +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term +pp.validateRegExp_eatTerm = function(state) { + if (this.validateRegExp_eatAssertion(state)) { + // Handle `QuantifiableAssertion Quantifier` alternative. + // `state.lastAssertionIsQuantifiable` is true if the last eaten Assertion + // is a QuantifiableAssertion. + if (state.lastAssertionIsQuantifiable && this.validateRegExp_eatQuantifier(state)) { + // Make the same message as V8. + if (state.switchU) { + state.raise("Invalid quantifier") } - return true } + return true + } - if (this.switchU ? this.eatAtom() : this.eatExtendedAtom()) { - this.eatQuantifier() - return true - } + if (state.switchU ? this.validateRegExp_eatAtom(state) : this.validateRegExp_eatExtendedAtom(state)) { + this.validateRegExp_eatQuantifier(state) + return true + } - return false + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion +pp.validateRegExp_eatAssertion = function(state) { + const start = state.pos + state.lastAssertionIsQuantifiable = false + + // ^, $ + if (state.eat(CIRCUMFLEX_ACCENT) || state.eat(DOLLAR_SIGN)) { + return true } - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion - eatAssertion() { - this.lastAssertionIsQuantifiable = false - return ( - this.eat(CIRCUMFLEX_ACCENT) || - this.eat(DOLLAR_SIGN) || - this._eatWordBoundary() || - this._eatLookaheadOrLookbehindAssertion() - ) - } - _eatWordBoundary() { - const start = this.pos - if (this.eat(REVERSE_SOLIDUS)) { - if (this.eat(LATIN_CAPITAL_LETTER_B) || this.eat(LATIN_SMALL_LETTER_B)) { - return true - } - this.pos = start + // \b \B + if (state.eat(REVERSE_SOLIDUS)) { + if (state.eat(LATIN_CAPITAL_LETTER_B) || 
state.eat(LATIN_SMALL_LETTER_B)) { + return true } - return false + state.pos = start } - _eatLookaheadOrLookbehindAssertion() { - const start = this.pos - if (this.eat(LEFT_PARENTHESIS) && this.eat(QUESTION_MARK)) { - if (this.ecmaVersion >= 9) { - this.eat(LESS_THAN_SIGN) - } - if (this.eat(EQUALS_SIGN) || this.eat(EXCLAMATION_MARK)) { - this.disjunction() - if (!this.eat(RIGHT_PARENTHESIS)) { - this.raise("Unterminated group") - } - this.lastAssertionIsQuantifiable = true - return true + + // Lookahead / Lookbehind + if (state.eat(LEFT_PARENTHESIS) && state.eat(QUESTION_MARK)) { + if (this.options.ecmaVersion >= 9) { + state.eat(LESS_THAN_SIGN) + } + if (state.eat(EQUALS_SIGN) || state.eat(EXCLAMATION_MARK)) { + this.validateRegExp_disjunction(state) + if (!state.eat(RIGHT_PARENTHESIS)) { + state.raise("Unterminated group") } + state.lastAssertionIsQuantifiable = true + return true } - this.pos = start - return false } - // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier - eatQuantifier(noError = false) { - if (this.eatQuantifierPrefix(noError)) { - this.eat(QUESTION_MARK) - return true - } - return false + state.pos = start + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier +pp.validateRegExp_eatQuantifier = function(state, noError = false) { + if (this.validateRegExp_eatQuantifierPrefix(state, noError)) { + state.eat(QUESTION_MARK) + return true } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix - eatQuantifierPrefix(noError) { - return ( - this.eat(ASTERISK) || - this.eat(PLUS_SIGN) || - this.eat(QUESTION_MARK) || - this._eatBracedQuantifier(noError) - ) - } - _eatBracedQuantifier(noError) { - const start = this.pos - if (this.eat(LEFT_CURLY_BRACKET)) { - let min = 0, max = -1 - if (this.eatDecimalDigits()) { - min = this.lastIntValue - if (this.eat(COMMA) && this.eatDecimalDigits()) { - max = this.lastIntValue - } - if (this.eat(RIGHT_CURLY_BRACKET)) { - // 
SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term - if (max !== -1 && max < min && !noError) { - this.raise("numbers out of order in {} quantifier") - } - return true - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix +pp.validateRegExp_eatQuantifierPrefix = function(state, noError) { + return ( + state.eat(ASTERISK) || + state.eat(PLUS_SIGN) || + state.eat(QUESTION_MARK) || + this.validateRegExp_eatBracedQuantifier(state, noError) + ) +} +pp.validateRegExp_eatBracedQuantifier = function(state, noError) { + const start = state.pos + if (state.eat(LEFT_CURLY_BRACKET)) { + let min = 0, max = -1 + if (this.validateRegExp_eatDecimalDigits(state)) { + min = state.lastIntValue + if (state.eat(COMMA) && this.validateRegExp_eatDecimalDigits(state)) { + max = state.lastIntValue } - if (this.switchU && !noError) { - this.raise("Incomplete quantifier") + if (state.eat(RIGHT_CURLY_BRACKET)) { + // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term + if (max !== -1 && max < min && !noError) { + state.raise("numbers out of order in {} quantifier") + } + return true } - this.pos = start } - return false + if (state.switchU && !noError) { + state.raise("Incomplete quantifier") + } + state.pos = start } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-Atom - eatAtom() { - return ( - this.eatPatternCharacters() || - this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape() || - this.eatCharacterClass() || - this._eatUncapturingGroup() || - this._eatCapturingGroup() - ) - } - _eatReverseSolidusAtomEscape() { - const start = this.pos - if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatAtomEscape()) { - return true - } - this.pos = start +// https://www.ecma-international.org/ecma-262/8.0/#prod-Atom +pp.validateRegExp_eatAtom = function(state) { + return ( + this.validateRegExp_eatPatternCharacters(state) || + state.eat(FULL_STOP) || + 
this.validateRegExp_eatReverseSolidusAtomEscape(state) || + this.validateRegExp_eatCharacterClass(state) || + this.validateRegExp_eatUncapturingGroup(state) || + this.validateRegExp_eatCapturingGroup(state) + ) +} +pp.validateRegExp_eatReverseSolidusAtomEscape = function(state) { + const start = state.pos + if (state.eat(REVERSE_SOLIDUS)) { + if (this.validateRegExp_eatAtomEscape(state)) { + return true } - return false + state.pos = start } - _eatUncapturingGroup() { - const start = this.pos - if (this.eat(LEFT_PARENTHESIS)) { - if (this.eat(QUESTION_MARK) && this.eat(COLON)) { - this.disjunction() - if (this.eat(RIGHT_PARENTHESIS)) { - return true - } - this.raise("Unterminated group") + return false +} +pp.validateRegExp_eatUncapturingGroup = function(state) { + const start = state.pos + if (state.eat(LEFT_PARENTHESIS)) { + if (state.eat(QUESTION_MARK) && state.eat(COLON)) { + this.validateRegExp_disjunction(state) + if (state.eat(RIGHT_PARENTHESIS)) { + return true } - this.pos = start + state.raise("Unterminated group") } - return false + state.pos = start } - _eatCapturingGroup() { - if (this.eat(LEFT_PARENTHESIS)) { - if (this.ecmaVersion >= 9) { - this.groupSpecifier() - } else if (this.current() === QUESTION_MARK) { - this.raise("Invalid group") - } - this.disjunction() - if (this.eat(RIGHT_PARENTHESIS)) { - this.numCapturingParens += 1 - return true - } - this.raise("Unterminated group") + return false +} +pp.validateRegExp_eatCapturingGroup = function(state) { + if (state.eat(LEFT_PARENTHESIS)) { + if (this.options.ecmaVersion >= 9) { + this.validateRegExp_groupSpecifier(state) + } else if (state.current() === QUESTION_MARK) { + state.raise("Invalid group") + } + this.validateRegExp_disjunction(state) + if (state.eat(RIGHT_PARENTHESIS)) { + state.numCapturingParens += 1 + return true } - return false + state.raise("Unterminated group") } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom - eatExtendedAtom() { 
- return ( - this.eat(FULL_STOP) || - this._eatReverseSolidusAtomEscape() || - this.eatCharacterClass() || - this._eatUncapturingGroup() || - this._eatCapturingGroup() || - this.eatInvalidBracedQuantifier() || - this.eatExtendedPatternCharacter() - ) - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom +pp.validateRegExp_eatExtendedAtom = function(state) { + return ( + state.eat(FULL_STOP) || + this.validateRegExp_eatReverseSolidusAtomEscape(state) || + this.validateRegExp_eatCharacterClass(state) || + this.validateRegExp_eatUncapturingGroup(state) || + this.validateRegExp_eatCapturingGroup(state) || + this.validateRegExp_eatInvalidBracedQuantifier(state) || + this.validateRegExp_eatExtendedPatternCharacter(state) + ) +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier - eatInvalidBracedQuantifier() { - if (this._eatBracedQuantifier(true)) { - this.raise("Nothing to repeat") - } - return false +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier +pp.validateRegExp_eatInvalidBracedQuantifier = function(state) { + if (this.validateRegExp_eatBracedQuantifier(state, true)) { + state.raise("Nothing to repeat") } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter - eatSyntaxCharacter() { - const ch = this.current() - if (this._isSyntaxCharacter(ch)) { - this.lastIntValue = ch - this.advance() - return true - } - return false +// https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter +pp.validateRegExp_eatSyntaxCharacter = function(state) { + const ch = state.current() + if (isSyntaxCharacter(ch)) { + state.lastIntValue = ch + state.advance() + return true } - _isSyntaxCharacter(ch) { - return ( - ch === CIRCUMFLEX_ACCENT || - ch === DOLLAR_SIGN || - ch === REVERSE_SOLIDUS || - ch === FULL_STOP || - ch === ASTERISK || - ch === PLUS_SIGN || - ch === QUESTION_MARK || - ch === LEFT_PARENTHESIS || - ch === 
RIGHT_PARENTHESIS || - ch === LEFT_SQUARE_BRACKET || - ch === RIGHT_SQUARE_BRACKET || - ch === LEFT_CURLY_BRACKET || - ch === RIGHT_CURLY_BRACKET || - ch === VERTICAL_LINE - ) - } - - // https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter - // But eat eager. - eatPatternCharacters() { - const start = this.pos - let ch = 0 - while ((ch = this.current()) !== -1 && !this._isSyntaxCharacter(ch)) { - this.advance() - } - return this.pos !== start + return false +} +function isSyntaxCharacter(ch) { + return ( + ch === CIRCUMFLEX_ACCENT || + ch === DOLLAR_SIGN || + ch === REVERSE_SOLIDUS || + ch === FULL_STOP || + ch === ASTERISK || + ch === PLUS_SIGN || + ch === QUESTION_MARK || + ch === LEFT_PARENTHESIS || + ch === RIGHT_PARENTHESIS || + ch === LEFT_SQUARE_BRACKET || + ch === RIGHT_SQUARE_BRACKET || + ch === LEFT_CURLY_BRACKET || + ch === RIGHT_CURLY_BRACKET || + ch === VERTICAL_LINE + ) +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter +// But eat eager. 
+pp.validateRegExp_eatPatternCharacters = function(state) { + const start = state.pos + let ch = 0 + while ((ch = state.current()) !== -1 && !isSyntaxCharacter(ch)) { + state.advance() } + return state.pos !== start +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedPatternCharacter - eatExtendedPatternCharacter() { - const ch = this.current() - if ( - ch !== -1 && - ch !== CIRCUMFLEX_ACCENT && - ch !== DOLLAR_SIGN && - ch !== FULL_STOP && - ch !== ASTERISK && - ch !== PLUS_SIGN && - ch !== QUESTION_MARK && - ch !== LEFT_PARENTHESIS && - ch !== RIGHT_PARENTHESIS && - ch !== LEFT_SQUARE_BRACKET && - ch !== VERTICAL_LINE - ) { - this.advance() - return true - } - return false +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedPatternCharacter +pp.validateRegExp_eatExtendedPatternCharacter = function(state) { + const ch = state.current() + if ( + ch !== -1 && + ch !== CIRCUMFLEX_ACCENT && + ch !== DOLLAR_SIGN && + ch !== FULL_STOP && + ch !== ASTERISK && + ch !== PLUS_SIGN && + ch !== QUESTION_MARK && + ch !== LEFT_PARENTHESIS && + ch !== RIGHT_PARENTHESIS && + ch !== LEFT_SQUARE_BRACKET && + ch !== VERTICAL_LINE + ) { + state.advance() + return true } + return false +} - // GroupSpecifier[U] :: - // [empty] - // `?` GroupName[?U] - groupSpecifier() { - if (this.eat(QUESTION_MARK)) { - if (this.eatGroupName()) { - if (this.groupNames.indexOf(this.lastStringValue) !== -1) { - this.raise("Duplicate capture group name") - } - this.groupNames.push(this.lastStringValue) - return +// GroupSpecifier[U] :: +// [empty] +// `?` GroupName[?U] +pp.validateRegExp_groupSpecifier = function(state) { + if (state.eat(QUESTION_MARK)) { + if (this.validateRegExp_eatGroupName(state)) { + if (state.groupNames.indexOf(state.lastStringValue) !== -1) { + state.raise("Duplicate capture group name") } - this.raise("Invalid group") + state.groupNames.push(state.lastStringValue) + return } + state.raise("Invalid group") } +} - // GroupName[U] :: - 
// `<` RegExpIdentifierName[?U] `>` - // RegExpIdentifierName[U] :: - // RegExpIdentifierStart[?U] - // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] - // Note: this updates `this.lastStringValue` property with the eaten name. - eatGroupName() { - this.lastStringValue = "" - if (this.eat(LESS_THAN_SIGN)) { - if (this.eatRegExpIdentifierStart()) { - while (this.eatRegExpIdentifierPart()) - ; - if (this.eat(GREATER_THAN_SIGN)) { - return true - } +// GroupName[U] :: +// `<` RegExpIdentifierName[?U] `>` +// RegExpIdentifierName[U] :: +// RegExpIdentifierStart[?U] +// RegExpIdentifierName[?U] RegExpIdentifierPart[?U] +// Note: this updates `state.lastStringValue` property with the eaten name. +pp.validateRegExp_eatGroupName = function(state) { + state.lastStringValue = "" + if (state.eat(LESS_THAN_SIGN)) { + if (this.validateRegExp_eatRegExpIdentifierStart(state)) { + state.lastStringValue += codePointToString(state.lastIntValue) + while (this.validateRegExp_eatRegExpIdentifierPart(state)) { + state.lastStringValue += codePointToString(state.lastIntValue) + } + if (state.eat(GREATER_THAN_SIGN)) { + return true } - this.raise("Invalid capture group name") } - return false + state.raise("Invalid capture group name") } + return false +} - // RegExpIdentifierStart[U] :: - // UnicodeIDStart - // `$` - // `_` - // `\` RegExpUnicodeEscapeSequence[?U] - // Note: this appends the eaten character to `this.lastStringValue` property. 
- eatRegExpIdentifierStart() { - const start = this.pos - let ch = this.current() - this.advance() +// RegExpIdentifierStart[U] :: +// UnicodeIDStart +// `$` +// `_` +// `\` RegExpUnicodeEscapeSequence[?U] +pp.validateRegExp_eatRegExpIdentifierStart = function(state) { + const start = state.pos + let ch = state.current() + state.advance() + + if (ch === REVERSE_SOLIDUS && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + ch = state.lastIntValue + } + if (isRegExpIdentifierStart(ch)) { + state.lastIntValue = ch + return true + } - if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { - ch = this.lastIntValue - } - if (this._isRegExpIdentifierStart(ch)) { - this.lastStringValue += this.codePointToString(ch) - return true - } + state.pos = start + return false +} +function isRegExpIdentifierStart(ch) { + return isIdentifierStart(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE +} - this.pos = start - return false +// RegExpIdentifierPart[U] :: +// UnicodeIDContinue +// `$` +// `_` +// `\` RegExpUnicodeEscapeSequence[?U] +// +// +pp.validateRegExp_eatRegExpIdentifierPart = function(state) { + const start = state.pos + let ch = state.current() + state.advance() + + if (ch === REVERSE_SOLIDUS && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + ch = state.lastIntValue + } + if (isRegExpIdentifierPart(ch)) { + state.lastIntValue = ch + return true } - _isRegExpIdentifierStart(ch) { - return isIdentifierStart(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE - } - - // RegExpIdentifierPart[U] :: - // UnicodeIDContinue - // `$` - // `_` - // `\` RegExpUnicodeEscapeSequence[?U] - // - // - // Note: this appends the eaten character to `this.lastStringValue` property. 
- eatRegExpIdentifierPart() { - const start = this.pos - let ch = this.current() - this.advance() - - if (ch === REVERSE_SOLIDUS && this.eatRegExpUnicodeEscapeSequence()) { - ch = this.lastIntValue - } - if (this._isRegExpIdentifierPart(ch)) { - this.lastStringValue += this.codePointToString(ch) - return true - } - this.pos = start - return false + state.pos = start + return false +} +function isRegExpIdentifierPart(ch) { + return isIdentifierChar(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE || ch === ZERO_WIDTH_NON_JOINER || ch === ZERO_WIDTH_JOINER +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape +pp.validateRegExp_eatAtomEscape = function(state) { + if ( + this.validateRegExp_eatBackReference(state) || + this.validateRegExp_eatCharacterClassEscape(state) || + this.validateRegExp_eatCharacterEscape(state) || + (state.switchN && this.validateRegExp_eatKGroupName(state)) + ) { + return true } - _isRegExpIdentifierPart(ch) { - return isIdentifierChar(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE || ch === ZERO_WIDTH_NON_JOINER || ch === ZERO_WIDTH_JOINER + if (state.switchU) { + // Make the same message as V8. + if (state.current() === LATIN_SMALL_LETTER_C) { + state.raise("Invalid unicode escape") + } + state.raise("Invalid escape") } - - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape - eatAtomEscape() { - if ( - this._eatBackReference() || - this.eatCharacterClassEscape() || - this.eatCharacterEscape() || - (this.switchN && this._eatKGroupName()) - ) { + return false +} +pp.validateRegExp_eatBackReference = function(state) { + const start = state.pos + if (this.validateRegExp_eatDecimalEscape(state)) { + const n = state.lastIntValue + if (state.switchU) { + // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape + if (n > state.maxBackReference) { + state.maxBackReference = n + } return true } - if (this.switchU) { - // Make the same message as V8. 
- if (this.current() === LATIN_SMALL_LETTER_C) { - this.raise("Invalid unicode escape") - } - this.raise("Invalid escape") + if (n <= state.numCapturingParens) { + return true } - return false + state.pos = start } - _eatBackReference() { - const start = this.pos - if (this.eatDecimalEscape()) { - const n = this.lastIntValue - if (this.switchU) { - // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape - if (n > this.maxBackReference) { - this.maxBackReference = n - } - return true - } - if (n <= this.numCapturingParens) { - return true - } - this.pos = start + return false +} +pp.validateRegExp_eatKGroupName = function(state) { + if (state.eat(LATIN_SMALL_LETTER_K)) { + if (this.validateRegExp_eatGroupName(state)) { + state.backReferenceNames.push(state.lastStringValue) + return true } - return false + state.raise("Invalid named reference") } - _eatKGroupName() { - if (this.eat(LATIN_SMALL_LETTER_K)) { - if (this.eatGroupName()) { - this.backReferenceNames.push(this.lastStringValue) - return true - } - this.raise("Invalid named reference") + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape +pp.validateRegExp_eatCharacterEscape = function(state) { + return ( + this.validateRegExp_eatControlEscape(state) || + this.validateRegExp_eatCControlLetter(state) || + this.validateRegExp_eatZero(state) || + this.validateRegExp_eatHexEscapeSequence(state) || + this.validateRegExp_eatRegExpUnicodeEscapeSequence(state) || + (!state.switchU && this.validateRegExp_eatLegacyOctalEscapeSequence(state)) || + this.validateRegExp_eatIdentityEscape(state) + ) +} +pp.validateRegExp_eatCControlLetter = function(state) { + const start = state.pos + if (state.eat(LATIN_SMALL_LETTER_C)) { + if (this.validateRegExp_eatControlLetter(state)) { + return true } - return false + state.pos = start + } + return false +} +pp.validateRegExp_eatZero = function(state) { + if (state.current() === DIGIT_ZERO && 
!isDecimalDigit(state.lookahead())) { + state.lastIntValue = 0 + state.advance() + return true } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape - eatCharacterEscape() { - return ( - this.eatControlEscape() || - this._eatCControlLetter() || - this._eatZero() || - this.eatHexEscapeSequence() || - this.eatRegExpUnicodeEscapeSequence() || - (!this.switchU && this.eatLegacyOctalEscapeSequence()) || - this.eatIdentityEscape() - ) - } - _eatCControlLetter() { - const start = this.pos - if (this.eat(LATIN_SMALL_LETTER_C)) { - if (this.eatControlLetter()) { - return true - } - this.pos = start - } - return false +// https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape +pp.validateRegExp_eatControlEscape = function(state) { + const ch = state.current() + if (ch === LATIN_SMALL_LETTER_T) { + state.lastIntValue = CHARACTER_TABULATION + state.advance() + return true } - _eatZero() { - if (this.current() === DIGIT_ZERO && !this._isDecimalDigit(this.lookahead())) { - this.lastIntValue = 0 - this.advance() - return true - } - return false + if (ch === LATIN_SMALL_LETTER_N) { + state.lastIntValue = LINE_FEED + state.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_V) { + state.lastIntValue = LINE_TABULATION + state.advance() + return true } + if (ch === LATIN_SMALL_LETTER_F) { + state.lastIntValue = FORM_FEED + state.advance() + return true + } + if (ch === LATIN_SMALL_LETTER_R) { + state.lastIntValue = CARRIAGE_RETURN + state.advance() + return true + } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape - eatControlEscape() { - const ch = this.current() - if (ch === LATIN_SMALL_LETTER_T) { - this.lastIntValue = CHARACTER_TABULATION - this.advance() +// https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter +pp.validateRegExp_eatControlLetter = function(state) { + const ch = state.current() + if (isControlLetter(ch)) { + state.lastIntValue = ch % 0x20 
+ state.advance() + return true + } + return false +} +function isControlLetter(ch) { + return ( + (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_Z) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) + ) +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence +pp.validateRegExp_eatRegExpUnicodeEscapeSequence = function(state) { + const start = state.pos + + if (state.eat(LATIN_SMALL_LETTER_U)) { + if (this.validateRegExp_eatFixedHexDigits(state, 4)) { + const lead = state.lastIntValue + if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) { + const leadSurrogateEnd = state.pos + if (state.eat(REVERSE_SOLIDUS) && state.eat(LATIN_SMALL_LETTER_U) && this.validateRegExp_eatFixedHexDigits(state, 4)) { + const trail = state.lastIntValue + if (trail >= 0xDC00 && trail <= 0xDFFF) { + state.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 + return true + } + } + state.pos = leadSurrogateEnd + state.lastIntValue = lead + } return true } - if (ch === LATIN_SMALL_LETTER_N) { - this.lastIntValue = LINE_FEED - this.advance() + if ( + state.switchU && + state.eat(LEFT_CURLY_BRACKET) && + this.validateRegExp_eatHexDigits(state) && + state.eat(RIGHT_CURLY_BRACKET) && + isValidUnicode(state.lastIntValue) + ) { return true } - if (ch === LATIN_SMALL_LETTER_V) { - this.lastIntValue = LINE_TABULATION - this.advance() - return true + if (state.switchU) { + state.raise("Invalid unicode escape") } - if (ch === LATIN_SMALL_LETTER_F) { - this.lastIntValue = FORM_FEED - this.advance() + state.pos = start + } + + return false +} +function isValidUnicode(ch) { + return ch >= 0 && ch <= 0x10FFFF +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape +pp.validateRegExp_eatIdentityEscape = function(state) { + if (state.switchU) { + if (this.validateRegExp_eatSyntaxCharacter(state)) { return true } - if (ch === LATIN_SMALL_LETTER_R) { - this.lastIntValue = CARRIAGE_RETURN - 
this.advance() + if (state.eat(SOLIDUS)) { + state.lastIntValue = SOLIDUS return true } return false } - // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter - eatControlLetter() { - const ch = this.current() - if (this._isControlLetter(ch)) { - this.lastIntValue = ch % 0x20 - this.advance() - return true - } - return false + const ch = state.current() + if (ch !== LATIN_SMALL_LETTER_C && (!state.switchN || ch !== LATIN_SMALL_LETTER_K)) { + state.lastIntValue = ch + state.advance() + return true } - _isControlLetter(ch) { - return ( - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_Z) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) - ) - } - - // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence - eatRegExpUnicodeEscapeSequence() { - const start = this.pos - - if (this.eat(LATIN_SMALL_LETTER_U)) { - if (this._eatFixedHexDigits(4)) { - const lead = this.lastIntValue - if (this.switchU && lead >= 0xD800 && lead <= 0xDBFF) { - const leadSurrogateEnd = this.pos - if (this.eat(REVERSE_SOLIDUS) && this.eat(LATIN_SMALL_LETTER_U) && this._eatFixedHexDigits(4)) { - const trail = this.lastIntValue - if (trail >= 0xDC00 && trail <= 0xDFFF) { - this.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 - return true - } - } - this.pos = leadSurrogateEnd - this.lastIntValue = lead - } - return true - } - if ( - this.switchU && - this.eat(LEFT_CURLY_BRACKET) && - this.eatHexDigits() && - this.eat(RIGHT_CURLY_BRACKET) && - this._isValidUnicode(this.lastIntValue) - ) { - return true - } - if (this.switchU) { - this.raise("Invalid unicode escape") - } - this.pos = start - } - return false - } - _isValidUnicode(ch) { - return ch >= 0 && ch <= 0x10FFFF + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape +pp.validateRegExp_eatDecimalEscape = function(state) { + state.lastIntValue = 0 + let ch = state.current() + if (ch >= DIGIT_ONE && ch <= DIGIT_NINE) { 
+ do { + state.lastIntValue = 10 * state.lastIntValue + (ch - DIGIT_ZERO) + state.advance() + } while ((ch = state.current()) >= DIGIT_ZERO && ch <= DIGIT_NINE) + return true } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape - eatIdentityEscape() { - if (this.switchU) { - if (this.eatSyntaxCharacter()) { - return true - } - if (this.eat(SOLIDUS)) { - this.lastIntValue = SOLIDUS - return true - } - return false - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape +pp.validateRegExp_eatCharacterClassEscape = function(state) { + const ch = state.current() - const ch = this.current() - if (ch !== LATIN_SMALL_LETTER_C && (!this.switchN || ch !== LATIN_SMALL_LETTER_K)) { - this.lastIntValue = ch - this.advance() - return true - } - - return false + if (isCharacterClassEscape(ch)) { + state.lastIntValue = -1 + state.advance() + return true } - // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape - eatDecimalEscape() { - this.lastIntValue = 0 - let ch = this.current() - if (ch >= DIGIT_ONE && ch <= DIGIT_NINE) { - do { - this.lastIntValue = 10 * this.lastIntValue + (ch - DIGIT_ZERO) - this.advance() - } while ((ch = this.current()) >= DIGIT_ZERO && ch <= DIGIT_NINE) + if ( + state.switchU && + this.options.ecmaVersion >= 9 && + (ch === LATIN_CAPITAL_LETTER_P || ch === LATIN_SMALL_LETTER_P) + ) { + state.lastIntValue = -1 + state.advance() + if ( + state.eat(LEFT_CURLY_BRACKET) && + this.validateRegExp_eatUnicodePropertyValueExpression(state) && + state.eat(RIGHT_CURLY_BRACKET) + ) { return true } - return false + state.raise("Invalid property name") } - // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape - eatCharacterClassEscape() { - const ch = this.current() - if (this._isCharacterClassEscape(ch)) { - this.lastIntValue = -1 - this.advance() + return false +} +function isCharacterClassEscape(ch) { + return ( + ch === LATIN_SMALL_LETTER_D || + ch 
=== LATIN_CAPITAL_LETTER_D || + ch === LATIN_SMALL_LETTER_S || + ch === LATIN_CAPITAL_LETTER_S || + ch === LATIN_SMALL_LETTER_W || + ch === LATIN_CAPITAL_LETTER_W + ) +} + +// UnicodePropertyValueExpression :: +// UnicodePropertyName `=` UnicodePropertyValue +// LoneUnicodePropertyNameOrValue +pp.validateRegExp_eatUnicodePropertyValueExpression = function(state) { + const start = state.pos + + // UnicodePropertyName `=` UnicodePropertyValue + if (this.validateRegExp_eatUnicodePropertyName(state) && state.eat(EQUALS_SIGN)) { + const name = state.lastStringValue + if (this.validateRegExp_eatUnicodePropertyValue(state)) { + const value = state.lastStringValue + this.validateRegExp_validateUnicodePropertyNameAndValue(state, name, value) return true } - if (this.switchU && this.ecmaVersion >= 9 && (ch === LATIN_CAPITAL_LETTER_P || ch === LATIN_SMALL_LETTER_P)) { - this.lastIntValue = -1 - this.advance() - if (this.eat(LEFT_CURLY_BRACKET) && this.eatUnicodePropertyValueExpression() && this.eat(RIGHT_CURLY_BRACKET)) { - return true - } - this.raise("Invalid property name") - } - return false } - _isCharacterClassEscape(ch) { - return ( - ch === LATIN_SMALL_LETTER_D || - ch === LATIN_CAPITAL_LETTER_D || - ch === LATIN_SMALL_LETTER_S || - ch === LATIN_CAPITAL_LETTER_S || - ch === LATIN_SMALL_LETTER_W || - ch === LATIN_CAPITAL_LETTER_W - ) - } - - // UnicodePropertyValueExpression :: - // UnicodePropertyName `=` UnicodePropertyValue - // LoneUnicodePropertyNameOrValue - eatUnicodePropertyValueExpression() { - const start = this.pos - - if (this.eatUnicodePropertyName() && this.eat(EQUALS_SIGN)) { - const name = this.lastStringValue - if (this.eatUnicodePropertyValue()) { - const value = this.lastStringValue - this._validateUnicodePropertyNameAndValue(name, value) - return true - } - } - this.pos = start + state.pos = start - if (this.eatLoneUnicodePropertyNameOrValue()) { - const nameOrValue = this.lastStringValue - this._validateUnicodePropertyNameOrValue(nameOrValue) - 
return true - } - return false + // LoneUnicodePropertyNameOrValue + if (this.validateRegExp_eatLoneUnicodePropertyNameOrValue(state)) { + const nameOrValue = state.lastStringValue + this.validateRegExp_validateUnicodePropertyNameOrValue(state, nameOrValue) + return true } - _validateUnicodePropertyNameAndValue(name, value) { - if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) { - this.raise("Invalid property name") - } + return false +} +pp.validateRegExp_validateUnicodePropertyNameAndValue = function(state, name, value) { + if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) { + state.raise("Invalid property name") } - _validateUnicodePropertyNameOrValue(nameOrValue) { - if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) { - this.raise("Invalid property name") - } +} +pp.validateRegExp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) { + if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) { + state.raise("Invalid property name") } +} - // UnicodePropertyName :: - // UnicodePropertyNameCharacters - eatUnicodePropertyName() { - let ch = 0 - this.lastStringValue = "" - while (this._isUnicodePropertyNameCharacter(ch = this.current())) { - this.lastStringValue += this.codePointToString(ch) - this.advance() - } - return this.lastStringValue !== "" - } - _isUnicodePropertyNameCharacter(ch) { - return this._isControlLetter(ch) || ch === LOW_LINE +// UnicodePropertyName :: +// UnicodePropertyNameCharacters +pp.validateRegExp_eatUnicodePropertyName = function(state) { + let ch = 0 + state.lastStringValue = "" + while (isUnicodePropertyNameCharacter(ch = state.current())) { + state.lastStringValue += codePointToString(ch) + state.advance() } + return state.lastStringValue !== "" +} +function isUnicodePropertyNameCharacter(ch) { + return isControlLetter(ch) || ch === LOW_LINE +} - // UnicodePropertyValue :: - // 
UnicodePropertyValueCharacters - eatUnicodePropertyValue() { - let ch = 0 - this.lastStringValue = "" - while (this._isUnicodePropertyValueCharacter(ch = this.current())) { - this.lastStringValue += this.codePointToString(ch) - this.advance() - } - return this.lastStringValue !== "" - } - _isUnicodePropertyValueCharacter(ch) { - return this._isUnicodePropertyNameCharacter(ch) || this._isDecimalDigit(ch) +// UnicodePropertyValue :: +// UnicodePropertyValueCharacters +pp.validateRegExp_eatUnicodePropertyValue = function(state) { + let ch = 0 + state.lastStringValue = "" + while (isUnicodePropertyValueCharacter(ch = state.current())) { + state.lastStringValue += codePointToString(ch) + state.advance() } + return state.lastStringValue !== "" +} +function isUnicodePropertyValueCharacter(ch) { + return isUnicodePropertyNameCharacter(ch) || isDecimalDigit(ch) +} - // LoneUnicodePropertyNameOrValue :: - // UnicodePropertyValueCharacters - eatLoneUnicodePropertyNameOrValue() { - return this.eatUnicodePropertyValue() - } +// LoneUnicodePropertyNameOrValue :: +// UnicodePropertyValueCharacters +pp.validateRegExp_eatLoneUnicodePropertyNameOrValue = function(state) { + return this.validateRegExp_eatUnicodePropertyValue(state) +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass - eatCharacterClass() { - if (this.eat(LEFT_SQUARE_BRACKET)) { - this.eat(CIRCUMFLEX_ACCENT) - this.classRanges() - if (this.eat(RIGHT_SQUARE_BRACKET)) { - return true - } - // Unreachable since it threw "unterminated regular expression" error before. 
- this.raise("Unterminated character class") +// https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass +pp.validateRegExp_eatCharacterClass = function(state) { + if (state.eat(LEFT_SQUARE_BRACKET)) { + state.eat(CIRCUMFLEX_ACCENT) + this.validateRegExp_classRanges(state) + if (state.eat(RIGHT_SQUARE_BRACKET)) { + return true } - return false + // Unreachable since it threw "unterminated regular expression" error before. + state.raise("Unterminated character class") } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges - // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges - // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash - classRanges() { - for (;;) { - if (this.eatClassAtom()) { - const left = (this.switchU || this.lastIntValue <= 0xFFFF) ? this.lastIntValue : this._getTrailSurrogate(this.lastIntValue) - if (this.eat(HYPHEN_MINUS) && this.eatClassAtom()) { - const right = (this.switchU || this.lastIntValue <= 0xFFFF) ? this.lastIntValue : this._getLeadSurrogate(this.lastIntValue) - if (this.switchU && (left === -1 || right === -1)) { - this.raise("Invalid character class") - } - if (left !== -1 && right !== -1 && left > right) { - this.raise("Range out of order in character class") - } - } - } else { - break +// https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges +// https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges +// https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash +pp.validateRegExp_classRanges = function(state) { + while (this.validateRegExp_eatClassAtom(state)) { + const left = (state.switchU || state.lastIntValue <= 0xFFFF) + ? state.lastIntValue + : getTrailSurrogate(state.lastIntValue) + + if (state.eat(HYPHEN_MINUS) && this.validateRegExp_eatClassAtom(state)) { + const right = (state.switchU || state.lastIntValue <= 0xFFFF) + ? 
state.lastIntValue + : getLeadSurrogate(state.lastIntValue) + + if (state.switchU && (left === -1 || right === -1)) { + state.raise("Invalid character class") + } + if (left !== -1 && right !== -1 && left > right) { + state.raise("Range out of order in character class") } } } +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom - // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash - eatClassAtom() { - const start = this.pos +// https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom +// https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash +pp.validateRegExp_eatClassAtom = function(state) { + const start = state.pos - if (this.eat(REVERSE_SOLIDUS)) { - if (this.eatClassEscape()) { - return true - } - if (this.switchU) { - // Make the same message as V8. - const ch = this.current() - if (ch === LATIN_SMALL_LETTER_C || this._isOctalDigit(ch)) { - this.raise("Invalid class escape") - } - this.raise("Invalid escape") + if (state.eat(REVERSE_SOLIDUS)) { + if (this.validateRegExp_eatClassEscape(state)) { + return true + } + if (state.switchU) { + // Make the same message as V8. 
+ const ch = state.current() + if (ch === LATIN_SMALL_LETTER_C || isOctalDigit(ch)) { + state.raise("Invalid class escape") } - this.pos = start + state.raise("Invalid escape") } + state.pos = start + } - const ch = this.current() - if (ch !== RIGHT_SQUARE_BRACKET) { - this.lastIntValue = ch - this.advance() - return true - } + const ch = state.current() + if (ch !== RIGHT_SQUARE_BRACKET) { + state.lastIntValue = ch + state.advance() + return true + } - return false + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape +pp.validateRegExp_eatClassEscape = function(state) { + const start = state.pos + + if (state.eat(LATIN_SMALL_LETTER_B)) { + state.lastIntValue = BACKSPACE + return true } - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape - eatClassEscape() { - const start = this.pos - if (this.eat(LATIN_SMALL_LETTER_B)) { - this.lastIntValue = BACKSPACE - return true - } - if (this.switchU && this.eat(HYPHEN_MINUS)) { - this.lastIntValue = HYPHEN_MINUS + if (state.switchU && state.eat(HYPHEN_MINUS)) { + state.lastIntValue = HYPHEN_MINUS + return true + } + + if (!state.switchU && state.eat(LATIN_SMALL_LETTER_C)) { + if (this.validateRegExp_eatClassControlLetter(state)) { return true } - if (!this.switchU && this.eat(LATIN_SMALL_LETTER_C)) { - if (this.eatClassControlLetter()) { - return true - } - this.pos = start - } - return this.eatCharacterClassEscape() || this.eatCharacterEscape() + state.pos = start } - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter - eatClassControlLetter() { - const ch = this.current() - if (this._isDecimalDigit(ch) || ch === LOW_LINE) { - this.lastIntValue = ch % 0x20 - this.advance() + return ( + this.validateRegExp_eatCharacterClassEscape(state) || + this.validateRegExp_eatCharacterEscape(state) + ) +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter +pp.validateRegExp_eatClassControlLetter = 
function(state) { + const ch = state.current() + if (isDecimalDigit(ch) || ch === LOW_LINE) { + state.lastIntValue = ch % 0x20 + state.advance() + return true + } + return false +} + +// https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence +pp.validateRegExp_eatHexEscapeSequence = function(state) { + const start = state.pos + if (state.eat(LATIN_SMALL_LETTER_X)) { + if (this.validateRegExp_eatFixedHexDigits(state, 2)) { return true } - return false + if (state.switchU) { + state.raise("Invalid escape") + } + state.pos = start } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence - eatHexEscapeSequence() { - const start = this.pos - if (this.eat(LATIN_SMALL_LETTER_X)) { - if (this._eatFixedHexDigits(2)) { - return true - } - if (this.switchU) { - this.raise("Invalid escape") - } - this.pos = start - } - return false +// https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits +pp.validateRegExp_eatDecimalDigits = function(state) { + const start = state.pos + let ch = 0 + state.lastIntValue = 0 + while (isDecimalDigit(ch = state.current())) { + state.lastIntValue = 10 * state.lastIntValue + (ch - DIGIT_ZERO) + state.advance() } + return state.pos !== start +} +function isDecimalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_NINE +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits - eatDecimalDigits() { - const start = this.pos - let ch = 0 - this.lastIntValue = 0 - while (this._isDecimalDigit(ch = this.current())) { - this.lastIntValue = 10 * this.lastIntValue + (ch - DIGIT_ZERO) - this.advance() - } - return this.pos !== start +// https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits +pp.validateRegExp_eatHexDigits = function(state) { + const start = state.pos + let ch = 0 + state.lastIntValue = 0 + while (isHexDigit(ch = state.current())) { + state.lastIntValue = 16 * state.lastIntValue + hexToInt(ch) + state.advance() } - _isDecimalDigit(ch) { - return 
ch >= DIGIT_ZERO && ch <= DIGIT_NINE + return state.pos !== start +} +function isHexDigit(ch) { + return ( + (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || + (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || + (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) + ) +} +function hexToInt(ch) { + if (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) { + return 10 + (ch - LATIN_CAPITAL_LETTER_A) + } + if (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) { + return 10 + (ch - LATIN_SMALL_LETTER_A) } + return ch - DIGIT_ZERO +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits - eatHexDigits() { - const start = this.pos - let ch = 0 - this.lastIntValue = 0 - while (this._isHexDigit(ch = this.current())) { - this.lastIntValue = 16 * this.lastIntValue + this._hexToInt(ch) - this.advance() - } - return this.pos !== start - } - _isHexDigit(ch) { - return ( - (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) - ) - } - _hexToInt(ch) { - if (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) { - return 10 + (ch - LATIN_CAPITAL_LETTER_A) - } - if (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) { - return 10 + (ch - LATIN_SMALL_LETTER_A) - } - return ch - DIGIT_ZERO - } - - // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence - // Allows only 0-377(octal) i.e. 0-255(decimal). - eatLegacyOctalEscapeSequence() { - if (this.eatOctalDigit()) { - const n1 = this.lastIntValue - if (this.eatOctalDigit()) { - const n2 = this.lastIntValue - if (n1 <= 3 && this.eatOctalDigit()) { - this.lastIntValue = n1 * 64 + n2 * 8 + this.lastIntValue - } else { - this.lastIntValue = n1 * 8 + n2 - } +// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence +// Allows only 0-377(octal) i.e. 0-255(decimal). 
+pp.validateRegExp_eatLegacyOctalEscapeSequence = function(state) { + if (this.validateRegExp_eatOctalDigit(state)) { + const n1 = state.lastIntValue + if (this.validateRegExp_eatOctalDigit(state)) { + const n2 = state.lastIntValue + if (n1 <= 3 && this.validateRegExp_eatOctalDigit(state)) { + state.lastIntValue = n1 * 64 + n2 * 8 + state.lastIntValue } else { - this.lastIntValue = n1 + state.lastIntValue = n1 * 8 + n2 } - return true + } else { + state.lastIntValue = n1 } - return false + return true } + return false +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit - eatOctalDigit() { - const ch = this.current() - if (this._isOctalDigit(ch)) { - this.lastIntValue = ch - DIGIT_ZERO - this.advance() - return true - } - this.lastIntValue = 0 - return false - } - _isOctalDigit(ch) { - return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN +// https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit +pp.validateRegExp_eatOctalDigit = function(state) { + const ch = state.current() + if (isOctalDigit(ch)) { + state.lastIntValue = ch - DIGIT_ZERO + state.advance() + return true } + state.lastIntValue = 0 + return false +} +function isOctalDigit(ch) { + return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN +} - // https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits - // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigit - // And HexDigit HexDigit in https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence - _eatFixedHexDigits(length) { - const start = this.pos - this.lastIntValue = 0 - for (let i = 0; i < length; ++i) { - const ch = this.current() - if (!this._isHexDigit(ch)) { - this.pos = start - return false - } - this.lastIntValue = 16 * this.lastIntValue + this._hexToInt(ch) - this.advance() +// https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits +// https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigit +// And HexDigit HexDigit in 
https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence +pp.validateRegExp_eatFixedHexDigits = function(state, length) { + const start = state.pos + state.lastIntValue = 0 + for (let i = 0; i < length; ++i) { + const ch = state.current() + if (!isHexDigit(ch)) { + state.pos = start + return false } - return true + state.lastIntValue = 16 * state.lastIntValue + hexToInt(ch) + state.advance() } + return true } diff --git a/src/state.js b/src/state.js index 583ed7b3f..185fe2864 100644 --- a/src/state.js +++ b/src/state.js @@ -90,8 +90,8 @@ export class Parser { this.scopeStack = [] this.enterFunctionScope() - // Lazy initialization - this.regexpValidator = null + // For RegExp validation + this.regexpState = null } // DEPRECATED Kept for backwards compatibility until 3.0 in case a plugin uses them diff --git a/src/tokenize.js b/src/tokenize.js index ef3302fe7..3dbaf67f1 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -2,8 +2,8 @@ import {isIdentifierStart, isIdentifierChar} from "./identifier" import {types as tt, keywords as keywordTypes} from "./tokentype" import {Parser} from "./state" import {SourceLocation} from "./locutil" +import {RegExpValidationState} from "./regexp" import {lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace} from "./whitespace" -import {RegExpValidator} from "./regexp" // Object type used to represent tokens. Note that normally, tokens // simply exist as properties on the parser object. 
This is only @@ -388,9 +388,10 @@ pp.readRegexp = function() { if (this.containsEsc) this.unexpected(flagsStart) // Validate pattern - const validator = this.regexpValidator || (this.regexpValidator = new RegExpValidator(this)) - validator.validateFlags(start, flags) - validator.validatePattern(start, pattern, flags.indexOf("u") !== -1) + const state = this.regexpState || (this.regexpState = new RegExpValidationState(this)) + state.reset(start, pattern, flags) + this.validateRegExpFlags(state) + this.validateRegExpPattern(state) // Create Literal#value property value. let value = null From 68fd78e9eca2f6dbb60c4d08957a10bc0b20427a Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Tue, 13 Feb 2018 22:18:37 +0900 Subject: [PATCH 10/18] small fix --- src/regexp.js | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index acc6a853d..5e8143451 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -2,8 +2,6 @@ import {isIdentifierStart, isIdentifierChar} from "./identifier.js" import {Parser} from "./state.js" import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js" -/* eslint no-invalid-this: error */ - const BACKSPACE = 0x08 const CHARACTER_TABULATION = 0x09 const LINE_FEED = 0x0A @@ -200,14 +198,6 @@ pp.validateRegExpPattern = function(state) { } } -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -// --------------------------------------------------------------------------- -// Productions -// --------------------------------------------------------------------------- - // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern pp.validateRegExp_pattern = function(state) { state.pos = 0 @@ -518,23 +508,30 @@ pp.validateRegExp_groupSpecifier = function(state) { // GroupName[U] :: // `<` RegExpIdentifierName[?U] `>` +// Note: this updates 
`state.lastStringValue` property with the eaten name. +pp.validateRegExp_eatGroupName = function(state) { + state.lastStringValue = "" + if (state.eat(LESS_THAN_SIGN)) { + if (this.validateRegExp_eatRegExpIdentifierName(state) && state.eat(GREATER_THAN_SIGN)) { + return true + } + state.raise("Invalid capture group name") + } + return false +} + // RegExpIdentifierName[U] :: // RegExpIdentifierStart[?U] // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] // Note: this updates `state.lastStringValue` property with the eaten name. -pp.validateRegExp_eatGroupName = function(state) { +pp.validateRegExp_eatRegExpIdentifierName = function(state) { state.lastStringValue = "" - if (state.eat(LESS_THAN_SIGN)) { - if (this.validateRegExp_eatRegExpIdentifierStart(state)) { + if (this.validateRegExp_eatRegExpIdentifierStart(state)) { + state.lastStringValue += codePointToString(state.lastIntValue) + while (this.validateRegExp_eatRegExpIdentifierPart(state)) { state.lastStringValue += codePointToString(state.lastIntValue) - while (this.validateRegExp_eatRegExpIdentifierPart(state)) { - state.lastStringValue += codePointToString(state.lastIntValue) - } - if (state.eat(GREATER_THAN_SIGN)) { - return true - } } - state.raise("Invalid capture group name") + return true } return false } From 82fd89bd2180d55be1a3ef3e7817ac51026721fb Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Wed, 14 Feb 2018 09:59:56 +0900 Subject: [PATCH 11/18] use code unit without u flag --- src/regexp.js | 36 ++++++++++++------------------------ test/tests-regexp.js | 5 +++++ 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 5e8143451..48974e365 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -96,15 +96,16 @@ export class RegExpValidationState { this.parser.raise(this.start, `Invalid regular expression: /${this.source}/: ${message}`) } - // Node.js 0.12/0.10 don't support String.prototype.codePointAt(). 
- codePointAt(i) { + // If u flag is given, this returns the code point at the index (it combines a surrogate pair). + // Otherwise, this returns the code unit of the index (can be a part of a surrogate pair). + at(i) { const s = this.source const l = s.length if (i >= l) { return -1 } const c = s.charCodeAt(i) - if (c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { + if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { return c } return (c << 10) + s.charCodeAt(i + 1) - 0x35FDC00 @@ -117,18 +118,18 @@ export class RegExpValidationState { return l } const c = s.charCodeAt(i) - if (c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { + if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) { return i + 1 } return i + 2 } current() { - return this.codePointAt(this.pos) + return this.at(this.pos) } lookahead() { - return this.codePointAt(this.nextIndex(this.pos)) + return this.at(this.nextIndex(this.pos)) } advance() { @@ -145,16 +146,9 @@ export class RegExpValidationState { } function codePointToString(ch) { - if (ch <= 0xFFFF) { - return String.fromCharCode(ch) - } - return String.fromCharCode(getLeadSurrogate(ch), getTrailSurrogate(ch)) -} -function getLeadSurrogate(ch) { - return ((ch - 0x10000) >> 10) + 0xD800 -} -function getTrailSurrogate(ch) { - return ((ch - 0x10000) & 0x03FF) + 0xDC00 + if (ch <= 0xFFFF) return String.fromCharCode(ch) + ch -= 0x10000 + return String.fromCharCode((ch >> 10) + 0xD800, (ch & 0x03FF) + 0xDC00) } /** @@ -925,15 +919,9 @@ pp.validateRegExp_eatCharacterClass = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash pp.validateRegExp_classRanges = function(state) { while (this.validateRegExp_eatClassAtom(state)) { - const left = (state.switchU || state.lastIntValue <= 0xFFFF) - ? 
state.lastIntValue - : getTrailSurrogate(state.lastIntValue) - + const left = state.lastIntValue if (state.eat(HYPHEN_MINUS) && this.validateRegExp_eatClassAtom(state)) { - const right = (state.switchU || state.lastIntValue <= 0xFFFF) - ? state.lastIntValue - : getLeadSurrogate(state.lastIntValue) - + const right = state.lastIntValue if (state.switchU && (left === -1 || right === -1)) { state.raise("Invalid character class") } diff --git a/test/tests-regexp.js b/test/tests-regexp.js index 6b9b2656d..8601bcb3b 100644 --- a/test/tests-regexp.js +++ b/test/tests-regexp.js @@ -943,6 +943,11 @@ testFail("/[🌷-🌸]/", "Invalid regular expression: /[🌷-🌸]/: Range out testFail("/[🌷-🌸]/", "Invalid regular expression: /[🌷-🌸]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) testFail("/[🌷-🌸]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) test("/[🌷-🌸]/u", {}, { ecmaVersion: 2015 }) +testFail("/[\\u0000-🌸-\\u0000]/", "Invalid regular expression: /[\\u0000-🌸-\\u0000]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +testFail("/[\\u0000-\\ud83c\\udf38-\\u0000]/", "Invalid regular expression: /[\\u0000-\\ud83c\\udf38-\\u0000]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) +test("/[\\u0000-🌸-\\u0000]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u0000-\\u{1f338}-\\u0000]/u", {}, { ecmaVersion: 2015 }) +test("/[\\u0000-\\ud83c\\udf38-\\u0000]/u", {}, { ecmaVersion: 2015 }) testFail("/[🌸-🌷]/", "Invalid regular expression: /[🌸-🌷]/: Range out of order in character class (1:1)", { ecmaVersion: 5 }) testFail("/[🌸-🌷]/", "Invalid regular expression: /[🌸-🌷]/: Range out of order in character class (1:1)", { ecmaVersion: 2015 }) testFail("/[🌸-🌷]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) From 4bf6c11509eeb6f3048c2a5d523688d6f12ba06e Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Wed, 14 Feb 2018 15:51:19 +0900 Subject: [PATCH 12/18] add a test --- test/tests-regexp.js | 1 + 1 file changed, 
1 insertion(+) diff --git a/test/tests-regexp.js b/test/tests-regexp.js index 8601bcb3b..6c4719486 100644 --- a/test/tests-regexp.js +++ b/test/tests-regexp.js @@ -1048,6 +1048,7 @@ test("/[\\d][\\12-\\14]{1,}[^\\d]/", {}, { ecmaVersion: 5 }) test("/[\\d][\\12-\\14]{1,}[^\\d]/", {}, { ecmaVersion: 2015 }) testFail("/[\\d][\\12-\\14]{1,}[^\\d]/u", "Invalid regular expression flag (1:1)", { ecmaVersion: 5 }) testFail("/[\\d][\\12-\\14]{1,}[^\\d]/u", "Invalid regular expression: /[\\d][\\12-\\14]{1,}[^\\d]/: Invalid class escape (1:1)", { ecmaVersion: 2015 }) +test("/([a ]\\b)*\\b/", {}, { ecmaVersion: 5 }) /* // This is test case generator. From 9264666b631d225781696ad81cf7f2b1f1bc576a Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Thu, 15 Feb 2018 07:04:54 +0900 Subject: [PATCH 13/18] make recoverable --- src/regexp.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regexp.js b/src/regexp.js index 48974e365..96ef9212e 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -93,7 +93,7 @@ export class RegExpValidationState { } raise(message) { - this.parser.raise(this.start, `Invalid regular expression: /${this.source}/: ${message}`) + this.parser.raiseRecoverable(this.start, `Invalid regular expression: /${this.source}/: ${message}`) } // If u flag is given, this returns the code point at the index (it combines a surrogate pair). 
From c44e09bc7289508134cd799cb028a497a5500146 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Thu, 15 Feb 2018 07:09:20 +0900 Subject: [PATCH 14/18] improve loose parser --- src/loose/state.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/loose/state.js b/src/loose/state.js index 571f472d6..da068edb5 100644 --- a/src/loose/state.js +++ b/src/loose/state.js @@ -1,5 +1,7 @@ import {tokenizer, SourceLocation, tokTypes as tt, Node, lineBreak, isNewLine} from "../index" +function noop() {} + // Registered plugins export const pluginsLoose = {} @@ -9,6 +11,8 @@ export class LooseParser { this.options = this.toks.options this.input = this.toks.input this.tok = this.last = {type: tt.eof, start: 0, end: 0} + this.tok.validateRegExpFlags = noop + this.tok.validateRegExpPattern = noop if (this.options.locations) { let here = this.toks.curPosition() this.tok.loc = new SourceLocation(this.toks, here, here) From 2b229f0ac9750aa2643e58b5e0c0448d7d9c8363 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sat, 17 Feb 2018 13:52:17 +0900 Subject: [PATCH 15/18] follow tc39/ecma262#1102 --- src/regexp.js | 5 +++-- test/tests-regexp-2018.js | 32 ++++++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 96ef9212e..cb4935815 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -289,15 +289,16 @@ pp.validateRegExp_eatAssertion = function(state) { // Lookahead / Lookbehind if (state.eat(LEFT_PARENTHESIS) && state.eat(QUESTION_MARK)) { + let lookbehind = false if (this.options.ecmaVersion >= 9) { - state.eat(LESS_THAN_SIGN) + lookbehind = state.eat(LESS_THAN_SIGN) } if (state.eat(EQUALS_SIGN) || state.eat(EXCLAMATION_MARK)) { this.validateRegExp_disjunction(state) if (!state.eat(RIGHT_PARENTHESIS)) { state.raise("Unterminated group") } - state.lastAssertionIsQuantifiable = true + state.lastAssertionIsQuantifiable = !lookbehind return true } } diff --git a/test/tests-regexp-2018.js 
b/test/tests-regexp-2018.js index 1add3a75e..0a48fabb7 100644 --- a/test/tests-regexp-2018.js +++ b/test/tests-regexp-2018.js @@ -118,23 +118,23 @@ testFail("/(?\\w){3})f/u", {}, { ecmaVersion: 2018 }) test("/((?<=\\w{3}))f/u", {}, { ecmaVersion: 2018 }) From 307e3475fd30e8c71f1cb3460a42a91b42fe1645 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sat, 17 Feb 2018 14:09:59 +0900 Subject: [PATCH 16/18] remove constants --- src/regexp.js | 302 ++++++++++++++++++++------------------------------ 1 file changed, 121 insertions(+), 181 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index cb4935815..6041f9135 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -2,66 +2,6 @@ import {isIdentifierStart, isIdentifierChar} from "./identifier.js" import {Parser} from "./state.js" import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js" -const BACKSPACE = 0x08 -const CHARACTER_TABULATION = 0x09 -const LINE_FEED = 0x0A -const LINE_TABULATION = 0x0B -const FORM_FEED = 0x0C -const CARRIAGE_RETURN = 0x0D -const EXCLAMATION_MARK = 0x21 // ! -const DOLLAR_SIGN = 0x24 // $ -const LEFT_PARENTHESIS = 0x28 // ( -const RIGHT_PARENTHESIS = 0x29 // ) -const ASTERISK = 0x2A // * -const PLUS_SIGN = 0x2B // + -const COMMA = 0x2C // , -const HYPHEN_MINUS = 0x2D // - -const FULL_STOP = 0x2E // . -const SOLIDUS = 0x2F // / -const DIGIT_ZERO = 0x30 // 0 -const DIGIT_ONE = 0x31 // 1 -const DIGIT_SEVEN = 0x37 // 7 -const DIGIT_NINE = 0x39 // 9 -const COLON = 0x3A // : -const LESS_THAN_SIGN = 0x3C // < -const EQUALS_SIGN = 0x3D // = -const GREATER_THAN_SIGN = 0x3E // > -const QUESTION_MARK = 0x3F // ? 
-const LATIN_CAPITAL_LETTER_A = 0x41 // A -const LATIN_CAPITAL_LETTER_B = 0x42 // B -const LATIN_CAPITAL_LETTER_D = 0x44 // D -const LATIN_CAPITAL_LETTER_F = 0x46 // F -const LATIN_CAPITAL_LETTER_P = 0x50 // P -const LATIN_CAPITAL_LETTER_S = 0x53 // S -const LATIN_CAPITAL_LETTER_W = 0x57 // W -const LATIN_CAPITAL_LETTER_Z = 0x5A // Z -const LOW_LINE = 0x5F // _ -const LATIN_SMALL_LETTER_A = 0x61 // a -const LATIN_SMALL_LETTER_B = 0x62 // b -const LATIN_SMALL_LETTER_C = 0x63 // c -const LATIN_SMALL_LETTER_D = 0x64 // d -const LATIN_SMALL_LETTER_F = 0x66 // f -const LATIN_SMALL_LETTER_K = 0x6B // k -const LATIN_SMALL_LETTER_N = 0x6E // n -const LATIN_SMALL_LETTER_P = 0x70 // p -const LATIN_SMALL_LETTER_R = 0x72 // r -const LATIN_SMALL_LETTER_S = 0x73 // s -const LATIN_SMALL_LETTER_T = 0x74 // t -const LATIN_SMALL_LETTER_U = 0x75 // u -const LATIN_SMALL_LETTER_V = 0x76 // v -const LATIN_SMALL_LETTER_W = 0x77 // w -const LATIN_SMALL_LETTER_X = 0x78 // x -const LATIN_SMALL_LETTER_Z = 0x7A // z -const LEFT_SQUARE_BRACKET = 0x5B // [ -const REVERSE_SOLIDUS = 0x5C // \ -const RIGHT_SQUARE_BRACKET = 0x5D // [ -const CIRCUMFLEX_ACCENT = 0x5E // ^ -const LEFT_CURLY_BRACKET = 0x7B // { -const VERTICAL_LINE = 0x7C // | -const RIGHT_CURLY_BRACKET = 0x7D // } -const ZERO_WIDTH_NON_JOINER = 0x200C -const ZERO_WIDTH_JOINER = 0x200D - const pp = Parser.prototype export class RegExpValidationState { @@ -207,10 +147,10 @@ pp.validateRegExp_pattern = function(state) { if (state.pos !== state.source.length) { // Make the same messages as V8. 
- if (state.eat(RIGHT_PARENTHESIS)) { + if (state.eat(0x29 /* ) */)) { state.raise("Unmatched ')'") } - if (state.eat(RIGHT_SQUARE_BRACKET) || state.eat(RIGHT_CURLY_BRACKET)) { + if (state.eat(0x5D /* ] */) || state.eat(0x7D /* } */)) { state.raise("Lone quantifier brackets") } } @@ -227,7 +167,7 @@ pp.validateRegExp_pattern = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction pp.validateRegExp_disjunction = function(state) { this.validateRegExp_alternative(state) - while (state.eat(VERTICAL_LINE)) { + while (state.eat(0x7C /* | */)) { this.validateRegExp_alternative(state) } @@ -235,7 +175,7 @@ pp.validateRegExp_disjunction = function(state) { if (this.validateRegExp_eatQuantifier(state, true)) { state.raise("Nothing to repeat") } - if (state.eat(LEFT_CURLY_BRACKET)) { + if (state.eat(0x7B /* { */)) { state.raise("Lone quantifier brackets") } } @@ -275,27 +215,27 @@ pp.validateRegExp_eatAssertion = function(state) { state.lastAssertionIsQuantifiable = false // ^, $ - if (state.eat(CIRCUMFLEX_ACCENT) || state.eat(DOLLAR_SIGN)) { + if (state.eat(0x5E /* ^ */) || state.eat(0x24 /* $ */)) { return true } // \b \B - if (state.eat(REVERSE_SOLIDUS)) { - if (state.eat(LATIN_CAPITAL_LETTER_B) || state.eat(LATIN_SMALL_LETTER_B)) { + if (state.eat(0x5C /* \ */)) { + if (state.eat(0x42 /* B */) || state.eat(0x62 /* b */)) { return true } state.pos = start } // Lookahead / Lookbehind - if (state.eat(LEFT_PARENTHESIS) && state.eat(QUESTION_MARK)) { + if (state.eat(0x28 /* ( */) && state.eat(0x3F /* ? */)) { let lookbehind = false if (this.options.ecmaVersion >= 9) { - lookbehind = state.eat(LESS_THAN_SIGN) + lookbehind = state.eat(0x3C /* < */) } - if (state.eat(EQUALS_SIGN) || state.eat(EXCLAMATION_MARK)) { + if (state.eat(0x3D /* = */) || state.eat(0x21 /* !
*/)) { this.validateRegExp_disjunction(state) - if (!state.eat(RIGHT_PARENTHESIS)) { + if (!state.eat(0x29 /* ) */)) { state.raise("Unterminated group") } state.lastAssertionIsQuantifiable = !lookbehind @@ -310,7 +250,7 @@ pp.validateRegExp_eatAssertion = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier pp.validateRegExp_eatQuantifier = function(state, noError = false) { if (this.validateRegExp_eatQuantifierPrefix(state, noError)) { - state.eat(QUESTION_MARK) + state.eat(0x3F /* ? */) return true } return false @@ -319,22 +259,22 @@ pp.validateRegExp_eatQuantifier = function(state, noError = false) { // https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix pp.validateRegExp_eatQuantifierPrefix = function(state, noError) { return ( - state.eat(ASTERISK) || - state.eat(PLUS_SIGN) || - state.eat(QUESTION_MARK) || + state.eat(0x2A /* * */) || + state.eat(0x2B /* + */) || + state.eat(0x3F /* ? */) || this.validateRegExp_eatBracedQuantifier(state, noError) ) } pp.validateRegExp_eatBracedQuantifier = function(state, noError) { const start = state.pos - if (state.eat(LEFT_CURLY_BRACKET)) { + if (state.eat(0x7B /* { */)) { let min = 0, max = -1 if (this.validateRegExp_eatDecimalDigits(state)) { min = state.lastIntValue - if (state.eat(COMMA) && this.validateRegExp_eatDecimalDigits(state)) { + if (state.eat(0x2C /* , */) && this.validateRegExp_eatDecimalDigits(state)) { max = state.lastIntValue } - if (state.eat(RIGHT_CURLY_BRACKET)) { + if (state.eat(0x7D /* } */)) { // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term if (max !== -1 && max < min && !noError) { state.raise("numbers out of order in {} quantifier") @@ -354,7 +294,7 @@ pp.validateRegExp_eatBracedQuantifier = function(state, noError) { pp.validateRegExp_eatAtom = function(state) { return ( this.validateRegExp_eatPatternCharacters(state) || - state.eat(FULL_STOP) || + state.eat(0x2E /* . 
*/) || this.validateRegExp_eatReverseSolidusAtomEscape(state) || this.validateRegExp_eatCharacterClass(state) || this.validateRegExp_eatUncapturingGroup(state) || @@ -363,7 +303,7 @@ pp.validateRegExp_eatAtom = function(state) { } pp.validateRegExp_eatReverseSolidusAtomEscape = function(state) { const start = state.pos - if (state.eat(REVERSE_SOLIDUS)) { + if (state.eat(0x5C /* \ */)) { if (this.validateRegExp_eatAtomEscape(state)) { return true } @@ -373,10 +313,10 @@ pp.validateRegExp_eatReverseSolidusAtomEscape = function(state) { } pp.validateRegExp_eatUncapturingGroup = function(state) { const start = state.pos - if (state.eat(LEFT_PARENTHESIS)) { - if (state.eat(QUESTION_MARK) && state.eat(COLON)) { + if (state.eat(0x28 /* ( */)) { + if (state.eat(0x3F /* ? */) && state.eat(0x3A /* : */)) { this.validateRegExp_disjunction(state) - if (state.eat(RIGHT_PARENTHESIS)) { + if (state.eat(0x29 /* ) */)) { return true } state.raise("Unterminated group") @@ -386,14 +326,14 @@ pp.validateRegExp_eatUncapturingGroup = function(state) { return false } pp.validateRegExp_eatCapturingGroup = function(state) { - if (state.eat(LEFT_PARENTHESIS)) { + if (state.eat(0x28 /* ( */)) { if (this.options.ecmaVersion >= 9) { this.validateRegExp_groupSpecifier(state) - } else if (state.current() === QUESTION_MARK) { + } else if (state.current() === 0x3F /* ? */) { state.raise("Invalid group") } this.validateRegExp_disjunction(state) - if (state.eat(RIGHT_PARENTHESIS)) { + if (state.eat(0x29 /* ) */)) { state.numCapturingParens += 1 return true } @@ -405,7 +345,7 @@ pp.validateRegExp_eatCapturingGroup = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom pp.validateRegExp_eatExtendedAtom = function(state) { return ( - state.eat(FULL_STOP) || + state.eat(0x2E /* . 
*/) || this.validateRegExp_eatReverseSolidusAtomEscape(state) || this.validateRegExp_eatCharacterClass(state) || this.validateRegExp_eatUncapturingGroup(state) || @@ -435,20 +375,20 @@ pp.validateRegExp_eatSyntaxCharacter = function(state) { } function isSyntaxCharacter(ch) { return ( - ch === CIRCUMFLEX_ACCENT || - ch === DOLLAR_SIGN || - ch === REVERSE_SOLIDUS || - ch === FULL_STOP || - ch === ASTERISK || - ch === PLUS_SIGN || - ch === QUESTION_MARK || - ch === LEFT_PARENTHESIS || - ch === RIGHT_PARENTHESIS || - ch === LEFT_SQUARE_BRACKET || - ch === RIGHT_SQUARE_BRACKET || - ch === LEFT_CURLY_BRACKET || - ch === RIGHT_CURLY_BRACKET || - ch === VERTICAL_LINE + ch === 0x5E /* ^ */ || + ch === 0x24 /* $ */ || + ch === 0x5C /* \ */ || + ch === 0x2E /* . */ || + ch === 0x2A /* * */ || + ch === 0x2B /* + */ || + ch === 0x3F /* ? */ || + ch === 0x28 /* ( */ || + ch === 0x29 /* ) */ || + ch === 0x5B /* [ */ || + ch === 0x5D /* ] */ || + ch === 0x7B /* { */ || + ch === 0x7D /* } */ || + ch === 0x7C /* | */ ) } @@ -468,16 +408,16 @@ pp.validateRegExp_eatExtendedPatternCharacter = function(state) { const ch = state.current() if ( ch !== -1 && - ch !== CIRCUMFLEX_ACCENT && - ch !== DOLLAR_SIGN && - ch !== FULL_STOP && - ch !== ASTERISK && - ch !== PLUS_SIGN && - ch !== QUESTION_MARK && - ch !== LEFT_PARENTHESIS && - ch !== RIGHT_PARENTHESIS && - ch !== LEFT_SQUARE_BRACKET && - ch !== VERTICAL_LINE + ch !== 0x5E /* ^ */ && + ch !== 0x24 /* $ */ && + ch !== 0x2E /* . */ && + ch !== 0x2A /* * */ && + ch !== 0x2B /* + */ && + ch !== 0x3F /* ? */ && + ch !== 0x28 /* ( */ && + ch !== 0x29 /* ) */ && + ch !== 0x5B /* [ */ && + ch !== 0x7C /* | */ ) { state.advance() return true @@ -489,7 +429,7 @@ pp.validateRegExp_eatExtendedPatternCharacter = function(state) { // [empty] // `?` GroupName[?U] pp.validateRegExp_groupSpecifier = function(state) { - if (state.eat(QUESTION_MARK)) { + if (state.eat(0x3F /* ?
*/)) { if (this.validateRegExp_eatGroupName(state)) { if (state.groupNames.indexOf(state.lastStringValue) !== -1) { state.raise("Duplicate capture group name") @@ -506,8 +446,8 @@ pp.validateRegExp_groupSpecifier = function(state) { // Note: this updates `state.lastStringValue` property with the eaten name. pp.validateRegExp_eatGroupName = function(state) { state.lastStringValue = "" - if (state.eat(LESS_THAN_SIGN)) { - if (this.validateRegExp_eatRegExpIdentifierName(state) && state.eat(GREATER_THAN_SIGN)) { + if (state.eat(0x3C /* < */)) { + if (this.validateRegExp_eatRegExpIdentifierName(state) && state.eat(0x3E /* > */)) { return true } state.raise("Invalid capture group name") @@ -541,7 +481,7 @@ pp.validateRegExp_eatRegExpIdentifierStart = function(state) { let ch = state.current() state.advance() - if (ch === REVERSE_SOLIDUS && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + if (ch === 0x5C /* \ */ && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { ch = state.lastIntValue } if (isRegExpIdentifierStart(ch)) { @@ -553,7 +493,7 @@ pp.validateRegExp_eatRegExpIdentifierStart = function(state) { return false } function isRegExpIdentifierStart(ch) { - return isIdentifierStart(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE + return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */ } // RegExpIdentifierPart[U] :: @@ -568,7 +508,7 @@ pp.validateRegExp_eatRegExpIdentifierPart = function(state) { let ch = state.current() state.advance() - if (ch === REVERSE_SOLIDUS && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + if (ch === 0x5C /* \ */ && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { ch = state.lastIntValue } if (isRegExpIdentifierPart(ch)) { @@ -580,7 +520,7 @@ pp.validateRegExp_eatRegExpIdentifierPart = function(state) { return false } function isRegExpIdentifierPart(ch) { - return isIdentifierChar(ch, true) || ch === DOLLAR_SIGN || ch === LOW_LINE || ch === ZERO_WIDTH_NON_JOINER 
|| ch === ZERO_WIDTH_JOINER + return isIdentifierChar(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */ || ch === 0x200C /* <ZWNJ> */ || ch === 0x200D /* <ZWJ> */ } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape @@ -595,7 +535,7 @@ pp.validateRegExp_eatAtomEscape = function(state) { } if (state.switchU) { // Make the same message as V8. - if (state.current() === LATIN_SMALL_LETTER_C) { + if (state.current() === 0x63 /* c */) { state.raise("Invalid unicode escape") } state.raise("Invalid escape") @@ -621,7 +561,7 @@ pp.validateRegExp_eatBackReference = function(state) { return false } pp.validateRegExp_eatKGroupName = function(state) { - if (state.eat(LATIN_SMALL_LETTER_K)) { + if (state.eat(0x6B /* k */)) { if (this.validateRegExp_eatGroupName(state)) { state.backReferenceNames.push(state.lastStringValue) return true @@ -645,7 +585,7 @@ pp.validateRegExp_eatCharacterEscape = function(state) { } pp.validateRegExp_eatCControlLetter = function(state) { const start = state.pos - if (state.eat(LATIN_SMALL_LETTER_C)) { + if (state.eat(0x63 /* c */)) { if (this.validateRegExp_eatControlLetter(state)) { return true } @@ -654,7 +594,7 @@ pp.validateRegExp_eatCControlLetter = function(state) { return false } pp.validateRegExp_eatZero = function(state) { - if (state.current() === DIGIT_ZERO && !isDecimalDigit(state.lookahead())) { + if (state.current() === 0x30 /* 0 */ && !isDecimalDigit(state.lookahead())) { state.lastIntValue = 0 state.advance() return true @@ -665,28 +605,28 @@ pp.validateRegExp_eatZero = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape pp.validateRegExp_eatControlEscape = function(state) { const ch = state.current() - if (ch === LATIN_SMALL_LETTER_T) { - state.lastIntValue = CHARACTER_TABULATION + if (ch === 0x74 /* t */) { + state.lastIntValue = 0x09 /* \t */ state.advance() return true } - if (ch === LATIN_SMALL_LETTER_N) { - state.lastIntValue = LINE_FEED + if (ch === 0x6E /* n */) { +
state.lastIntValue = 0x0A /* \n */ state.advance() return true } - if (ch === LATIN_SMALL_LETTER_V) { - state.lastIntValue = LINE_TABULATION + if (ch === 0x76 /* v */) { + state.lastIntValue = 0x0B /* \v */ state.advance() return true } - if (ch === LATIN_SMALL_LETTER_F) { - state.lastIntValue = FORM_FEED + if (ch === 0x66 /* f */) { + state.lastIntValue = 0x0C /* \f */ state.advance() return true } - if (ch === LATIN_SMALL_LETTER_R) { - state.lastIntValue = CARRIAGE_RETURN + if (ch === 0x72 /* r */) { + state.lastIntValue = 0x0D /* \r */ state.advance() return true } @@ -705,8 +645,8 @@ pp.validateRegExp_eatControlLetter = function(state) { } function isControlLetter(ch) { return ( - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_Z) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_Z) + (ch >= 0x41 /* A */ && ch <= 0x5A /* Z */) || + (ch >= 0x61 /* a */ && ch <= 0x7A /* z */) ) } @@ -714,12 +654,12 @@ function isControlLetter(ch) { pp.validateRegExp_eatRegExpUnicodeEscapeSequence = function(state) { const start = state.pos - if (state.eat(LATIN_SMALL_LETTER_U)) { + if (state.eat(0x75 /* u */)) { if (this.validateRegExp_eatFixedHexDigits(state, 4)) { const lead = state.lastIntValue if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) { const leadSurrogateEnd = state.pos - if (state.eat(REVERSE_SOLIDUS) && state.eat(LATIN_SMALL_LETTER_U) && this.validateRegExp_eatFixedHexDigits(state, 4)) { + if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.validateRegExp_eatFixedHexDigits(state, 4)) { const trail = state.lastIntValue if (trail >= 0xDC00 && trail <= 0xDFFF) { state.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 @@ -733,9 +673,9 @@ pp.validateRegExp_eatRegExpUnicodeEscapeSequence = function(state) { } if ( state.switchU && - state.eat(LEFT_CURLY_BRACKET) && + state.eat(0x7B /* { */) && this.validateRegExp_eatHexDigits(state) && - state.eat(RIGHT_CURLY_BRACKET) && + state.eat(0x7D /* } */) && 
isValidUnicode(state.lastIntValue) ) { return true @@ -758,15 +698,15 @@ pp.validateRegExp_eatIdentityEscape = function(state) { if (this.validateRegExp_eatSyntaxCharacter(state)) { return true } - if (state.eat(SOLIDUS)) { - state.lastIntValue = SOLIDUS + if (state.eat(0x2F /* / */)) { + state.lastIntValue = 0x2F /* / */ return true } return false } const ch = state.current() - if (ch !== LATIN_SMALL_LETTER_C && (!state.switchN || ch !== LATIN_SMALL_LETTER_K)) { + if (ch !== 0x63 /* c */ && (!state.switchN || ch !== 0x6B /* k */)) { state.lastIntValue = ch state.advance() return true @@ -779,11 +719,11 @@ pp.validateRegExp_eatIdentityEscape = function(state) { pp.validateRegExp_eatDecimalEscape = function(state) { state.lastIntValue = 0 let ch = state.current() - if (ch >= DIGIT_ONE && ch <= DIGIT_NINE) { + if (ch >= 0x31 /* 1 */ && ch <= 0x39 /* 9 */) { do { - state.lastIntValue = 10 * state.lastIntValue + (ch - DIGIT_ZERO) + state.lastIntValue = 10 * state.lastIntValue + (ch - 0x30 /* 0 */) state.advance() - } while ((ch = state.current()) >= DIGIT_ZERO && ch <= DIGIT_NINE) + } while ((ch = state.current()) >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) return true } return false @@ -802,14 +742,14 @@ pp.validateRegExp_eatCharacterClassEscape = function(state) { if ( state.switchU && this.options.ecmaVersion >= 9 && - (ch === LATIN_CAPITAL_LETTER_P || ch === LATIN_SMALL_LETTER_P) + (ch === 0x50 /* P */ || ch === 0x70 /* p */) ) { state.lastIntValue = -1 state.advance() if ( - state.eat(LEFT_CURLY_BRACKET) && + state.eat(0x7B /* { */) && this.validateRegExp_eatUnicodePropertyValueExpression(state) && - state.eat(RIGHT_CURLY_BRACKET) + state.eat(0x7D /* } */) ) { return true } @@ -820,12 +760,12 @@ pp.validateRegExp_eatCharacterClassEscape = function(state) { } function isCharacterClassEscape(ch) { return ( - ch === LATIN_SMALL_LETTER_D || - ch === LATIN_CAPITAL_LETTER_D || - ch === LATIN_SMALL_LETTER_S || - ch === LATIN_CAPITAL_LETTER_S || - ch === LATIN_SMALL_LETTER_W 
|| - ch === LATIN_CAPITAL_LETTER_W + ch === 0x64 /* d */ || + ch === 0x44 /* D */ || + ch === 0x73 /* s */ || + ch === 0x53 /* S */ || + ch === 0x77 /* w */ || + ch === 0x57 /* W */ ) } @@ -836,7 +776,7 @@ pp.validateRegExp_eatUnicodePropertyValueExpression = function(state) { const start = state.pos // UnicodePropertyName `=` UnicodePropertyValue - if (this.validateRegExp_eatUnicodePropertyName(state) && state.eat(EQUALS_SIGN)) { + if (this.validateRegExp_eatUnicodePropertyName(state) && state.eat(0x3D /* = */)) { const name = state.lastStringValue if (this.validateRegExp_eatUnicodePropertyValue(state)) { const value = state.lastStringValue @@ -877,7 +817,7 @@ pp.validateRegExp_eatUnicodePropertyName = function(state) { return state.lastStringValue !== "" } function isUnicodePropertyNameCharacter(ch) { - return isControlLetter(ch) || ch === LOW_LINE + return isControlLetter(ch) || ch === 0x5F /* _ */ } // UnicodePropertyValue :: @@ -903,10 +843,10 @@ pp.validateRegExp_eatLoneUnicodePropertyNameOrValue = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass pp.validateRegExp_eatCharacterClass = function(state) { - if (state.eat(LEFT_SQUARE_BRACKET)) { - state.eat(CIRCUMFLEX_ACCENT) + if (state.eat(0x5B /* [ */)) { + state.eat(0x5E /* ^ */) this.validateRegExp_classRanges(state) - if (state.eat(RIGHT_SQUARE_BRACKET)) { + if (state.eat(0x5D /* ] */)) { return true } // Unreachable since it threw "unterminated regular expression" error before.
@@ -921,7 +861,7 @@ pp.validateRegExp_eatCharacterClass = function(state) { pp.validateRegExp_classRanges = function(state) { while (this.validateRegExp_eatClassAtom(state)) { const left = state.lastIntValue - if (state.eat(HYPHEN_MINUS) && this.validateRegExp_eatClassAtom(state)) { + if (state.eat(0x2D /* - */) && this.validateRegExp_eatClassAtom(state)) { const right = state.lastIntValue if (state.switchU && (left === -1 || right === -1)) { state.raise("Invalid character class") @@ -938,14 +878,14 @@ pp.validateRegExp_classRanges = function(state) { pp.validateRegExp_eatClassAtom = function(state) { const start = state.pos - if (state.eat(REVERSE_SOLIDUS)) { + if (state.eat(0x5C /* \ */)) { if (this.validateRegExp_eatClassEscape(state)) { return true } if (state.switchU) { // Make the same message as V8. const ch = state.current() - if (ch === LATIN_SMALL_LETTER_C || isOctalDigit(ch)) { + if (ch === 0x63 /* c */ || isOctalDigit(ch)) { state.raise("Invalid class escape") } state.raise("Invalid escape") @@ -954,7 +894,7 @@ pp.validateRegExp_eatClassAtom = function(state) { } const ch = state.current() - if (ch !== RIGHT_SQUARE_BRACKET) { + if (ch !== 0x5D /* ] */) { state.lastIntValue = ch state.advance() return true @@ -967,17 +907,17 @@ pp.validateRegExp_eatClassAtom = function(state) { pp.validateRegExp_eatClassEscape = function(state) { const start = state.pos - if (state.eat(LATIN_SMALL_LETTER_B)) { - state.lastIntValue = BACKSPACE + if (state.eat(0x62 /* b */)) { + state.lastIntValue = 0x08 /* <BS> */ return true } - if (state.switchU && state.eat(HYPHEN_MINUS)) { - state.lastIntValue = HYPHEN_MINUS + if (state.switchU && state.eat(0x2D /* - */)) { + state.lastIntValue = 0x2D /* - */ return true } - if (!state.switchU && state.eat(LATIN_SMALL_LETTER_C)) { + if (!state.switchU && state.eat(0x63 /* c */)) { if (this.validateRegExp_eatClassControlLetter(state)) { return true } @@ -993,7 +933,7 @@ pp.validateRegExp_eatClassEscape = function(state) { //
https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter pp.validateRegExp_eatClassControlLetter = function(state) { const ch = state.current() - if (isDecimalDigit(ch) || ch === LOW_LINE) { + if (isDecimalDigit(ch) || ch === 0x5F /* _ */) { state.lastIntValue = ch % 0x20 state.advance() return true @@ -1004,7 +944,7 @@ pp.validateRegExp_eatClassControlLetter = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence pp.validateRegExp_eatHexEscapeSequence = function(state) { const start = state.pos - if (state.eat(LATIN_SMALL_LETTER_X)) { + if (state.eat(0x78 /* x */)) { if (this.validateRegExp_eatFixedHexDigits(state, 2)) { return true } @@ -1022,13 +962,13 @@ pp.validateRegExp_eatDecimalDigits = function(state) { let ch = 0 state.lastIntValue = 0 while (isDecimalDigit(ch = state.current())) { - state.lastIntValue = 10 * state.lastIntValue + (ch - DIGIT_ZERO) + state.lastIntValue = 10 * state.lastIntValue + (ch - 0x30 /* 0 */) state.advance() } return state.pos !== start } function isDecimalDigit(ch) { - return ch >= DIGIT_ZERO && ch <= DIGIT_NINE + return ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */ } // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits @@ -1044,19 +984,19 @@ pp.validateRegExp_eatHexDigits = function(state) { } function isHexDigit(ch) { return ( - (ch >= DIGIT_ZERO && ch <= DIGIT_NINE) || - (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) || - (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) + (ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) || + (ch >= 0x41 /* A */ && ch <= 0x46 /* F */) || + (ch >= 0x61 /* a */ && ch <= 0x66 /* f */) ) } function hexToInt(ch) { - if (ch >= LATIN_CAPITAL_LETTER_A && ch <= LATIN_CAPITAL_LETTER_F) { - return 10 + (ch - LATIN_CAPITAL_LETTER_A) + if (ch >= 0x41 /* A */ && ch <= 0x46 /* F */) { + return 10 + (ch - 0x41 /* A */) } - if (ch >= LATIN_SMALL_LETTER_A && ch <= LATIN_SMALL_LETTER_F) { - return 10 + (ch - 
LATIN_SMALL_LETTER_A) + if (ch >= 0x61 /* a */ && ch <= 0x66 /* f */) { + return 10 + (ch - 0x61 /* a */) } - return ch - DIGIT_ZERO + return ch - 0x30 /* 0 */ } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence @@ -1083,7 +1023,7 @@ pp.validateRegExp_eatLegacyOctalEscapeSequence = function(state) { pp.validateRegExp_eatOctalDigit = function(state) { const ch = state.current() if (isOctalDigit(ch)) { - state.lastIntValue = ch - DIGIT_ZERO + state.lastIntValue = ch - 0x30 /* 0 */ state.advance() return true } @@ -1091,7 +1031,7 @@ pp.validateRegExp_eatOctalDigit = function(state) { return false } function isOctalDigit(ch) { - return ch >= DIGIT_ZERO && ch <= DIGIT_SEVEN + return ch >= 0x30 /* 0 */ && ch <= 0x37 /* 7 */ } // https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits From df4b452048c4ae3e83a66c1e94847689a37f40f0 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sat, 17 Feb 2018 14:12:20 +0900 Subject: [PATCH 17/18] make shorter method names --- src/regexp.js | 250 +++++++++++++++++++++++++------------------------- 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 6041f9135..622b3cdf1 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -119,7 +119,7 @@ pp.validateRegExpFlags = function(state) { * @returns {void} */ pp.validateRegExpPattern = function(state) { - this.validateRegExp_pattern(state) + this.regexp_pattern(state) // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of // parsing contains a |GroupName|, reparse with the goal symbol @@ -128,12 +128,12 @@ pp.validateRegExpPattern = function(state) { // were not matched by the parse, or if any Early Error conditions exist. 
if (!state.switchN && this.options.ecmaVersion >= 9 && state.groupNames.length > 0) { state.switchN = true - this.validateRegExp_pattern(state) + this.regexp_pattern(state) } } // https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern -pp.validateRegExp_pattern = function(state) { +pp.regexp_pattern = function(state) { state.pos = 0 state.lastIntValue = 0 state.lastStringValue = "" @@ -143,7 +143,7 @@ pp.validateRegExp_pattern = function(state) { state.groupNames.length = 0 state.backReferenceNames.length = 0 - this.validateRegExp_disjunction(state) + this.regexp_disjunction(state) if (state.pos !== state.source.length) { // Make the same messages as V8. @@ -165,14 +165,14 @@ pp.validateRegExp_pattern = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction -pp.validateRegExp_disjunction = function(state) { - this.validateRegExp_alternative(state) +pp.regexp_disjunction = function(state) { + this.regexp_alternative(state) while (state.eat(0x7C /* | */)) { - this.validateRegExp_alternative(state) + this.regexp_alternative(state) } // Make the same message as V8. - if (this.validateRegExp_eatQuantifier(state, true)) { + if (this.regexp_eatQuantifier(state, true)) { state.raise("Nothing to repeat") } if (state.eat(0x7B /* { */)) { @@ -181,18 +181,18 @@ pp.validateRegExp_disjunction = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative -pp.validateRegExp_alternative = function(state) { - while (state.pos < state.source.length && this.validateRegExp_eatTerm(state)) +pp.regexp_alternative = function(state) { + while (state.pos < state.source.length && this.regexp_eatTerm(state)) ; } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term -pp.validateRegExp_eatTerm = function(state) { - if (this.validateRegExp_eatAssertion(state)) { +pp.regexp_eatTerm = function(state) { + if (this.regexp_eatAssertion(state)) { // Handle `QuantifiableAssertion Quantifier` alternative. 
// `state.lastAssertionIsQuantifiable` is true if the last eaten Assertion // is a QuantifiableAssertion. - if (state.lastAssertionIsQuantifiable && this.validateRegExp_eatQuantifier(state)) { + if (state.lastAssertionIsQuantifiable && this.regexp_eatQuantifier(state)) { // Make the same message as V8. if (state.switchU) { state.raise("Invalid quantifier") @@ -201,8 +201,8 @@ pp.validateRegExp_eatTerm = function(state) { return true } - if (state.switchU ? this.validateRegExp_eatAtom(state) : this.validateRegExp_eatExtendedAtom(state)) { - this.validateRegExp_eatQuantifier(state) + if (state.switchU ? this.regexp_eatAtom(state) : this.regexp_eatExtendedAtom(state)) { + this.regexp_eatQuantifier(state) return true } @@ -210,7 +210,7 @@ pp.validateRegExp_eatTerm = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion -pp.validateRegExp_eatAssertion = function(state) { +pp.regexp_eatAssertion = function(state) { const start = state.pos state.lastAssertionIsQuantifiable = false @@ -234,7 +234,7 @@ pp.validateRegExp_eatAssertion = function(state) { lookbehind = state.eat(0x3C /* < */) } if (state.eat(0x3D /* = */) || state.eat(0x21 /* ! */)) { - this.validateRegExp_disjunction(state) + this.regexp_disjunction(state) if (!state.eat(0x29 /* ) */)) { state.raise("Unterminated group") } @@ -248,8 +248,8 @@ pp.validateRegExp_eatAssertion = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier -pp.validateRegExp_eatQuantifier = function(state, noError = false) { - if (this.validateRegExp_eatQuantifierPrefix(state, noError)) { +pp.regexp_eatQuantifier = function(state, noError = false) { + if (this.regexp_eatQuantifierPrefix(state, noError)) { state.eat(0x3F /* ? 
*/) return true } @@ -257,21 +257,21 @@ pp.validateRegExp_eatQuantifier = function(state, noError = false) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix -pp.validateRegExp_eatQuantifierPrefix = function(state, noError) { +pp.regexp_eatQuantifierPrefix = function(state, noError) { return ( state.eat(0x2A /* * */) || state.eat(0x2B /* + */) || state.eat(0x3F /* ? */) || - this.validateRegExp_eatBracedQuantifier(state, noError) + this.regexp_eatBracedQuantifier(state, noError) ) } -pp.validateRegExp_eatBracedQuantifier = function(state, noError) { +pp.regexp_eatBracedQuantifier = function(state, noError) { const start = state.pos if (state.eat(0x7B /* { */)) { let min = 0, max = -1 - if (this.validateRegExp_eatDecimalDigits(state)) { + if (this.regexp_eatDecimalDigits(state)) { min = state.lastIntValue - if (state.eat(0x2C /* , */) && this.validateRegExp_eatDecimalDigits(state)) { + if (state.eat(0x2C /* , */) && this.regexp_eatDecimalDigits(state)) { max = state.lastIntValue } if (state.eat(0x7D /* } */)) { @@ -291,31 +291,31 @@ pp.validateRegExp_eatBracedQuantifier = function(state, noError) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-Atom -pp.validateRegExp_eatAtom = function(state) { +pp.regexp_eatAtom = function(state) { return ( - this.validateRegExp_eatPatternCharacters(state) || + this.regexp_eatPatternCharacters(state) || state.eat(0x2E /* . 
*/) || - this.validateRegExp_eatReverseSolidusAtomEscape(state) || - this.validateRegExp_eatCharacterClass(state) || - this.validateRegExp_eatUncapturingGroup(state) || - this.validateRegExp_eatCapturingGroup(state) + this.regexp_eatReverseSolidusAtomEscape(state) || + this.regexp_eatCharacterClass(state) || + this.regexp_eatUncapturingGroup(state) || + this.regexp_eatCapturingGroup(state) ) } -pp.validateRegExp_eatReverseSolidusAtomEscape = function(state) { +pp.regexp_eatReverseSolidusAtomEscape = function(state) { const start = state.pos if (state.eat(0x5C /* \ */)) { - if (this.validateRegExp_eatAtomEscape(state)) { + if (this.regexp_eatAtomEscape(state)) { return true } state.pos = start } return false } -pp.validateRegExp_eatUncapturingGroup = function(state) { +pp.regexp_eatUncapturingGroup = function(state) { const start = state.pos if (state.eat(0x28 /* ( */)) { if (state.eat(0x3F /* ? */) && state.eat(0x3A /* : */)) { - this.validateRegExp_disjunction(state) + this.regexp_disjunction(state) if (state.eat(0x29 /* ) */)) { return true } @@ -325,14 +325,14 @@ pp.validateRegExp_eatUncapturingGroup = function(state) { } return false } -pp.validateRegExp_eatCapturingGroup = function(state) { +pp.regexp_eatCapturingGroup = function(state) { if (state.eat(0x28 /* ( */)) { if (this.options.ecmaVersion >= 9) { - this.validateRegExp_groupSpecifier(state) + this.regexp_groupSpecifier(state) } else if (state.current() === 0x3F /* ? */) { state.raise("Invalid group") } - this.validateRegExp_disjunction(state) + this.regexp_disjunction(state) if (state.eat(0x29 /* ) */)) { state.numCapturingParens += 1 return true @@ -343,28 +343,28 @@ pp.validateRegExp_eatCapturingGroup = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom -pp.validateRegExp_eatExtendedAtom = function(state) { +pp.regexp_eatExtendedAtom = function(state) { return ( state.eat(0x2E /* . 
*/) || - this.validateRegExp_eatReverseSolidusAtomEscape(state) || - this.validateRegExp_eatCharacterClass(state) || - this.validateRegExp_eatUncapturingGroup(state) || - this.validateRegExp_eatCapturingGroup(state) || - this.validateRegExp_eatInvalidBracedQuantifier(state) || - this.validateRegExp_eatExtendedPatternCharacter(state) + this.regexp_eatReverseSolidusAtomEscape(state) || + this.regexp_eatCharacterClass(state) || + this.regexp_eatUncapturingGroup(state) || + this.regexp_eatCapturingGroup(state) || + this.regexp_eatInvalidBracedQuantifier(state) || + this.regexp_eatExtendedPatternCharacter(state) ) } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier -pp.validateRegExp_eatInvalidBracedQuantifier = function(state) { - if (this.validateRegExp_eatBracedQuantifier(state, true)) { +pp.regexp_eatInvalidBracedQuantifier = function(state) { + if (this.regexp_eatBracedQuantifier(state, true)) { state.raise("Nothing to repeat") } return false } // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter -pp.validateRegExp_eatSyntaxCharacter = function(state) { +pp.regexp_eatSyntaxCharacter = function(state) { const ch = state.current() if (isSyntaxCharacter(ch)) { state.lastIntValue = ch @@ -394,7 +394,7 @@ function isSyntaxCharacter(ch) { // https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter // But eat eager. 
-pp.validateRegExp_eatPatternCharacters = function(state) { +pp.regexp_eatPatternCharacters = function(state) { const start = state.pos let ch = 0 while ((ch = state.current()) !== -1 && !isSyntaxCharacter(ch)) { @@ -404,7 +404,7 @@ pp.validateRegExp_eatPatternCharacters = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedPatternCharacter -pp.validateRegExp_eatExtendedPatternCharacter = function(state) { +pp.regexp_eatExtendedPatternCharacter = function(state) { const ch = state.current() if ( ch !== -1 && @@ -428,9 +428,9 @@ pp.validateRegExp_eatExtendedPatternCharacter = function(state) { // GroupSpecifier[U] :: // [empty] // `?` GroupName[?U] -pp.validateRegExp_groupSpecifier = function(state) { +pp.regexp_groupSpecifier = function(state) { if (state.eat(0x3F /* ? */)) { - if (this.validateRegExp_eatGroupName(state)) { + if (this.regexp_eatGroupName(state)) { if (state.groupNames.indexOf(state.lastStringValue) !== -1) { state.raise("Duplicate capture group name") } @@ -444,10 +444,10 @@ pp.validateRegExp_groupSpecifier = function(state) { // GroupName[U] :: // `<` RegExpIdentifierName[?U] `>` // Note: this updates `state.lastStringValue` property with the eaten name. -pp.validateRegExp_eatGroupName = function(state) { +pp.regexp_eatGroupName = function(state) { state.lastStringValue = "" if (state.eat(0x3C /* < */)) { - if (this.validateRegExp_eatRegExpIdentifierName(state) && state.eat(0x3E /* > */)) { + if (this.regexp_eatRegExpIdentifierName(state) && state.eat(0x3E /* > */)) { return true } state.raise("Invalid capture group name") @@ -459,11 +459,11 @@ pp.validateRegExp_eatGroupName = function(state) { // RegExpIdentifierStart[?U] // RegExpIdentifierName[?U] RegExpIdentifierPart[?U] // Note: this updates `state.lastStringValue` property with the eaten name. 
-pp.validateRegExp_eatRegExpIdentifierName = function(state) { +pp.regexp_eatRegExpIdentifierName = function(state) { state.lastStringValue = "" - if (this.validateRegExp_eatRegExpIdentifierStart(state)) { + if (this.regexp_eatRegExpIdentifierStart(state)) { state.lastStringValue += codePointToString(state.lastIntValue) - while (this.validateRegExp_eatRegExpIdentifierPart(state)) { + while (this.regexp_eatRegExpIdentifierPart(state)) { state.lastStringValue += codePointToString(state.lastIntValue) } return true @@ -476,12 +476,12 @@ pp.validateRegExp_eatRegExpIdentifierName = function(state) { // `$` // `_` // `\` RegExpUnicodeEscapeSequence[?U] -pp.validateRegExp_eatRegExpIdentifierStart = function(state) { +pp.regexp_eatRegExpIdentifierStart = function(state) { const start = state.pos let ch = state.current() state.advance() - if (ch === 0x5C /* \ */ && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) { ch = state.lastIntValue } if (isRegExpIdentifierStart(ch)) { @@ -503,12 +503,12 @@ function isRegExpIdentifierStart(ch) { // `\` RegExpUnicodeEscapeSequence[?U] // // -pp.validateRegExp_eatRegExpIdentifierPart = function(state) { +pp.regexp_eatRegExpIdentifierPart = function(state) { const start = state.pos let ch = state.current() state.advance() - if (ch === 0x5C /* \ */ && this.validateRegExp_eatRegExpUnicodeEscapeSequence(state)) { + if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) { ch = state.lastIntValue } if (isRegExpIdentifierPart(ch)) { @@ -524,12 +524,12 @@ function isRegExpIdentifierPart(ch) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape -pp.validateRegExp_eatAtomEscape = function(state) { +pp.regexp_eatAtomEscape = function(state) { if ( - this.validateRegExp_eatBackReference(state) || - this.validateRegExp_eatCharacterClassEscape(state) || - this.validateRegExp_eatCharacterEscape(state) || - 
(state.switchN && this.validateRegExp_eatKGroupName(state)) + this.regexp_eatBackReference(state) || + this.regexp_eatCharacterClassEscape(state) || + this.regexp_eatCharacterEscape(state) || + (state.switchN && this.regexp_eatKGroupName(state)) ) { return true } @@ -542,9 +542,9 @@ pp.validateRegExp_eatAtomEscape = function(state) { } return false } -pp.validateRegExp_eatBackReference = function(state) { +pp.regexp_eatBackReference = function(state) { const start = state.pos - if (this.validateRegExp_eatDecimalEscape(state)) { + if (this.regexp_eatDecimalEscape(state)) { const n = state.lastIntValue if (state.switchU) { // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape @@ -560,9 +560,9 @@ pp.validateRegExp_eatBackReference = function(state) { } return false } -pp.validateRegExp_eatKGroupName = function(state) { +pp.regexp_eatKGroupName = function(state) { if (state.eat(0x6B /* k */)) { - if (this.validateRegExp_eatGroupName(state)) { + if (this.regexp_eatGroupName(state)) { state.backReferenceNames.push(state.lastStringValue) return true } @@ -572,28 +572,28 @@ pp.validateRegExp_eatKGroupName = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape -pp.validateRegExp_eatCharacterEscape = function(state) { +pp.regexp_eatCharacterEscape = function(state) { return ( - this.validateRegExp_eatControlEscape(state) || - this.validateRegExp_eatCControlLetter(state) || - this.validateRegExp_eatZero(state) || - this.validateRegExp_eatHexEscapeSequence(state) || - this.validateRegExp_eatRegExpUnicodeEscapeSequence(state) || - (!state.switchU && this.validateRegExp_eatLegacyOctalEscapeSequence(state)) || - this.validateRegExp_eatIdentityEscape(state) + this.regexp_eatControlEscape(state) || + this.regexp_eatCControlLetter(state) || + this.regexp_eatZero(state) || + this.regexp_eatHexEscapeSequence(state) || + this.regexp_eatRegExpUnicodeEscapeSequence(state) || + (!state.switchU && 
this.regexp_eatLegacyOctalEscapeSequence(state)) || + this.regexp_eatIdentityEscape(state) ) } -pp.validateRegExp_eatCControlLetter = function(state) { +pp.regexp_eatCControlLetter = function(state) { const start = state.pos if (state.eat(0x63 /* c */)) { - if (this.validateRegExp_eatControlLetter(state)) { + if (this.regexp_eatControlLetter(state)) { return true } state.pos = start } return false } -pp.validateRegExp_eatZero = function(state) { +pp.regexp_eatZero = function(state) { if (state.current() === 0x30 /* 0 */ && !isDecimalDigit(state.lookahead())) { state.lastIntValue = 0 state.advance() @@ -603,7 +603,7 @@ pp.validateRegExp_eatZero = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape -pp.validateRegExp_eatControlEscape = function(state) { +pp.regexp_eatControlEscape = function(state) { const ch = state.current() if (ch === 0x74 /* t */) { state.lastIntValue = 0x09 /* \t */ @@ -634,7 +634,7 @@ pp.validateRegExp_eatControlEscape = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter -pp.validateRegExp_eatControlLetter = function(state) { +pp.regexp_eatControlLetter = function(state) { const ch = state.current() if (isControlLetter(ch)) { state.lastIntValue = ch % 0x20 @@ -651,15 +651,15 @@ function isControlLetter(ch) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence -pp.validateRegExp_eatRegExpUnicodeEscapeSequence = function(state) { +pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) { const start = state.pos if (state.eat(0x75 /* u */)) { - if (this.validateRegExp_eatFixedHexDigits(state, 4)) { + if (this.regexp_eatFixedHexDigits(state, 4)) { const lead = state.lastIntValue if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) { const leadSurrogateEnd = state.pos - if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.validateRegExp_eatFixedHexDigits(state, 4)) { + if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) 
&& this.regexp_eatFixedHexDigits(state, 4)) { const trail = state.lastIntValue if (trail >= 0xDC00 && trail <= 0xDFFF) { state.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000 @@ -674,7 +674,7 @@ pp.validateRegExp_eatRegExpUnicodeEscapeSequence = function(state) { if ( state.switchU && state.eat(0x7B /* { */) && - this.validateRegExp_eatHexDigits(state) && + this.regexp_eatHexDigits(state) && state.eat(0x7D /* } */) && isValidUnicode(state.lastIntValue) ) { @@ -693,9 +693,9 @@ function isValidUnicode(ch) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape -pp.validateRegExp_eatIdentityEscape = function(state) { +pp.regexp_eatIdentityEscape = function(state) { if (state.switchU) { - if (this.validateRegExp_eatSyntaxCharacter(state)) { + if (this.regexp_eatSyntaxCharacter(state)) { return true } if (state.eat(0x2F /* / */)) { @@ -716,7 +716,7 @@ pp.validateRegExp_eatIdentityEscape = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape -pp.validateRegExp_eatDecimalEscape = function(state) { +pp.regexp_eatDecimalEscape = function(state) { state.lastIntValue = 0 let ch = state.current() if (ch >= 0x31 /* 1 */ && ch <= 0x39 /* 9 */) { @@ -730,7 +730,7 @@ pp.validateRegExp_eatDecimalEscape = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape -pp.validateRegExp_eatCharacterClassEscape = function(state) { +pp.regexp_eatCharacterClassEscape = function(state) { const ch = state.current() if (isCharacterClassEscape(ch)) { @@ -748,7 +748,7 @@ pp.validateRegExp_eatCharacterClassEscape = function(state) { state.advance() if ( state.eat(0x7B /* { */) && - this.validateRegExp_eatUnicodePropertyValueExpression(state) && + this.regexp_eatUnicodePropertyValueExpression(state) && state.eat(0x7D /* } */) ) { return true @@ -772,34 +772,34 @@ function isCharacterClassEscape(ch) { // UnicodePropertyValueExpression :: // UnicodePropertyName `=` 
UnicodePropertyValue // LoneUnicodePropertyNameOrValue -pp.validateRegExp_eatUnicodePropertyValueExpression = function(state) { +pp.regexp_eatUnicodePropertyValueExpression = function(state) { const start = state.pos // UnicodePropertyName `=` UnicodePropertyValue - if (this.validateRegExp_eatUnicodePropertyName(state) && state.eat(0x3D /* = */)) { + if (this.regexp_eatUnicodePropertyName(state) && state.eat(0x3D /* = */)) { const name = state.lastStringValue - if (this.validateRegExp_eatUnicodePropertyValue(state)) { + if (this.regexp_eatUnicodePropertyValue(state)) { const value = state.lastStringValue - this.validateRegExp_validateUnicodePropertyNameAndValue(state, name, value) + this.regexp_validateUnicodePropertyNameAndValue(state, name, value) return true } } state.pos = start // LoneUnicodePropertyNameOrValue - if (this.validateRegExp_eatLoneUnicodePropertyNameOrValue(state)) { + if (this.regexp_eatLoneUnicodePropertyNameOrValue(state)) { const nameOrValue = state.lastStringValue - this.validateRegExp_validateUnicodePropertyNameOrValue(state, nameOrValue) + this.regexp_validateUnicodePropertyNameOrValue(state, nameOrValue) return true } return false } -pp.validateRegExp_validateUnicodePropertyNameAndValue = function(state, name, value) { +pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) { if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) { state.raise("Invalid property name") } } -pp.validateRegExp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) { +pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) { if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) { state.raise("Invalid property name") } @@ -807,7 +807,7 @@ pp.validateRegExp_validateUnicodePropertyNameOrValue = function(state, nameOrVal // UnicodePropertyName :: // UnicodePropertyNameCharacters -pp.validateRegExp_eatUnicodePropertyName = function(state) { 
+pp.regexp_eatUnicodePropertyName = function(state) { let ch = 0 state.lastStringValue = "" while (isUnicodePropertyNameCharacter(ch = state.current())) { @@ -822,7 +822,7 @@ function isUnicodePropertyNameCharacter(ch) { // UnicodePropertyValue :: // UnicodePropertyValueCharacters -pp.validateRegExp_eatUnicodePropertyValue = function(state) { +pp.regexp_eatUnicodePropertyValue = function(state) { let ch = 0 state.lastStringValue = "" while (isUnicodePropertyValueCharacter(ch = state.current())) { @@ -837,15 +837,15 @@ function isUnicodePropertyValueCharacter(ch) { // LoneUnicodePropertyNameOrValue :: // UnicodePropertyValueCharacters -pp.validateRegExp_eatLoneUnicodePropertyNameOrValue = function(state) { - return this.validateRegExp_eatUnicodePropertyValue(state) +pp.regexp_eatLoneUnicodePropertyNameOrValue = function(state) { + return this.regexp_eatUnicodePropertyValue(state) } // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass -pp.validateRegExp_eatCharacterClass = function(state) { +pp.regexp_eatCharacterClass = function(state) { if (state.eat(0x5B /* [ */)) { state.eat(0x5E /* ^ */) - this.validateRegExp_classRanges(state) + this.regexp_classRanges(state) if (state.eat(0x5D /* [ */)) { return true } @@ -858,10 +858,10 @@ pp.validateRegExp_eatCharacterClass = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash -pp.validateRegExp_classRanges = function(state) { - while (this.validateRegExp_eatClassAtom(state)) { +pp.regexp_classRanges = function(state) { + while (this.regexp_eatClassAtom(state)) { const left = state.lastIntValue - if (state.eat(0x2D /* - */) && this.validateRegExp_eatClassAtom(state)) { + if (state.eat(0x2D /* - */) && this.regexp_eatClassAtom(state)) { const right = state.lastIntValue if (state.switchU && (left === -1 || right === 
-1)) { state.raise("Invalid character class") @@ -875,11 +875,11 @@ pp.validateRegExp_classRanges = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash -pp.validateRegExp_eatClassAtom = function(state) { +pp.regexp_eatClassAtom = function(state) { const start = state.pos if (state.eat(0x5C /* \ */)) { - if (this.validateRegExp_eatClassEscape(state)) { + if (this.regexp_eatClassEscape(state)) { return true } if (state.switchU) { @@ -904,7 +904,7 @@ pp.validateRegExp_eatClassAtom = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape -pp.validateRegExp_eatClassEscape = function(state) { +pp.regexp_eatClassEscape = function(state) { const start = state.pos if (state.eat(0x62 /* b */)) { @@ -918,20 +918,20 @@ pp.validateRegExp_eatClassEscape = function(state) { } if (!state.switchU && state.eat(0x63 /* c */)) { - if (this.validateRegExp_eatClassControlLetter(state)) { + if (this.regexp_eatClassControlLetter(state)) { return true } state.pos = start } return ( - this.validateRegExp_eatCharacterClassEscape(state) || - this.validateRegExp_eatCharacterEscape(state) + this.regexp_eatCharacterClassEscape(state) || + this.regexp_eatCharacterEscape(state) ) } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter -pp.validateRegExp_eatClassControlLetter = function(state) { +pp.regexp_eatClassControlLetter = function(state) { const ch = state.current() if (isDecimalDigit(ch) || ch === 0x5F /* _ */) { state.lastIntValue = ch % 0x20 @@ -942,10 +942,10 @@ pp.validateRegExp_eatClassControlLetter = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence -pp.validateRegExp_eatHexEscapeSequence = function(state) { +pp.regexp_eatHexEscapeSequence = function(state) { const start = state.pos if (state.eat(0x78 /* x */)) { - if (this.validateRegExp_eatFixedHexDigits(state, 2)) 
{ + if (this.regexp_eatFixedHexDigits(state, 2)) { return true } if (state.switchU) { @@ -957,7 +957,7 @@ pp.validateRegExp_eatHexEscapeSequence = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits -pp.validateRegExp_eatDecimalDigits = function(state) { +pp.regexp_eatDecimalDigits = function(state) { const start = state.pos let ch = 0 state.lastIntValue = 0 @@ -972,7 +972,7 @@ function isDecimalDigit(ch) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits -pp.validateRegExp_eatHexDigits = function(state) { +pp.regexp_eatHexDigits = function(state) { const start = state.pos let ch = 0 state.lastIntValue = 0 @@ -1001,12 +1001,12 @@ function hexToInt(ch) { // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence // Allows only 0-377(octal) i.e. 0-255(decimal). -pp.validateRegExp_eatLegacyOctalEscapeSequence = function(state) { - if (this.validateRegExp_eatOctalDigit(state)) { +pp.regexp_eatLegacyOctalEscapeSequence = function(state) { + if (this.regexp_eatOctalDigit(state)) { const n1 = state.lastIntValue - if (this.validateRegExp_eatOctalDigit(state)) { + if (this.regexp_eatOctalDigit(state)) { const n2 = state.lastIntValue - if (n1 <= 3 && this.validateRegExp_eatOctalDigit(state)) { + if (n1 <= 3 && this.regexp_eatOctalDigit(state)) { state.lastIntValue = n1 * 64 + n2 * 8 + state.lastIntValue } else { state.lastIntValue = n1 * 8 + n2 @@ -1020,7 +1020,7 @@ pp.validateRegExp_eatLegacyOctalEscapeSequence = function(state) { } // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit -pp.validateRegExp_eatOctalDigit = function(state) { +pp.regexp_eatOctalDigit = function(state) { const ch = state.current() if (isOctalDigit(ch)) { state.lastIntValue = ch - 0x30 /* 0 */ @@ -1037,7 +1037,7 @@ function isOctalDigit(ch) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigit // And HexDigit 
HexDigit in https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence -pp.validateRegExp_eatFixedHexDigits = function(state, length) { +pp.regexp_eatFixedHexDigits = function(state, length) { const start = state.pos state.lastIntValue = 0 for (let i = 0; i < length; ++i) { From ea163cbaba5a65adeebdacad2fef99eb64571c76 Mon Sep 17 00:00:00 2001 From: Toru Nagashima Date: Sat, 17 Feb 2018 14:26:50 +0900 Subject: [PATCH 18/18] make shorter a bit --- src/regexp.js | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 622b3cdf1..c5953e301 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -375,20 +375,12 @@ pp.regexp_eatSyntaxCharacter = function(state) { } function isSyntaxCharacter(ch) { return ( - ch === 0x5E /* ^ */ || ch === 0x24 /* $ */ || - ch === 0x5C /* \ */ || + ch >= 0x28 /* ( */ && ch <= 0x2B /* + */ || ch === 0x2E /* . */ || - ch === 0x2A /* * */ || - ch === 0x2B /* + */ || ch === 0x3F /* ? */ || - ch === 0x28 /* ( */ || - ch === 0x29 /* ) */ || - ch === 0x5B /* [ */ || - ch === 0x5D /* [ */ || - ch === 0x7B /* { */ || - ch === 0x7D /* } */ || - ch === 0x7C /* | */ + ch >= 0x5B /* [ */ && ch <= 0x5E /* ^ */ || + ch >= 0x7B /* { */ && ch <= 0x7D /* } */ ) } @@ -408,15 +400,12 @@ pp.regexp_eatExtendedPatternCharacter = function(state) { const ch = state.current() if ( ch !== -1 && - ch !== 0x5E /* ^ */ && ch !== 0x24 /* $ */ && + !(ch >= 0x28 /* ( */ && ch <= 0x2B /* + */) && ch !== 0x2E /* . */ && - ch !== 0x2A /* * */ && - ch !== 0x2B /* + */ && ch !== 0x3F /* ? */ && - ch !== 0x28 /* ( */ && - ch !== 0x29 /* ) */ && ch !== 0x5B /* [ */ && + ch !== 0x5E /* ^ */ && ch !== 0x7C /* | */ ) { state.advance()