From 942b3af17d8b5c4edb34a85dd4ce1a7a219f58fb Mon Sep 17 00:00:00 2001 From: Mathias Bynens Date: Fri, 15 Nov 2013 20:53:58 +0100 Subject: [PATCH] Add support for ES6 Unicode code point escapes --- package.json | 2 +- parser.js | 41 ++++--- test/parse_input.json | 12 ++ test/parse_output.json | 250 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 286 insertions(+), 19 deletions(-) diff --git a/package.json b/package.json index 88d4afc..d74fc6b 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "regjsparser", "version": "0.0.2", - "author": "'Juilan Viereck' ", + "author": "'Julian Viereck' ", "license": "BSD", "main": "./parser", "bin": "bin/parser", diff --git a/parser.js b/parser.js index 13a1008..9d58af6 100644 --- a/parser.js +++ b/parser.js @@ -56,12 +56,12 @@ // CharacterEscape // CharacterClassEscape // -// CharacterEscape :: +// CharacterEscape[U] :: // ControlEscape // c ControlLetter // HexEscapeSequence -// UnicodeEscapeSequence -// IdentityEscape +// RegExpUnicodeEscapeSequence[?U] (ES6) +// IdentityEscape[?U] // // ControlEscape :: // one of f n r t v @@ -631,7 +631,7 @@ function parse(str) { } else if (res = matchReg(/^[dDsSwW]/)) { return createEscapedChar(res[0]); } - return false; + return false; } function parseCharacterEscape() { @@ -644,19 +644,22 @@ function parse(str) { var res; if (res = matchReg(/^[fnrtv]/)) { - // ControlEscape + // ControlEscape return createEscapedChar(res[0]); } else if (res = matchReg(/^c([a-zA-Z])/)) { - // c ControlLetter + // c ControlLetter return createEscaped('controlLetter', res[1], 1); } else if (res = matchReg(/^x([0-9a-fA-F]{2})/)) { - // HexEscapeSequence + // HexEscapeSequence return createEscaped('hex', res[1], 1); - } else if (res = matchReg(/^u([0-9a-fA-F]{4})/)) { - // UnicodeEscapeSequence + } else if (res = matchReg(/^u([0-9a-fA-F]{4})/)) { + // UnicodeEscapeSequence return createEscaped('unicode', res[1], 1); + } else if (res = matchReg(/^u\{([0-9a-fA-F]{1,6})\}/)) { + // RegExpUnicodeEscapeSequence (ES6 Unicode code point escape) + return createEscaped('codePoint', res[1], 3); } else { - // IdentityEscape + // IdentityEscape return parseIdentityEscape(); } } @@ -689,10 +692,10 @@ function parse(str) { } if (match(ZWJ)) { - // + // return createEscaped('identifier', ZWJ); } else if (match(ZWNJ)) { - // + // return createEscaped('identifier', ZWNJ); } @@ -739,7 +742,7 @@ function parse(str) { function parseHelperClassRanges(atom) { var from = pos, to, res; if (current('-') && !next(']')) { - // ClassAtom - ClassAtom ClassRanges + // ClassAtom - ClassAtom ClassRanges skip('-'); res = parseClassAtom(); @@ -777,12 +780,12 @@ function parse(str) { } if (current(']')) { - // ClassAtom + // ClassAtom return [atom]; } - // ClassAtom NonemptyClassRangesNoDash - // ClassAtom - ClassAtom ClassRanges + // ClassAtom NonemptyClassRangesNoDash + // ClassAtom - ClassAtom ClassRanges return parseHelperClassRanges(atom); } @@ -801,8 +804,8 @@ function parse(str) { return res; } - // ClassAtomNoDash NonemptyClassRangesNoDash - // ClassAtomNoDash - ClassAtom ClassRanges + // ClassAtomNoDash NonemptyClassRangesNoDash + // ClassAtomNoDash - ClassAtom ClassRanges return parseHelperClassRanges(res); } @@ -857,6 +860,8 @@ function nodeToCharCode(node) { switch (node.name) { case 'unicode': return parseInt(node.value, 16); + case 'codePoint': + return parseInt(node.value, 16); case 'controlLetter': return node.value.charCodeAt(0) % 32; case 'identifier': diff --git a/test/parse_input.json b/test/parse_input.json index 9f62767..d24ecbf 100644 --- a/test/parse_input.json +++ b/test/parse_input.json @@ -79,6 +79,8 @@ "[\\td-G]", "[\\u0020]", "[\\u0061d-G]", + "[\\u{0}-\\u{A}]", + "[\\u{02}-\\u{003}]", "[\\vd-G]", "[\\wb-G]", "[\\x0061d-G]", @@ -403,6 +405,16 @@ "\\u044F", "\\u0451", "\\undefined", + "\\u{000000}", + "\\u{0}", + "\\u{1}", + "\\u{02}", + "\\u{003}", + "\\u{0004}", + "\\u{00005}", + "\\u{1D306}", + "\\u{01D306}", + "\\u{10FFFF}", "\\w", "\\x41", "\\x42", diff --git a/test/parse_output.json b/test/parse_output.json index d571003..9b43199 100644 --- a/test/parse_output.json +++ b/test/parse_output.json @@ -24485,6 +24485,86 @@ "message": "invalid range in character class", "input": "[\\u0061d-G]" }, + { + "type": "alternative", + "terms": [ + { + "type": "characterClass", + "classRanges": [ + { + "type": "characterClassRange", + "min": { + "type": "escape", + "name": "codePoint", + "value": "0", + "from": 2, + "to": 6, + "raw": "u{0}" + }, + "max": { + "type": "escape", + "name": "codePoint", + "value": "A", + "from": 8, + "to": 12, + "raw": "u{A}" + }, + "from": 6, + "to": 12, + "raw": "-\\u{A}" + } + ], + "negative": false, + "from": 0, + "to": 13, + "raw": "[\\u{0}-\\u{A}]" + } + ], + "from": 0, + "to": 13, + "raw": "[\\u{0}-\\u{A}]", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "characterClass", + "classRanges": [ + { + "type": "characterClassRange", + "min": { + "type": "escape", + "name": "codePoint", + "value": "02", + "from": 2, + "to": 7, + "raw": "u{02}" + }, + "max": { + "type": "escape", + "name": "codePoint", + "value": "003", + "from": 9, + "to": 15, + "raw": "u{003}" + }, + "from": 7, + "to": 15, + "raw": "-\\u{003}" + } + ], + "negative": false, + "from": 0, + "to": 16, + "raw": "[\\u{02}-\\u{003}]" + } + ], + "from": 0, + "to": 16, + "raw": "[\\u{02}-\\u{003}]", + "lastMatchIdx": 0 + }, { "type": "error", "name": "SyntaxError", @@ -36569,6 +36649,176 @@ "raw": "\\undefined", "lastMatchIdx": 0 }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "000000", + "from": 1, + "to": 10, + "raw": "u{000000}" + } + ], + "from": 0, + "to": 10, + "raw": "\\u{000000}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "0", + "from": 1, + "to": 5, + "raw": "u{0}" + } + ], + "from": 0, + "to": 5, + "raw": "\\u{0}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "1", + "from": 1, + "to": 5, + "raw": "u{1}" + } + ], + "from": 0, + "to": 5, + "raw": "\\u{1}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "02", + "from": 1, + "to": 6, + "raw": "u{02}" + } + ], + "from": 0, + "to": 6, + "raw": "\\u{02}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "003", + "from": 1, + "to": 7, + "raw": "u{003}" + } + ], + "from": 0, + "to": 7, + "raw": "\\u{003}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "0004", + "from": 1, + "to": 8, + "raw": "u{0004}" + } + ], + "from": 0, + "to": 8, + "raw": "\\u{0004}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "00005", + "from": 1, + "to": 9, + "raw": "u{00005}" + } + ], + "from": 0, + "to": 9, + "raw": "\\u{00005}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "1D306", + "from": 1, + "to": 9, + "raw": "u{1D306}" + } + ], + "from": 0, + "to": 9, + "raw": "\\u{1D306}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "01D306", + "from": 1, + "to": 10, + "raw": "u{01D306}" + } + ], + "from": 0, + "to": 10, + "raw": "\\u{01D306}", + "lastMatchIdx": 0 + }, + { + "type": "alternative", + "terms": [ + { + "type": "escape", + "name": "codePoint", + "value": "10FFFF", + "from": 1, + "to": 10, + "raw": "u{10FFFF}" + } + ], + "from": 0, + "to": 10, + "raw": "\\u{10FFFF}", + "lastMatchIdx": 0 + }, { "type": "alternative", "terms": [