diff --git a/library/CHANGELOG.md b/library/CHANGELOG.md index e17766040..c45804dd2 100644 --- a/library/CHANGELOG.md +++ b/library/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to the library will be documented in this file. ## vX.X.X (Month DD, YYYY) - Add `base64` action to validate Base64 strings (pull request #644) +- Refactor `HEXADECIMAL_REGEX` (pull request #666) +- Change `EMOJI_REGEX` to be more accurate and strict (pull request #666) ## v0.36.0 (July 05, 2024) diff --git a/library/src/actions/emoji/emoji.test.ts b/library/src/actions/emoji/emoji.test.ts index a1c03450f..b81c30779 100644 --- a/library/src/actions/emoji/emoji.test.ts +++ b/library/src/actions/emoji/emoji.test.ts @@ -62,6 +62,7 @@ describe('emoji', () => { '🇺🇸', '👋🏼', '🫨', + '✈️', ]); }); @@ -112,30 +113,26 @@ describe('emoji', () => { ]); }); - // TODO: This test needs to be enabled after upgrading the emoji regex. - // See the comment in `regex.ts` for more details. - // test('for numbers', () => { - // expectActionIssue(action, baseIssue, [ - // '0', - // '1', - // '2', - // '3', - // '4', - // '5', - // '6', - // '7', - // '8', - // '9', - // '0123456789', - // ]); - // }); + test('for numbers', () => { + expectActionIssue(action, baseIssue, [ + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + '0123456789', + ]); + }); test('for special chars', () => { expectActionIssue(action, baseIssue, [ - // TODO: These chars needs to be enabled after upgrading the emoji regex. - // See the comment in `regex.ts` for more details. 
- // '#', - // '*', + '#', + '*', '!', '@', '$', @@ -160,6 +157,33 @@ describe('emoji', () => { ]); }); + test('for format and mark chars', () => { + expectActionIssue(action, baseIssue, [ + '\u200D', + '\u20E3', + '\uFE0F', + '\u{E007F}', + ]); + }); + + test('for tag digit and tag small letter chars', () => { + expectActionIssue(action, baseIssue, [ + '\u{E0030}', + '\u{E0039}', + '\u{E0061}', + '\u{E007A}', + ]); + }); + + test('for non-emoji symbol chars', () => { + expectActionIssue(action, baseIssue, [ + '\u2642', // ♂ + '\u2708', // ✈ + '\u{1F3F3}', // 🏳 + '\u{1F441}', // 👁 + ]); + }); + test('for composite chars', () => { expectActionIssue(action, baseIssue, [ 'S\u0307', // Ṡ @@ -168,11 +192,12 @@ describe('emoji', () => { ]); }); - test('for wrong emoji parts', () => { + test('for surrogate code points', () => { + // 😍 '\u{1F60D}' can be represented with surrogate pair '\uD83D\uDE0D' expectActionIssue(action, baseIssue, [ - '\uD83D', // First part of 😍 - '\uDE0D', // Second part of 😍 - '\uDE0D\uD83D', // Twisted parts of 😍 + '\uD83D', // Lone high surrogate + '\uDE0D', // Lone low surrogate + '\uDE0D\uD83D', // Reversed surrogate order for 😍 ]); }); }); diff --git a/library/src/regex.ts b/library/src/regex.ts index 046c6533b..efe4082b6 100644 --- a/library/src/regex.ts +++ b/library/src/regex.ts @@ -26,19 +26,19 @@ export const EMAIL_REGEX: RegExp = /^[\w+-]+(?:\.[\w+-]+)*@[\da-z]+(?:[.-][\da-z]+)*\.[a-z]{2,}$/iu; /** - * Emoji regex. + * Emoji regex from [emoji-regex-xs](https://github.com/slevithan/emoji-regex-xs) v1.0.0 (MIT license). + * + * Hint: We decided against the newer `/^\p{RGI_Emoji}+$/v` regex because it is + * not supported in older runtimes and does not match all emoji. */ export const EMOJI_REGEX: RegExp = - /^[\p{Extended_Pictographic}\p{Emoji_Component}]+$/u; - -// This emoji regex is not supported in Node.js v18 and older browsers. -// Therefore, we are postponing the switch to this regex to a later date. 
-// export const EMOJI_REGEX = /^\p{RGI_Emoji}+$/v; + // eslint-disable-next-line redos-detector/no-unsafe-regex, regexp/no-dupe-disjunctions -- false positives + /^(?:[\u{1F1E6}-\u{1F1FF}]{2}|\u{1F3F4}[\u{E0061}-\u{E007A}]{2}[\u{E0030}-\u{E0039}\u{E0061}-\u{E007A}]{1,3}\u{E007F}|(?:\p{Emoji}\uFE0F\u20E3?|\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation})(?:\u200D(?:\p{Emoji}\uFE0F\u20E3?|\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}))*)+$/u; /** * [Hexadecimal](https://en.wikipedia.org/wiki/Hexadecimal) regex. */ -export const HEXADECIMAL_REGEX: RegExp = /^(?:0h|0x)?[\da-f]+$/iu; +export const HEXADECIMAL_REGEX: RegExp = /^(?:0[hx])?[\da-f]+$/iu; /** * [Hex color](https://en.wikipedia.org/wiki/Web_colors#Hex_triplet) regex.