Skip to content

Commit

Permalink
test_runner: parse non-ascii character correctly
Browse files Browse the repository at this point in the history
PR-URL: nodejs#45736
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Moshe Atlow <[email protected]>
  • Loading branch information
mertcanaltin authored and MoLow committed Feb 25, 2023
1 parent 3effd0c commit c23e601
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 24 deletions.
38 changes: 25 additions & 13 deletions lib/internal/test_runner/tap_lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ const {
ArrayPrototypePush,
MathMax,
SafeSet,
StringPrototypeIncludes,
StringPrototypeCodePointAt,
StringPrototypeTrim,
} = primordials;
const {
codes: { ERR_TAP_LEXER_ERROR },
} = require('internal/errors');

const { isZeroWidthCodePoint } = require('internal/util/inspect');

const kEOL = '';
const kEOF = '';

Expand Down Expand Up @@ -474,18 +476,28 @@ class TapLexer {
}

#isLiteralSymbol(char) {
return (
(char >= 'a' && char <= 'z') ||
(char >= 'A' && char <= 'Z') ||
this.#isSpecialCharacterSymbol(char)
);
}

#isSpecialCharacterSymbol(char) {
// We deliberately do not include "# \ + -"" in this list
// these are used for comments/reasons explanations, pragma and escape characters
// whitespace is not included because it is handled separately
return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
if (typeof char !== 'string') return false;
const charCode = StringPrototypeCodePointAt(char);

if (isZeroWidthCodePoint(charCode)) return false;
if (this.#isWhitespaceSymbol(char)) return false;
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
// Allow all non-latin characters.
if (charCode > MAX_ASCII_CHAR_CODE) return true;
const ZERO = 48; // 0
const NINE = 58; // 9
// Disallow numeric values
if (charCode >= ZERO && char <= NINE) return false;

// Disallow characters with special meaning in TAP
const HASH = 35; // #
const BACKSLASH = 92; // \
const PLUS = 43; // +
const DASH = 45; // -

// Disallow characters with special meaning in TAP
return charCode !== HASH && charCode !== BACKSLASH &&
charCode !== PLUS && charCode !== DASH;
}

#isWhitespaceSymbol(char) {
Expand Down
24 changes: 13 additions & 11 deletions lib/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
return str;
}

/**
 * Tells whether a code point falls in one of the ranges this module treats as
 * zero-width.
 * @param {number} code Unicode code point.
 * @returns {boolean} `true` when `code` lies in one of the ranges below.
 */
function isZeroWidthCodePoint(code) {
  // C0 control codes
  if (code <= 0x1F) return true;
  // C1 control codes
  if (code >= 0x7F && code <= 0x9F) return true;
  // Combining Diacritical Marks
  if (code >= 0x300 && code <= 0x36F) return true;
  // Modifying Invisible Characters
  if (code >= 0x200B && code <= 0x200F) return true;
  // Combining Diacritical Marks for Symbols
  if (code >= 0x20D0 && code <= 0x20FF) return true;
  // Variation Selectors
  if (code >= 0xFE00 && code <= 0xFE0F) return true;
  // Combining Half Marks
  if (code >= 0xFE20 && code <= 0xFE2F) return true;
  // Variation Selectors
  return code >= 0xE0100 && code <= 0xE01EF;
}

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
Expand Down Expand Up @@ -2366,17 +2378,6 @@ if (internalBinding('config').hasIntl) {
);
};

/**
 * Tells whether a code point falls in one of the ranges this module treats as
 * zero-width.
 * @param {number} code Unicode code point.
 * @returns {boolean} `true` when `code` lies in one of the listed ranges.
 */
const isZeroWidthCodePoint = (code) => {
  // Inclusive range test on the code point being classified.
  const within = (low, high) => code >= low && code <= high;

  return code <= 0x1F ||           // C0 control codes
    within(0x7F, 0x9F) ||          // C1 control codes
    within(0x300, 0x36F) ||        // Combining Diacritical Marks
    within(0x200B, 0x200F) ||      // Modifying Invisible Characters
    within(0x20D0, 0x20FF) ||      // Combining Diacritical Marks for Symbols
    within(0xFE00, 0xFE0F) ||      // Variation Selectors
    within(0xFE20, 0xFE2F) ||      // Combining Half Marks
    within(0xE0100, 0xE01EF);      // Variation Selectors
};
}

/**
Expand All @@ -2396,4 +2397,5 @@ module.exports = {
formatWithOptions,
getStringWidth,
stripVTControlCharacters,
isZeroWidthCodePoint,
};
36 changes: 36 additions & 0 deletions test/parallel/test-runner-tap-lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,39 @@ ok 1
assert.strictEqual(tokens[index].value, token.value);
});
}

// A test line whose description ends with non-ASCII text: the whole
// non-ASCII run must be lexed as a single literal token.
{
  const tokens = TAPLexer('ok 1 - description أتث讲演講👍🔥');

  const expected = [
    { kind: TokenKind.TAP_TEST_OK, value: 'ok' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.NUMERIC, value: '1' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.DASH, value: '-' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'description' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare each produced token with the expected one at the same position.
  for (let i = 0; i < expected.length; i++) {
    const { kind, value } = expected[i];
    assert.strictEqual(tokens[i].kind, kind);
    assert.strictEqual(tokens[i].value, value);
  }
}

// A comment line that ends with non-ASCII text: the non-ASCII run must be
// lexed as a single literal token.
{
  const tokens = TAPLexer('# comment أتث讲演講👍🔥');

  const expected = [
    { kind: TokenKind.COMMENT, value: '#' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'comment' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare each produced token with the expected one at the same position.
  for (let i = 0; i < expected.length; i++) {
    const { kind, value } = expected[i];
    assert.strictEqual(tokens[i].kind, kind);
    assert.strictEqual(tokens[i].value, value);
  }
}

0 comments on commit c23e601

Please sign in to comment.