From 2f7fe1189c804d9f1b2f8ce9cda2fccea639950e Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Tue, 16 May 2023 18:31:31 -0700 Subject: [PATCH 01/42] New unicode-graphemes addon. Handle grapheme cluster lookup as well as wcwidth. This is based on Unicode 15, and could replace the unicode11 addon. --- .../xterm-addon-unicode-graphemes/.gitignore | 2 + .../xterm-addon-unicode-graphemes/.npmignore | 29 ++ addons/xterm-addon-unicode-graphemes/LICENSE | 19 + .../xterm-addon-unicode-graphemes/README.md | 25 ++ .../package.json | 26 ++ .../src/UnicodeGraphemeProvider.ts | 44 ++ .../src/UnicodeGraphemesAddon.ts | 17 + .../src/UnicodeProperties.ts | 144 +++++++ .../src/tiny-inflate.ts | 380 ++++++++++++++++++ .../src/tsconfig.json | 33 ++ .../src/unicode-trie.ts | 134 ++++++ .../tsconfig.json | 8 + .../typings/xterm-addon-unicode11.d.ts | 14 + .../webpack.config.js | 38 ++ .../xterm-addon-unicode11/src/UnicodeV11.ts | 24 +- src/common/InputHandler.ts | 73 ++-- src/common/TestUtils.test.ts | 10 +- src/common/Types.d.ts | 2 +- src/common/buffer/BufferLine.test.ts | 6 +- src/common/buffer/BufferLine.ts | 8 +- src/common/input/UnicodeV6.ts | 24 +- src/common/parser/EscapeSequenceParser.ts | 3 + src/common/services/Services.ts | 14 +- src/common/services/UnicodeService.test.ts | 3 + src/common/services/UnicodeService.ts | 21 +- tsconfig.all.json | 1 + typings/xterm-headless.d.ts | 1 + typings/xterm.d.ts | 1 + 28 files changed, 1055 insertions(+), 49 deletions(-) create mode 100644 addons/xterm-addon-unicode-graphemes/.gitignore create mode 100644 addons/xterm-addon-unicode-graphemes/.npmignore create mode 100644 addons/xterm-addon-unicode-graphemes/LICENSE create mode 100644 addons/xterm-addon-unicode-graphemes/README.md create mode 100644 addons/xterm-addon-unicode-graphemes/package.json create mode 100644 addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts create mode 100644 addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts create mode 100644 addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts create mode 100644 addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts create mode 100644 addons/xterm-addon-unicode-graphemes/src/tsconfig.json create mode 100644 addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts create mode 100644 addons/xterm-addon-unicode-graphemes/tsconfig.json create mode 100644 addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts create mode 100644 addons/xterm-addon-unicode-graphemes/webpack.config.js diff --git a/addons/xterm-addon-unicode-graphemes/.gitignore b/addons/xterm-addon-unicode-graphemes/.gitignore new file mode 100644 index 0000000000..3063f07d55 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/.gitignore @@ -0,0 +1,2 @@ +lib +node_modules diff --git a/addons/xterm-addon-unicode-graphemes/.npmignore b/addons/xterm-addon-unicode-graphemes/.npmignore new file mode 100644 index 0000000000..b203232aff --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/.npmignore @@ -0,0 +1,29 @@ +# Blacklist - exclude everything except npm defaults such as LICENSE, etc +* +!*/ + +# Whitelist - lib/ +!lib/**/*.d.ts + +!lib/**/*.js +!lib/**/*.js.map + +!lib/**/*.css + +# Whitelist - src/ +!src/**/*.ts +!src/**/*.d.ts + +!src/**/*.js +!src/**/*.js.map + +!src/**/*.css + +# Blacklist - src/ test files +src/**/*.test.ts +src/**/*.test.d.ts +src/**/*.test.js +src/**/*.test.js.map + +# Whitelist - typings/ +!typings/*.d.ts diff --git a/addons/xterm-addon-unicode-graphemes/LICENSE b/addons/xterm-addon-unicode-graphemes/LICENSE new file mode 100644 index 0000000000..8f17892587 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2019, The xterm.js authors (https://github.com/xtermjs/xterm.js) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/addons/xterm-addon-unicode-graphemes/README.md b/addons/xterm-addon-unicode-graphemes/README.md new file mode 100644 index 0000000000..72f1ee2759 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/README.md @@ -0,0 +1,25 @@ +## xterm-addon-unicode-graphemes + +An addon providing enhanced Unicode support (include grapheme clustering) for xterm.js. + +The file `src/UnicodeProperties.ts` is generated and depends on the Unicode version. See [the unicode-properties project](https://github.com/PerBothner/unicode-properties) for credits and re-generation instructions. + +### Install + +```bash +npm install --save xterm-addon-unicode-graphemes +``` + +### Usage + +```ts +import { Terminal } from 'xterm'; +import { UnicodeGraphemeAddon } from 'xterm-addon-unicode-graphemes'; + +const terminal = new Terminal(); +const unicodeGraphemeAddon = new UnicodeGraphemeAddon(); +terminal.loadAddon(unicodeGraphemeAddon); + +// activate the new version +terminal.unicode.activeVersion = '15'; +``` diff --git a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json new file mode 100644 index 0000000000..11643667c2 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/package.json @@ -0,0 +1,26 @@ +{ + "name": "xterm-addon-unicode-graphemes", + "version": "0.5.0", + "author": { + "name": "The xterm.js authors", + "url": "https://xtermjs.org/" + }, + "main": "lib/xterm-addon-unicode-graphemes.js", + "types": "typings/xterm-addon-unicode-graphemes.d.ts", + "repository": "https://github.com/xtermjs/xterm.js", + "license": "MIT", + "keywords": [ + "terminal", + "xterm", + "xterm.js" + ], + "scripts": { + "build": "../../node_modules/.bin/tsc -p .", + "prepackage": "npm run build", + "package": "../../node_modules/.bin/webpack", + "prepublishOnly": "npm run package" + }, + "peerDependencies": { + "xterm": "^5.0.0" + } +} diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts new file mode 100644 index 0000000000..69f113fbec --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2023 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { IUnicodeVersionProvider } from 'xterm'; +import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; +import * as UC from './UnicodeProperties'; + +export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { + public readonly version = '15-graphemes'; + + constructor() { + } + + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let charInfo = UC.getInfo(codepoint); + let w = UC.infoToWidthInfo(charInfo); + let shouldJoin = false; + if (w >= 2) { + const preferWide = false; //this.ambiguousCharsAreWide(context); + // Treat emoji_presentation_selector as WIDE. + w = w == 3 || preferWide || codepoint === 0xfe0f ? 2 : 1; + } else + w = 1; + if (preceding !== 0) { + let oldWidth = UnicodeService.extractWidth(preceding); + charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo); + shouldJoin = charInfo > 0; + if (shouldJoin) { + if (oldWidth > w) + w = oldWidth; + else if (charInfo === 32) // FIXME UC.GRAPHEME_BREAK_SAW_Regional_Pair) + w = 2; + } + } + return UnicodeService.createPropertyValue(charInfo, w, shouldJoin); + } + + public wcwidth(num: number): UnicodeCharWidth { + return UC.infoToWidth(UC.getInfo(num)); + } +} diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts new file mode 100644 index 0000000000..53a92e4332 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts @@ -0,0 +1,17 @@ +/** + * Copyright (c) 2023 The xterm.js authors. All rights reserved. + * @license MIT + * + * UnicodeVersionProvider for V15 with grapeme cluster handleing. + */ + +import { Terminal, ITerminalAddon } from 'xterm'; +import { UnicodeGraphemeProvider } from './UnicodeGraphemeProvider'; + + +export class UnicodeGraphemesAddon implements ITerminalAddon { + public activate(terminal: Terminal): void { + terminal.unicode.register(new UnicodeGraphemeProvider()); + } + public dispose(): void { } +} diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts new file mode 100644 index 0000000000..892f5652f3 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts @@ -0,0 +1,144 @@ +import UnicodeTrie from './unicode-trie'; +const trieRaw = "AAARAAAAAABwxwAAAb4LQfTtmw+sVmUdx58LL/ffe/kjzNBV80gW1F3yR+6CvbJiypoZa0paWmAWSluErSBbFtYkkuZykq6QamGJ4WRqo2kFGy6dYWtEq6G1MFAJbRbOVTQr+x7f5+x97q/n/3me87wXzm/3s+f/7/d7/p7znnvOlvGMbQM7wIPgEbAPHABPgcPgefAS+BfYwuv/F/Q2OulBxKcK6TMRPxu8FcwFbwcjYCFYDC4Cl4ArwNXgGvBJsA58UdBDwy+jbBO4La8DtoEd4H7wkNBuN+KPgn3gADgIngaHwFHwF/AyeAWMm4C+TGi3LdiJ/EnIex04A2RgFpgD5oKFYDG4CLwHXAo+IKSvAqt4/evA9bz9jWA6+Cq3dyvCP8HWNwX93wF38/ROcD94SCjP2+1B+BiPP4HwgOD/7xD/I08fRniMx48jPAFeBeuF+n29jE0G08FZvaPHYWZvh9mcEfAOjlhXx/qGfd2QvLO3zccmtMnzliC9lPt+GenD1nyMiK/LNf1cycs+gfAzPJ6vtxe4jhuQtx5sBLeA28G3eb3v8/Beif4HkPewxu5G6N/rMP4qfgEdvwZPgj+AZ8Cx3nYfxiE8Dk6AV0FfH/YEOB28AbwJDIPzQAtcAC4Gl/Z19F+J+NVCehWPr0b46b7RvixvdPg8yr7U10l/BfFN4La8DdgGdoAHwU/AI2AfOACeAofB8+AlcAKwfvyBKeCM/o7NrF9PXmdWv9/Ynot2I7ztIg8dF5I2a8i63CjZU+9Fm2Wcy4U4ZQVYyeOrwVoev57UuxHcJKRvFuJXgnU8/nUebtbYrKmpCUOx31P7UVNTU1NTU1NTU1OGLTz8Xr/77+W7+9vP0or0MxPMbXaizY8FW3sQ3wseB/t5/kGEh8DR/vbzwL8i/Af4Dy8fP8BYE0weaKenI/wV/DhrQG97JspngzlgLpgHzgPzwUhdVpfVZXVZXRa87HxwAVgQ4Pn5WEd85l5TUzOasvezFw/E3b/LoP9D4CpwrcTWWsGXNQOj748/G9k3G56d1KYxmbELwQbwKFiJvBM8nDWlHa5E+AOwCzwLzjkNeeB28NvTeB1OYyr0gQ1g99R23nGE50xj7MPgc+A+8K5Bxj4FHgB/G2z/T9XEzCZjd/S0WYX4Pc3/r/Nn5I0f6qQXIP5x8ENwBMyYyNhHJ3b0pOCuLrBvM941NTU1JyNHEp+BrC8dMyalt1/m3uWfhmeULzRGp9d3wf0WZSN8+prCr60Wz09tuNmx35sl9Y825HXvRN39KNveaL8flb9f913kbec67kHeTsR3gYcH2uV7ED4m2HhCYi/X9ZuBzvuXv0f8iKIfx5B/XCg7gTgbVPdvAsomCuWnD45eK28UyvL3Jt+s0fU2TVnOXJQvJHUWIb0ELAWXgCt4+UcMumSsEtpch/g6ouMGpG/ieZsc9N/q4YsLd3D9WyPbsWEbfNgO7hN82TWY/n8xKbmsC3xQsYKf+7sjrx2TH+u4H3vhx+OO6+X9hmtXN7C/4r15EPaeBs9J7L7YBeeED/k7wn8fbIf/Rji+yVizmd4vW6bB19cb/PU9w7MxMA60bzPHgM8+zG623+OnzOf55yNc3Gw/k303wveBy3nZcoTXgNVgLfiCRNcG5N3SbIebwZ08fhe4l8d/BH7K4yI/4+HPwS/BAfBks+PzIaHuc3x+ivSL4GUyZ68I6fwZYRNMG2qnz+Th2QjfMtTx/1zE5w61nyN+Q7C3aKgdin1dgrylYBn4INdhGn/Z2FfFiqH01/SUXMvnPD+jC+j85N/RqRhR/DYaS6T+P09K1mD+vzW+5zVqqeVUl0wTz2lK8odJHRGXfBufdGLSoSo3+ZFJ6sl0qvJVNmhI4z4i06mrZ6uT1le1z5h5HE3tMiHPtQ5javu+ItMXUr/MXpmwmyRL3D6U7UwIMyYfczGu0qdqb2pbhcw4xQkhWQBMerrZ/liXrGTbsQwTwrEu4zSczKLrd7fCSKiKn+zSo8BWXMe8myXWOivrUxWi60OPoQ7VIasbQ0S/Ukk3rZVullNhHEL1rYoxUF0PTfm6elWJzq54ZsU4z11ohOy0oxT2izFqCNj4TesXcWZo6+Jfqr1O+1O1beqDagypj2J9F1u2daucj3Eknmq/6PaHrK7Mb1o35DiW1a/a76LuhlDXZX25SOz11S33ErKxDb2/fc/bFKI6axskn+4/W90u9mOtbRf7smsoTdvOfwoRz0t6DaP9k81v6P7Re5aUQudTd303rX+bZzBl97/KR7E+Xbux9lLI+aNr1PfaYLpPDiW2/vrYTX1drMIeXbMye6HXlw8292Jl7ZXxLxRlxXbcaH9drjFlxfa3Qozx8NWRi834lPVZbD+SmN7EJPzc9TVCSVXXDps9L+513b2J7fMu176V2YOhx1A3JrJ8KrLxUumpcu5j/lYT+2tzLRVDZmhjO442a1Clu0ox9VPVXzE/lcS4V0k1D6LI1pJsz8fct9SGbO5l/rmKzTlvsxdj3IvRtC2uv0t1fotltvd2VaCy5Sp5m0EhnZG4CCNxXZrWp/VUIrOjapfnNw11ZNI0V/GWzKNuxtzGKKTEtJeR0NVmpojbtBuW5On0u0is9ZMxvU8ZM+8vEyadtu10oqtP9Q4rcJEm85+Two/QkpGwjI6YkgkhtUfzZOW6fFVexuRri+qj9TJJHZkdmW5abiu0rs6uj2TMfmx06bISUj9tZ9Lja8dVQtox6WpxTJKfW3M4MSTmvU4sWy1CU6BF4jIfdNeDjHWuO1lCWIm2Jr2ixNZvklD2fP0Q6+vsmO4hqN1hJvfDtV5G8mTlsvau4qPP1a64L1skT6QYEzEtq0PzGZOfCbSdSmcKTP7Qs86Ej/1hEpelaV6IMdT5ayu2+nT9tmnnO746XbLxE8t0qOrYtJWhmk9bvaLfsrotRVw1PnR+bcafSUKZ6Mps7smobybJLH2R6WqRkJa1DHV0UmbfUcksiSF0HExSpp+uY0zbTklMaCm7blzEtg8h1rNMXNaYi05ZXsbC75sQ/4+aUxFV2jL50Q3jE0rK2rVtN09By8OHoo1vH2LPSdE323mr2sdu0pUZiDkWLRKWnfeQY6taKzHF9n/GPv8jd/0/egiRvYMR24fU79iY3s9Qva9RlYR8n8HHtq9fMcT1HRWfdZXiHd9YInt/iI4PTaf+BimXKvdXYU+3hlRpHzs2dVK/cxhDn+xs0I2jzxjL5kpXz1VU72aLtkK/97sALKyQqu25SshvG6h08/cLrlKswRklKXvvXfa+pZt+y8nah5YUv2Oo/ap/X2URdRfico9K69hcp6r6XaCz5Wo/hs/iNTGF6N6tV92/9ZS0Wba9SlT3pKF/e6W674+x9ly+VRL73cPU8ygb31D3eSqfVd+iqET0y3YMYojoO11XqrTt2nPxmeq1HYeqxkmUMt8DiesjpoTSr+qDrD+qPZDiOZxMdH0pRPX8MFUfQtv0Xbs+a1a1NnRryNZ/2+tsaPG5ZoX0RXZei88yZGdo4UMPj/cwv/kMJboxLISuQbE+1VW12Mx7FWOrW3M9Hv7Y+uxyraPSo8B2TGPuLdOeZha+hBKf8Sjsm/oR+7pmsx/oeOraFWdXleeV6oyl41zm+mgSuq9C6ox1TsU8D+m4dwMmf8v2nz7Tm+fYfj7HV1K/x1HWjquvY+2dllxM64ue87Su772zzbXIVC+WxLZTRR9MdkMTypZNH1z6G0tUvoccwxA+hfLNdV+a7MaQqscztMi+7QnxDZXvd1dldWQOyMbApb1Jd2h91Ffx+y9Xfb7tClokboOvrRhrbVpFFO8z+65t2/u4su9MUx028znH01/TGVDmHAj13W1o+1USw+eUfYtpO+b82rRNsb6oPpV+1fdBqddB6n3WDXvdJDZrJ0QfQp6bsc/kqq4BIddHWXGdN1pmWveh58F1zYUW1zmOITHXWOg1XrZvZSWUf77tq1ofqear6muaT1lIQp3bofabSafJVlnfYo9B6LGr8uzz2Xchvzfw+T9PlgiV/A8="; +let _data = null; +{ + const bin = window.atob(trieRaw); + _data = new Uint8Array(bin.length) + for (let i = 0; i < bin.length; i++) + _data[i] = bin.charCodeAt(i); +} +const trieData = new UnicodeTrie(_data); +export const GRAPHEME_BREAK_MASK = 0xF; +export const GRAPHEME_BREAK_SHIFT = 0; +export const CHARWIDTH_MASK = 0x30; +export const CHARWIDTH_SHIFT = 4; + +// Values for the GRAPHEME_BREAK property +export const GRAPHEME_BREAK_Other = 0; // includes CR, LF, Control +export const GRAPHEME_BREAK_Prepend = 1; +export const GRAPHEME_BREAK_Extend = 2; +export const GRAPHEME_BREAK_Regional_Indicator = 3; +export const GRAPHEME_BREAK_SpacingMark = 4; +export const GRAPHEME_BREAK_Hangul_L = 5; +export const GRAPHEME_BREAK_Hangul_V = 6; +export const GRAPHEME_BREAK_Hangul_T = 7; +export const GRAPHEME_BREAK_Hangul_LV = 8; +export const GRAPHEME_BREAK_Hangul_LVT = 9; +export const GRAPHEME_BREAK_ZWJ = 10; +export const GRAPHEME_BREAK_ExtPic = 11; + +// Only used as return value from shouldJoin/shouldJoinBackwards. +// (Must be positive; distinct from other values; +// and become GRAPHEME_BREAK_Other when masked with GRAPHEME_BREAK_MASK.) +const GRAPHEME_BREAK_SAW_Regional_Pair = 32; + +export const CHARWIDTH_NORMAL = 0; +export const CHARWIDTH_FORCE_1COLUMN = 1; +export const CHARWIDTH_EA_AMBIGUOUS = 2; +export const CHARWIDTH_WIDE = 3; + +// In the following 'info' is an encoded value from trie.get(codePoint) + +// In the following 'info' is an encoded value from trie.get(codePoint) + +export function infoToWidthInfo(info: number): number { + return (info & CHARWIDTH_MASK) >> CHARWIDTH_SHIFT; +} + +export function infoToWidth(info: number, ambiguousIsWide = false): 0 | 1 |2 { + const v = infoToWidthInfo(info); + return v < CHARWIDTH_EA_AMBIGUOUS ? 1 + : v >= CHARWIDTH_WIDE || ambiguousIsWide ? 2 : 1; +} + +export function strWidth(str: string, preferWide: boolean): number { + let width = 0; + for (let i = 0; i < str.length;) { + const codePoint = str.codePointAt(i) as number; + width += infoToWidth(getInfo(codePoint), preferWide); + i += (codePoint <= 0xffff) ? 1 : 2; + } + return width; +} + +export function columnToIndexInContext(str: string, startIndex: number, column: number, preferWide: boolean): number { + let rv = 0; + for (let i = startIndex; ;) { + if (i >= str.length) + return i; + const codePoint = str.codePointAt(i) as number; + const w = infoToWidth(getInfo(codePoint), preferWide); + rv += w; + if (rv > column) + return i; + i += (codePoint <= 0xffff) ? 1 : 2; + } +} + + +// Test if should break between beforeState and afterCode. +// Return <= 0 if should break; > 0 if should join. +// 'beforeState' is the return value from the previous possible break; +// the value 0 is start of string. +// 'afterCode' is the GRAPHEME_BREAK_Xxx value for the following codepoint. +export function shouldJoin(beforeState: number, afterInfo: number): number { + let beforeCode = (beforeState & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + let afterCode = (afterInfo & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + if (_shouldJoin(beforeCode, afterCode)) { + if (afterCode === GRAPHEME_BREAK_Regional_Indicator) + return GRAPHEME_BREAK_SAW_Regional_Pair; + else + return afterCode + 16; + } else + return afterCode - 16; +} + +export function shouldJoinBackwards(beforeInfo: number, afterState: number): number { + let afterCode = (afterState & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + let beforeCode = (beforeInfo & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + if (_shouldJoin(beforeCode, afterCode)) { + if (beforeCode === GRAPHEME_BREAK_Regional_Indicator) + return GRAPHEME_BREAK_SAW_Regional_Pair; + else + return beforeCode + 16; + } else + return beforeCode - 16; +} + +/** Doesn't handle an odd number of RI characters. */ +function _shouldJoin(beforeCode: number, afterCode: number): boolean { + if (beforeCode >= GRAPHEME_BREAK_Hangul_L + && beforeCode <= GRAPHEME_BREAK_Hangul_LVT) { + if (beforeCode == GRAPHEME_BREAK_Hangul_L // GB6 + && (afterCode == GRAPHEME_BREAK_Hangul_L + || afterCode == GRAPHEME_BREAK_Hangul_V + || afterCode == GRAPHEME_BREAK_Hangul_LV + || afterCode == GRAPHEME_BREAK_Hangul_LVT)) + return true; + if ((beforeCode == GRAPHEME_BREAK_Hangul_LV // GB7 + || beforeCode == GRAPHEME_BREAK_Hangul_V) + && (afterCode == GRAPHEME_BREAK_Hangul_V + || afterCode == GRAPHEME_BREAK_Hangul_T)) + return true; + if ((beforeCode == GRAPHEME_BREAK_Hangul_LVT // GB8 + || beforeCode == GRAPHEME_BREAK_Hangul_T) + && afterCode == GRAPHEME_BREAK_Hangul_T) + return true; + } + if (afterCode == GRAPHEME_BREAK_Extend // GB9 + || afterCode == GRAPHEME_BREAK_ZWJ + || beforeCode == GRAPHEME_BREAK_Prepend // GB9a + || afterCode == GRAPHEME_BREAK_SpacingMark) // GB9b + return true; + if (beforeCode == GRAPHEME_BREAK_ZWJ // GB11 + && afterCode == GRAPHEME_BREAK_ExtPic) + return true; + if (afterCode == GRAPHEME_BREAK_Regional_Indicator // GB12, GB13 + && beforeCode == GRAPHEME_BREAK_Regional_Indicator) + return true; + return false; +} + +export function getInfo(codePoint: number): number { + return trieData.get(codePoint); +} diff --git a/addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts b/addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts new file mode 100644 index 0000000000..a8d2e8a4bd --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts @@ -0,0 +1,380 @@ +var TINF_OK = 0; +var TINF_DATA_ERROR = -3; + +class Tree { + table = new Uint16Array(16); /* table of code length counts */ + trans = new Uint16Array(288); /* code -> symbol translation table */ +}; + +class Data { + tag: number = 0; + bitcount: number = 0; + destLen: number = 0; + ltree: Tree; + dtree: Tree; + source: Uint8Array; + dest: Uint8Array; + sourceIndex: number = 0; + + constructor(source: Uint8Array, dest: Uint8Array) { + this.source = source; + this.dest = dest; + this.ltree = new Tree(); /* dynamic length/symbol tree */ + this.dtree = new Tree(); /* dynamic distance tree */ + } +} + +/* --------------------------------------------------- * + * -- uninitialized global data (static structures) -- * + * --------------------------------------------------- */ + +var sltree = new Tree(); +var sdtree = new Tree(); + +/* extra bits and base tables for length codes */ +var length_bits = new Uint8Array(30); +var length_base = new Uint16Array(30); + +/* extra bits and base tables for distance codes */ +var dist_bits = new Uint8Array(30); +var dist_base = new Uint16Array(30); + +/* special ordering of code length codes */ +var clcidx = new Uint8Array([ + 16, 17, 18, 0, 8, 7, 9, 6, + 10, 5, 11, 4, 12, 3, 13, 2, + 14, 1, 15 +]); + +/* used by tinf_decode_trees, avoids allocations every call */ +const code_tree = new Tree(); +const lengths = new Uint8Array(288 + 32); + +/* ----------------------- * + * -- utility functions -- * + * ----------------------- */ + +/* build extra bits and base tables */ +function tinf_build_bits_base(bits: Uint8Array, base: Uint16Array, delta: number, first: number): void { + var i, sum; + + /* build bits table */ + for (i = 0; i < delta; ++i) bits[i] = 0; + for (i = 0; i < 30 - delta; ++i) bits[i + delta] = i / delta | 0; + + /* build base table */ + for (sum = first, i = 0; i < 30; ++i) { + base[i] = sum; + sum += 1 << bits[i]; + } +} + +/* build the fixed huffman trees */ +function tinf_build_fixed_trees(lt: Tree, dt: Tree): void { + var i; + + /* build fixed length tree */ + for (i = 0; i < 7; ++i) lt.table[i] = 0; + + lt.table[7] = 24; + lt.table[8] = 152; + lt.table[9] = 112; + + for (i = 0; i < 24; ++i) lt.trans[i] = 256 + i; + for (i = 0; i < 144; ++i) lt.trans[24 + i] = i; + for (i = 0; i < 8; ++i) lt.trans[24 + 144 + i] = 280 + i; + for (i = 0; i < 112; ++i) lt.trans[24 + 144 + 8 + i] = 144 + i; + + /* build fixed distance tree */ + for (i = 0; i < 5; ++i) dt.table[i] = 0; + + dt.table[5] = 32; + + for (i = 0; i < 32; ++i) dt.trans[i] = i; +} + +/* given an array of code lengths, build a tree */ +var offs = new Uint16Array(16); + +function tinf_build_tree(t: Tree, lengths: Uint8Array, off: number, num: number): void { + var i, sum; + + /* clear code length count table */ + for (i = 0; i < 16; ++i) t.table[i] = 0; + + /* scan symbol lengths, and sum code length counts */ + for (i = 0; i < num; ++i) t.table[lengths[off + i]]++; + + t.table[0] = 0; + + /* compute offset table for distribution sort */ + for (sum = 0, i = 0; i < 16; ++i) { + offs[i] = sum; + sum += t.table[i]; + } + + /* create code->symbol translation table (symbols sorted by code) */ + for (i = 0; i < num; ++i) { + if (lengths[off + i]) t.trans[offs[lengths[off + i]]++] = i; + } +} + +/* ---------------------- * + * -- decode functions -- * + * ---------------------- */ + +/* get one bit from source stream */ +function tinf_getbit(d: Data): number { + /* check if tag is empty */ + if (!d.bitcount--) { + /* load next tag */ + d.tag = d.source[d.sourceIndex++]; + d.bitcount = 7; + } + + /* shift bit out of tag */ + var bit = d.tag & 1; + d.tag >>>= 1; + + return bit; +} + +/* read a num bit value from a stream and add base */ +function tinf_read_bits(d: Data, num: number, base: number): number { + if (!num) + return base; + + while (d.bitcount < 24) { + d.tag |= d.source[d.sourceIndex++] << d.bitcount; + d.bitcount += 8; + } + + var val = d.tag & (0xffff >>> (16 - num)); + d.tag >>>= num; + d.bitcount -= num; + return val + base; +} + +/* given a data stream and a tree, decode a symbol */ +function tinf_decode_symbol(d: Data, t: Tree): number { + while (d.bitcount < 24) { + d.tag |= d.source[d.sourceIndex++] << d.bitcount; + d.bitcount += 8; + } + + var sum = 0, cur = 0, len = 0; + var tag = d.tag; + + /* get more bits while code value is above sum */ + do { + cur = 2 * cur + (tag & 1); + tag >>>= 1; + ++len; + + sum += t.table[len]; + cur -= t.table[len]; + } while (cur >= 0); + + d.tag = tag; + d.bitcount -= len; + + return t.trans[sum + cur]; +} + +/* given a data stream, decode dynamic trees from it */ +function tinf_decode_trees(d: Data, lt: Tree, dt: Tree): void { + var hlit, hdist, hclen; + var i, num, length; + + /* get 5 bits HLIT (257-286) */ + hlit = tinf_read_bits(d, 5, 257); + + /* get 5 bits HDIST (1-32) */ + hdist = tinf_read_bits(d, 5, 1); + + /* get 4 bits HCLEN (4-19) */ + hclen = tinf_read_bits(d, 4, 4); + + for (i = 0; i < 19; ++i) lengths[i] = 0; + + /* read code lengths for code length alphabet */ + for (i = 0; i < hclen; ++i) { + /* get 3 bits code length (0-7) */ + var clen = tinf_read_bits(d, 3, 0); + lengths[clcidx[i]] = clen; + } + + /* build code length tree */ + tinf_build_tree(code_tree, lengths, 0, 19); + + /* decode code lengths for the dynamic trees */ + for (num = 0; num < hlit + hdist;) { + var sym = tinf_decode_symbol(d, code_tree); + + switch (sym) { + case 16: + /* copy previous code length 3-6 times (read 2 bits) */ + var prev = lengths[num - 1]; + for (length = tinf_read_bits(d, 2, 3); length; --length) { + lengths[num++] = prev; + } + break; + case 17: + /* repeat code length 0 for 3-10 times (read 3 bits) */ + for (length = tinf_read_bits(d, 3, 3); length; --length) { + lengths[num++] = 0; + } + break; + case 18: + /* repeat code length 0 for 11-138 times (read 7 bits) */ + for (length = tinf_read_bits(d, 7, 11); length; --length) { + lengths[num++] = 0; + } + break; + default: + /* values 0-15 represent the actual code lengths */ + lengths[num++] = sym; + break; + } + } + + /* build dynamic trees */ + tinf_build_tree(lt, lengths, 0, hlit); + tinf_build_tree(dt, lengths, hlit, hdist); +} + +/* ----------------------------- * + * -- block inflate functions -- * + * ----------------------------- */ + +/* given a stream and two trees, inflate a block of data */ +function tinf_inflate_block_data(d: Data, lt: Tree, dt: Tree): number { + for (;;) { + var sym = tinf_decode_symbol(d, lt); + + /* check for end of block */ + if (sym === 256) { + return TINF_OK; + } + + if (sym < 256) { + d.dest[d.destLen++] = sym; + } else { + var length, dist, offs; + var i; + + sym -= 257; + + /* possibly get more bits from length code */ + length = tinf_read_bits(d, length_bits[sym], length_base[sym]); + + dist = tinf_decode_symbol(d, dt); + + /* possibly get more bits from distance code */ + offs = d.destLen - tinf_read_bits(d, dist_bits[dist], dist_base[dist]); + + /* copy match */ + for (i = offs; i < offs + length; ++i) { + d.dest[d.destLen++] = d.dest[i]; + } + } + } +} + +/* inflate an uncompressed block of data */ +function tinf_inflate_uncompressed_block(d: Data) { + var length, invlength; + var i; + + /* unread from bitbuffer */ + while (d.bitcount > 8) { + d.sourceIndex--; + d.bitcount -= 8; + } + + /* get length */ + length = d.source[d.sourceIndex + 1]; + length = 256 * length + d.source[d.sourceIndex]; + + /* get one's complement of length */ + invlength = d.source[d.sourceIndex + 3]; + invlength = 256 * invlength + d.source[d.sourceIndex + 2]; + + /* check length */ + if (length !== (~invlength & 0x0000ffff)) + return TINF_DATA_ERROR; + + d.sourceIndex += 4; + + /* copy block */ + for (i = length; i; --i) + d.dest[d.destLen++] = d.source[d.sourceIndex++]; + + /* make sure we start next block on a byte boundary */ + d.bitcount = 0; + + return TINF_OK; +} + +/* inflate stream from source to dest */ +function tinf_uncompress(source: Uint8Array, dest: Uint8Array) { + var d = new Data(source, dest); + var bfinal, btype, res; + + do { + /* read final block flag */ + bfinal = tinf_getbit(d); + + /* read block type (2 bits) */ + btype = tinf_read_bits(d, 2, 0); + + /* decompress block */ + switch (btype) { + case 0: + /* decompress uncompressed block */ + res = tinf_inflate_uncompressed_block(d); + break; + case 1: + /* decompress block with fixed huffman trees */ + res = tinf_inflate_block_data(d, sltree, sdtree); + break; + case 2: + /* decompress block with dynamic huffman trees */ + tinf_decode_trees(d, d.ltree, d.dtree); + res = tinf_inflate_block_data(d, d.ltree, d.dtree); + break; + default: + res = TINF_DATA_ERROR; + } + + if (res !== TINF_OK) + throw new Error('Data error'); + + } while (!bfinal); + + if (d.destLen < d.dest.length) { + if (typeof d.dest.slice === 'function') + return d.dest.slice(0, d.destLen); + else + return d.dest.subarray(0, d.destLen); + } + + return d.dest; +} + +/* -------------------- * + * -- initialization -- * + * -------------------- */ + +/* build fixed huffman trees */ +tinf_build_fixed_trees(sltree, sdtree); + +/* build extra bits and base tables */ +tinf_build_bits_base(length_bits, length_base, 4, 3); +tinf_build_bits_base(dist_bits, dist_base, 2, 1); + +/* fix a special case */ +length_bits[28] = 0; +length_base[28] = 258; + +export default tinf_uncompress diff --git a/addons/xterm-addon-unicode-graphemes/src/tsconfig.json b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json new file mode 100644 index 0000000000..f5489fcc1b --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json @@ -0,0 +1,33 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "es2015", + "lib": [ + "dom", + "es2015" + ], + "rootDir": ".", + "outDir": "../out", + "sourceMap": true, + "removeComments": true, + "strict": true, + "baseUrl": ".", + "paths": { + "common/*": [ + "../../../src/common/*" + ] + }, + "types": [ + "../../../node_modules/@types/mocha" + ] + }, + "include": [ + "./**/*", + "../../../typings/xterm.d.ts" + ], + "references": [ + { + "path": "../../../src/common" + } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts b/addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts new file mode 100644 index 0000000000..2125f04d67 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts @@ -0,0 +1,134 @@ +import inflate from './tiny-inflate' + +// Shift size for getting the index-1 table offset. +const SHIFT_1 = 6 + 5; + +// Shift size for getting the index-2 table offset. +const SHIFT_2 = 5; + +// Difference between the two shift sizes, +// for getting an index-1 offset from an index-2 offset. 6=11-5 +const SHIFT_1_2 = SHIFT_1 - SHIFT_2; + +// Number of index-1 entries for the BMP. 32=0x20 +// This part of the index-1 table is omitted from the serialized form. +const OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; + +// Number of entries in an index-2 block. 64=0x40 +const INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; + +// Mask for getting the lower bits for the in-index-2-block offset. */ +const INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; + +// Shift size for shifting left the index array values. +// Increases possible data size with 16-bit index values at the cost +// of compactability. +// This requires data blocks to be aligned by DATA_GRANULARITY. +const INDEX_SHIFT = 2; + +// Number of entries in a data block. 32=0x20 +const DATA_BLOCK_LENGTH = 1 << SHIFT_2; + +// Mask for getting the lower bits for the in-data-block offset. +const DATA_MASK = DATA_BLOCK_LENGTH - 1; + +// The part of the index-2 table for U+D800..U+DBFF stores values for +// lead surrogate code _units_ not code _points_. +// Values for lead surrogate code _points_ are indexed with this portion of the table. +// Length=32=0x20=0x400>>SHIFT_2. (There are 1024=0x400 lead surrogates.) +const LSCP_INDEX_2_OFFSET = 0x10000 >> SHIFT_2; +const LSCP_INDEX_2_LENGTH = 0x400 >> SHIFT_2; + +// Count the lengths of both BMP pieces. 2080=0x820 +const INDEX_2_BMP_LENGTH = LSCP_INDEX_2_OFFSET + LSCP_INDEX_2_LENGTH; + +// The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. +// Length 32=0x20 for lead bytes C0..DF, regardless of SHIFT_2. +const UTF8_2B_INDEX_2_OFFSET = INDEX_2_BMP_LENGTH; +const UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6; // U+0800 is the first code point after 2-byte UTF-8 + +// The index-1 table, only used for supplementary code points, at offset 2112=0x840. +// Variable length, for code points up to highStart, where the last single-value range starts. +// Maximum length 512=0x200=0x100000>>SHIFT_1. +// (For 0x100000 supplementary code points U+10000..U+10ffff.) +// +// The part of the index-2 table for supplementary code points starts +// after this index-1 table. +// +// Both the index-1 table and the following part of the index-2 table +// are omitted completely if there is only BMP data. +const INDEX_1_OFFSET = UTF8_2B_INDEX_2_OFFSET + UTF8_2B_INDEX_2_LENGTH; + +// The alignment size of a data block. Also the granularity for compaction. +const DATA_GRANULARITY = 1 << INDEX_SHIFT; + +const isBigEndian = (new Uint8Array(new Uint32Array([0x12345678]).buffer)[0] === 0x12); + +class UnicodeTrie { + private data: Uint32Array; + private highStart: number; + private errorValue: number; + constructor(data: Uint8Array) { + // read binary format + + const view = new DataView(data.buffer); + this.highStart = view.getUint32(0, true); + this.errorValue = view.getUint32(4, true); + let uncompressedLength = view.getUint32(8, true); + data = data.subarray(12); + + // double inflate the actual trie data + data = inflate(data, new Uint8Array(uncompressedLength)); + data = inflate(data, new Uint8Array(uncompressedLength)); + + if (isBigEndian) { + // swap bytes from little-endian + const len = data.length; + for (let i = 0; i < len; i += 4) { + // Exchange data[i] and data[i + 3]: + let x = data[i]; data[i] = data[i+3]; data[i+3] = x; + // Exchange data[i + 1] and data[i + 2]: + let y = data[i+1]; data[i+1] = data[i+2]; data[i+2] = y; + } + } + + this.data = new Uint32Array(data.buffer); + + } + + get(codePoint: number): number { + let index; + if ((codePoint < 0) || (codePoint > 0x10ffff)) { + return this.errorValue; + } + + if ((codePoint < 0xd800) || ((codePoint > 0xdbff) && (codePoint <= 0xffff))) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the index. + // data is stored in the index array itself. + index = (this.data[codePoint >> SHIFT_2] << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. + index = (this.data[LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> SHIFT_2)] << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + if (codePoint < this.highStart) { + // Supplemental code point, use two-level lookup. + index = this.data[(INDEX_1_OFFSET - OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> SHIFT_1)]; + index = this.data[index + ((codePoint >> SHIFT_2) & INDEX_2_MASK)]; + index = (index << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + return this.data[this.data.length - DATA_GRANULARITY]; + } +} + +export default UnicodeTrie diff --git a/addons/xterm-addon-unicode-graphemes/tsconfig.json b/addons/xterm-addon-unicode-graphemes/tsconfig.json new file mode 100644 index 0000000000..2d820dd1a6 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/tsconfig.json @@ -0,0 +1,8 @@ +{ + "files": [], + "include": [], + "references": [ + { "path": "./src" }, + { "path": "./test" } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts new file mode 100644 index 0000000000..1d0dce1b1b --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts @@ -0,0 +1,14 @@ +/** + * Copyright (c) 2017 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { Terminal, ITerminalAddon } from 'xterm'; + +declare module 'xterm-addon-unicode11' { + export class Unicode11Addon implements ITerminalAddon { + constructor(); + public activate(terminal: Terminal): void; + public dispose(): void; + } +} diff --git a/addons/xterm-addon-unicode-graphemes/webpack.config.js b/addons/xterm-addon-unicode-graphemes/webpack.config.js new file mode 100644 index 0000000000..89abf53aad --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/webpack.config.js @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. + * @license MIT + */ + +const path = require('path'); + +const addonName = 'UnicodeGraphemesAddon'; +const mainFile = 'xterm-addon-unicode-graphemes.js'; + +module.exports = { + entry: `./out/${addonName}.js`, + devtool: 'source-map', + module: { + rules: [ + { + test: /\.js$/, + use: ["source-map-loader"], + enforce: "pre", + exclude: /node_modules/ + } + ] + }, + resolve: { + modules: ['./node_modules'], + extensions: [ '.js' ], + alias: { + common: path.resolve('../../out/common') + } + }, + output: { + filename: mainFile, + path: path.resolve('./lib'), + library: addonName, + libraryTarget: 'umd' + }, + mode: 'production' +}; diff --git a/addons/xterm-addon-unicode11/src/UnicodeV11.ts b/addons/xterm-addon-unicode11/src/UnicodeV11.ts index b616091ab3..c58e2fde25 100644 --- a/addons/xterm-addon-unicode11/src/UnicodeV11.ts +++ b/addons/xterm-addon-unicode11/src/UnicodeV11.ts @@ -4,8 +4,8 @@ */ import { IUnicodeVersionProvider } from 'xterm'; - -type CharWidth = 0 | 1 | 2; +import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; const BMP_COMBINING = [ [0x0300, 0x036F], [0x0483, 0x0489], [0x0591, 0x05BD], @@ -210,12 +210,28 @@ export class UnicodeV11 implements IUnicodeVersionProvider { } } - public wcwidth(num: number): CharWidth { + public wcwidth(num: number): UnicodeCharWidth { if (num < 32) return 0; if (num < 127) return 1; - if (num < 65536) return table[num] as CharWidth; + if (num < 65536) return table[num] as UnicodeCharWidth; if (bisearch(num, HIGH_COMBINING)) return 0; if (bisearch(num, HIGH_WIDE)) return 2; return 1; } + + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0; + if (shouldJoin) { + let oldWidth = preceding === 0 ? 0 + : UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + width = 1; + shouldJoin = false; + } else if (oldWidth > width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); + } } diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index fa65b93bac..4412a3aedc 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -10,13 +10,14 @@ import { CHARSETS, DEFAULT_CHARSET } from 'common/data/Charsets'; import { EscapeSequenceParser } from 'common/parser/EscapeSequenceParser'; import { Disposable } from 'common/Lifecycle'; import { StringToUtf32, stringFromCodePoint, Utf8ToUtf32 } from 'common/input/TextDecoder'; -import { DEFAULT_ATTR_DATA } from 'common/buffer/BufferLine'; +import { BufferLine, DEFAULT_ATTR_DATA } from 'common/buffer/BufferLine'; import { EventEmitter } from 'common/EventEmitter'; import { IParsingState, IEscapeSequenceParser, IParams, IFunctionIdentifier } from 'common/parser/Types'; import { NULL_CELL_CODE, NULL_CELL_WIDTH, Attributes, FgFlags, BgFlags, Content, UnderlineStyle } from 'common/buffer/Constants'; import { CellData } from 'common/buffer/CellData'; import { AttributeData } from 'common/buffer/AttributeData'; import { ICoreService, IBufferService, IOptionsService, ILogService, ICoreMouseService, ICharsetService, IUnicodeService, LogLevelEnum, IOscLinkService } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; import { OscHandler } from 'common/parser/OscParser'; import { DcsHandler } from 'common/parser/DcsParser'; import { IBuffer } from 'common/buffer/Types'; @@ -176,7 +177,7 @@ export class InputHandler extends Disposable implements IInputHandler { private readonly _oscLinkService: IOscLinkService, private readonly _coreMouseService: ICoreMouseService, private readonly _unicodeService: IUnicodeService, - private readonly _parser: IEscapeSequenceParser = new EscapeSequenceParser() + private readonly _parser: EscapeSequenceParser = new EscapeSequenceParser() ) { super(); this.register(this._parser); @@ -519,10 +520,6 @@ export class InputHandler extends Disposable implements IInputHandler { for (let pos = start; pos < end; ++pos) { code = data[pos]; - // calculate print space - // expensive call, therefore we save width in line buffer - chWidth = this._unicodeService.wcwidth(code); - // get charset replacement character // charset is only defined for ASCII, therefore we only // search for an replacement char if code < 127 @@ -533,6 +530,16 @@ export class InputHandler extends Disposable implements IInputHandler { } } + let precedingInfo = this._parser.precedingCodepoint === 0 ? 0 + : this._parser.precedingJoinState; + // calculate print space + // expensive call, therefore we save width in line buffer + let currentInfo = this._unicodeService.charProperties(code, precedingInfo); + let chWidth = UnicodeService.extractWidth(currentInfo); + let shouldJoin = UnicodeService.extractShouldJoin(currentInfo); + const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingInfo) : 0; + this._parser.precedingCodepoint = code; + this._parser.precedingJoinState = currentInfo; if (screenReaderMode) { this._onA11yChar.fire(stringFromCodePoint(code)); } @@ -540,34 +547,16 @@ export class InputHandler extends Disposable implements IInputHandler { this._oscLinkService.addLineToLink(this._getCurrentLinkId(), this._activeBuffer.ybase + this._activeBuffer.y); } - // insert combining char at last cursor position - // this._activeBuffer.x should never be 0 for a combining char - // since they always follow a cell consuming char - // therefore we can test for this._activeBuffer.x to avoid overflow left - if (!chWidth && this._activeBuffer.x) { - if (!bufferRow.getWidth(this._activeBuffer.x - 1)) { - // found empty cell after fullwidth, need to go 2 cells back - // it is save to step 2 cells back here - // since an empty cell is only set by fullwidth chars - bufferRow.addCodepointToCell(this._activeBuffer.x - 2, code); - } else { - bufferRow.addCodepointToCell(this._activeBuffer.x - 1, code); - } - continue; - } - // goto next line if ch would overflow // NOTE: To avoid costly width checks here, // the terminal does not allow a cols < 2. - if (this._activeBuffer.x + chWidth - 1 >= cols) { + if (this._activeBuffer.x + chWidth - oldWidth > cols) { // autowrap - DECAWM // automatically wraps to the beginning of the next line if (wraparoundMode) { - // clear left over cells to the right - while (this._activeBuffer.x < cols) { - bufferRow.setCellFromCodePoint(this._activeBuffer.x++, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); - } - this._activeBuffer.x = 0; + const oldRow = bufferRow; + let oldCol = this._activeBuffer.x - oldWidth; + this._activeBuffer.x = oldWidth; this._activeBuffer.y++; if (this._activeBuffer.y === this._activeBuffer.scrollBottom + 1) { this._activeBuffer.y--; @@ -582,6 +571,16 @@ export class InputHandler extends Disposable implements IInputHandler { } // row changed, get it again bufferRow = this._activeBuffer.lines.get(this._activeBuffer.ybase + this._activeBuffer.y)!; + if (oldWidth > 0 && bufferRow instanceof BufferLine) { + // Combining character widens 1 column to 2. + // Move old character to next line. + bufferRow.copyCellsFrom(oldRow as BufferLine, + oldCol, 0, oldWidth, false); + } + // clear left over cells to the right + while (oldCol < cols) { + oldRow.setCellFromCodePoint(oldCol++, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); + } } else { this._activeBuffer.x = cols - 1; if (chWidth === 2) { @@ -592,6 +591,21 @@ export class InputHandler extends Disposable implements IInputHandler { } } + // insert combining char at last cursor position + // this._activeBuffer.x should never be 0 for a combining char + // since they always follow a cell consuming char + // therefore we can test for this._activeBuffer.x to avoid overflow left + if (shouldJoin && this._activeBuffer.x) { + const offset = bufferRow.getWidth(this._activeBuffer.x - 1) ? 1 : 2 + // if empty cell after fullwidth, need to go 2 cells back + // it is save to step 2 cells back here + // since an empty cell is only set by fullwidth chars + bufferRow.addCodepointToCell(this._activeBuffer.x - offset, + code, chWidth); + this._activeBuffer.x += chWidth - oldWidth; + continue; + } + // insert mode: move characters to right if (insertMode) { // right shift cells according to the width @@ -617,6 +631,7 @@ export class InputHandler extends Disposable implements IInputHandler { } } } + /* // store last char in Parser.precedingCodepoint for REP to work correctly // This needs to check whether: // - fullwidth + surrogates: reset @@ -631,7 +646,7 @@ export class InputHandler extends Disposable implements IInputHandler { this._parser.precedingCodepoint = this._workCell.content; } } - + */ // handle wide chars: reset cell to the right if it is second cell of a wide char if (this._activeBuffer.x < cols && end - start > 0 && bufferRow.getWidth(this._activeBuffer.x) === 0 && !bufferRow.hasContent(this._activeBuffer.x)) { bufferRow.setCellFromCodePoint(this._activeBuffer.x, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); diff --git a/src/common/TestUtils.test.ts b/src/common/TestUtils.test.ts index cae90ec803..513465838f 100644 --- a/src/common/TestUtils.test.ts +++ b/src/common/TestUtils.test.ts @@ -3,7 +3,8 @@ * @license MIT */ -import { IBufferService, ICoreService, ILogService, IOptionsService, ITerminalOptions, ICoreMouseService, ICharsetService, IUnicodeService, IUnicodeVersionProvider, LogLevelEnum, IDecorationService, IInternalDecoration, IOscLinkService } from 'common/services/Services'; +import { IBufferService, ICoreService, ILogService, IOptionsService, ITerminalOptions, ICoreMouseService, ICharsetService, UnicodeCharProperties, UnicodeCharWidth, IUnicodeService, IUnicodeVersionProvider, LogLevelEnum, IDecorationService, IInternalDecoration, IOscLinkService } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; import { IEvent, EventEmitter } from 'common/EventEmitter'; import { clone } from 'common/Clone'; import { DEFAULT_OPTIONS } from 'common/services/OptionsService'; @@ -167,7 +168,12 @@ export class MockUnicodeService implements IUnicodeService { public versions: string[] = []; public activeVersion: string = ''; public onChange: IEvent = new EventEmitter().event; - public wcwidth = (codepoint: number): number => this._provider.wcwidth(codepoint); + public wcwidth = (codepoint: number): UnicodeCharWidth => this._provider.wcwidth(codepoint); + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + const w = this.wcwidth(codepoint); + const shouldJoin = w !== 0; + return UnicodeService.createPropertyValue(0, w, shouldJoin); + } public getStringCellWidth(s: string): number { throw new Error('Method not implemented.'); } diff --git a/src/common/Types.d.ts b/src/common/Types.d.ts index 734715122d..a558e49468 100644 --- a/src/common/Types.d.ts +++ b/src/common/Types.d.ts @@ -231,7 +231,7 @@ export interface IBufferLine { loadCell(index: number, cell: ICellData): ICellData; setCell(index: number, cell: ICellData): void; setCellFromCodePoint(index: number, codePoint: number, width: number, fg: number, bg: number, eAttrs: IExtendedAttrs): void; - addCodepointToCell(index: number, codePoint: number): void; + addCodepointToCell(index: number, codePoint: number, width: number): void; insertCells(pos: number, n: number, ch: ICellData, eraseAttr?: IAttributeData): void; deleteCells(pos: number, n: number, fill: ICellData, eraseAttr?: IAttributeData): void; replaceCells(start: number, end: number, fill: ICellData, eraseAttr?: IAttributeData, respectProtect?: boolean): void; diff --git a/src/common/buffer/BufferLine.test.ts b/src/common/buffer/BufferLine.test.ts index 111aae036c..f2819aa8c4 100644 --- a/src/common/buffer/BufferLine.test.ts +++ b/src/common/buffer/BufferLine.test.ts @@ -431,7 +431,7 @@ describe('BufferLine', function(): void { describe('addCharToCell', () => { it('should set width to 1 for empty cell', () => { const line = new TestBufferLine(3, CellData.fromCharData([DEFAULT_ATTR, NULL_CELL_CHAR, NULL_CELL_WIDTH, NULL_CELL_CODE]), false); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); const cell = line.loadCell(0, new CellData()); // chars contains single combining char // width is set to 1 @@ -444,7 +444,7 @@ describe('BufferLine', function(): void { const cell = line .loadCell(0, new CellData()); cell.setFromCharData([123, 'e\u0301', 1, 'e\u0301'.charCodeAt(1)]); line.setCell(0, cell); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); line.loadCell(0, cell); // chars contains 3 chars // width is set to 1 @@ -457,7 +457,7 @@ describe('BufferLine', function(): void { const cell = line .loadCell(0, new CellData()); cell.setFromCharData([123, 'e', 1, 'e'.charCodeAt(1)]); line.setCell(0, cell); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); line.loadCell(0, cell); // chars contains 2 chars // width is set to 1 diff --git a/src/common/buffer/BufferLine.ts b/src/common/buffer/BufferLine.ts index d5f4384455..6059d2c5f4 100644 --- a/src/common/buffer/BufferLine.ts +++ b/src/common/buffer/BufferLine.ts @@ -227,7 +227,7 @@ export class BufferLine implements IBufferLine { * onto a leading char. Since we already set the attrs * by the previous `setDataFromCodePoint` call, we can omit it here. */ - public addCodepointToCell(index: number, codePoint: number): void { + public addCodepointToCell(index: number, codePoint: number, width: number): void { let content = this._data[index * CELL_SIZE + Cell.CONTENT]; if (content & Content.IS_COMBINED_MASK) { // we already have a combined string, simply add @@ -245,8 +245,12 @@ export class BufferLine implements IBufferLine { // simply set the data in the cell buffer with a width of 1 content = codePoint | (1 << Content.WIDTH_SHIFT); } - this._data[index * CELL_SIZE + Cell.CONTENT] = content; } + if (width) { + content &= ~Content.WIDTH_MASK; + content |= width << Content.WIDTH_SHIFT; + } + this._data[index * CELL_SIZE + Cell.CONTENT] = content; } public insertCells(pos: number, n: number, fillCellData: ICellData, eraseAttr?: IAttributeData): void { diff --git a/src/common/input/UnicodeV6.ts b/src/common/input/UnicodeV6.ts index bf63a18b22..352d992028 100644 --- a/src/common/input/UnicodeV6.ts +++ b/src/common/input/UnicodeV6.ts @@ -3,8 +3,8 @@ * @license MIT */ import { IUnicodeVersionProvider } from 'common/services/Services'; - -type CharWidth = 0 | 1 | 2; +import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; const BMP_COMBINING = [ [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489], @@ -121,12 +121,28 @@ export class UnicodeV6 implements IUnicodeVersionProvider { } } - public wcwidth(num: number): CharWidth { + public wcwidth(num: number): UnicodeCharWidth { if (num < 32) return 0; if (num < 127) return 1; - if (num < 65536) return table[num] as CharWidth; + if (num < 65536) return table[num] as UnicodeCharWidth; if (bisearch(num, HIGH_COMBINING)) return 0; if ((num >= 0x20000 && num <= 0x2fffd) || (num >= 0x30000 && num <= 0x3fffd)) return 2; return 1; } + + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0; + if (shouldJoin) { + let oldWidth = preceding === 0 ? 0 + : UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + width = 1; + shouldJoin = false; + } else if (oldWidth > width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); + } } diff --git a/src/common/parser/EscapeSequenceParser.ts b/src/common/parser/EscapeSequenceParser.ts index 2f3ddd9296..f8da3b3854 100644 --- a/src/common/parser/EscapeSequenceParser.ts +++ b/src/common/parser/EscapeSequenceParser.ts @@ -231,6 +231,9 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP public initialState: number; public currentState: number; public precedingCodepoint: number; + // Cached result of getJoinProperties(..., precedingCodepoint). + // Only valid if precedingCodepoint !== 0 + public precedingJoinState: number = -1; // UnicodeJoinProperties // buffers over several parse calls protected _params: Params; diff --git a/src/common/services/Services.ts b/src/common/services/Services.ts index b2009690fe..d9785d353b 100644 --- a/src/common/services/Services.ts +++ b/src/common/services/Services.ts @@ -294,6 +294,11 @@ export interface IOscLinkService { getLinkData(linkId: number): IOscLinkData | undefined; } +/** Width and Grapheme_Cluster_Break properties of a character. */ +export type UnicodeCharProperties = number; +export const UnicodeInitialProperties: UnicodeCharProperties = 0; // UNEEDED? +export type UnicodeCharWidth = 0 | 1 | 2; + export const IUnicodeService = createDecorator('UnicodeService'); export interface IUnicodeService { serviceBrand: undefined; @@ -309,13 +314,18 @@ export interface IUnicodeService { /** * Unicode version dependent */ - wcwidth(codepoint: number): number; + wcwidth(codepoint: number): UnicodeCharWidth; getStringCellWidth(s: string): number; + /** Return character width, character type (for grapheme clustering). + * If preceding!=0, it is return code from previous character; + * in that case result specifies if characters should be joined. */ + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties; } export interface IUnicodeVersionProvider { readonly version: string; - wcwidth(ucs: number): 0 | 1 | 2; + wcwidth(ucs: number): UnicodeCharWidth; + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties; } export const IDecorationService = createDecorator('DecorationService'); diff --git a/src/common/services/UnicodeService.test.ts b/src/common/services/UnicodeService.test.ts index a2c4b6367f..110e725cbc 100644 --- a/src/common/services/UnicodeService.test.ts +++ b/src/common/services/UnicodeService.test.ts @@ -12,6 +12,9 @@ class DummyProvider implements IUnicodeVersionProvider { public wcwidth(n: number): 0 | 1 | 2 { return 2; } + charProperties(codepoint: number): number { + return UnicodeService.createPropertyValue(0, this.wcwidth(codepoint)); + } } describe('unicode provider', () => { diff --git a/src/common/services/UnicodeService.ts b/src/common/services/UnicodeService.ts index 5c5b74f698..da0eae118e 100644 --- a/src/common/services/UnicodeService.ts +++ b/src/common/services/UnicodeService.ts @@ -2,7 +2,7 @@ * Copyright (c) 2019 The xterm.js authors. All rights reserved. * @license MIT */ -import { IUnicodeService, IUnicodeVersionProvider } from 'common/services/Services'; +import { IUnicodeService, IUnicodeVersionProvider, UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; import { EventEmitter, IEvent } from 'common/EventEmitter'; import { UnicodeV6 } from 'common/input/UnicodeV6'; @@ -16,6 +16,19 @@ export class UnicodeService implements IUnicodeService { private readonly _onChange = new EventEmitter(); public readonly onChange = this._onChange.event; + public static extractShouldJoin(value: UnicodeCharProperties): boolean { + return (value & 1) !== 0; + } + public static extractWidth(value: UnicodeCharProperties): UnicodeCharWidth { + return ((value >> 1) & 0x3) as UnicodeCharWidth; + } + public static extractCharKind(value: UnicodeCharProperties): number { + return value >> 3; + } + public static createPropertyValue(state: number, width: number, shouldJoin: boolean = false): UnicodeCharProperties { + return ((state & 0xffffff) << 3) | ((width & 3) << 1) | (shouldJoin?1:0); + } + constructor() { const defaultProvider = new UnicodeV6(); this.register(defaultProvider); @@ -51,7 +64,7 @@ export class UnicodeService implements IUnicodeService { /** * Unicode version dependent interface. */ - public wcwidth(num: number): number { + public wcwidth(num: number): UnicodeCharWidth { return this._activeProvider.wcwidth(num); } @@ -83,4 +96,8 @@ export class UnicodeService implements IUnicodeService { } return result; } + + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + return this._activeProvider.charProperties(codepoint, preceding); + } } diff --git a/tsconfig.all.json b/tsconfig.all.json index 4d2df3066a..c678a4cc3a 100644 --- a/tsconfig.all.json +++ b/tsconfig.all.json @@ -13,6 +13,7 @@ { "path": "./addons/xterm-addon-search" }, { "path": "./addons/xterm-addon-serialize" }, { "path": "./addons/xterm-addon-unicode11" }, + { "path": "./addons/xterm-addon-unicode-graphemes" }, { "path": "./addons/xterm-addon-web-links" }, { "path": "./addons/xterm-addon-webgl" } ] diff --git a/typings/xterm-headless.d.ts b/typings/xterm-headless.d.ts index 2ceb6a94a4..013998d061 100644 --- a/typings/xterm-headless.d.ts +++ b/typings/xterm-headless.d.ts @@ -1144,6 +1144,7 @@ declare module 'xterm-headless' { * Unicode version dependent wcwidth implementation. */ wcwidth(codepoint: number): 0 | 1 | 2; + charProperties(codepoint: number, preceding: number): number; } /** diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index 68b37153ea..d22bc61e17 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -1674,6 +1674,7 @@ declare module 'xterm' { * Unicode version dependent wcwidth implementation. */ wcwidth(codepoint: number): 0 | 1 | 2; + charProperties(codepoint: number, preceding: number): number; } /** From 67e968926c7e07224878de04d1b3b45b459fa3fc Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Wed, 17 May 2023 18:37:27 -0700 Subject: [PATCH 02/42] Cleanup - fix various tests. --- .../src/UnicodeGraphemeProvider.ts | 20 +++++--- .../test/UnicodeGraphemesAddon.api.ts | 46 +++++++++++++++++++ .../test/tsconfig.json | 35 ++++++++++++++ ....ts => xterm-addon-unicode-graphemes.d.ts} | 4 +- .../xterm-addon-unicode11/src/UnicodeV11.ts | 8 ++-- src/common/InputHandler.ts | 36 ++++----------- src/common/TestUtils.test.ts | 14 ++++-- src/common/buffer/BufferLine.ts | 2 +- src/common/input/UnicodeV6.ts | 11 ++--- src/common/services/Services.ts | 1 - src/common/services/UnicodeService.test.ts | 2 +- src/common/services/UnicodeService.ts | 11 ++++- 12 files changed, 136 insertions(+), 54 deletions(-) create mode 100644 addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts create mode 100644 addons/xterm-addon-unicode-graphemes/test/tsconfig.json rename addons/xterm-addon-unicode-graphemes/typings/{xterm-addon-unicode11.d.ts => xterm-addon-unicode-graphemes.d.ts} (67%) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 69f113fbec..3dd8af9c78 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -10,7 +10,7 @@ import * as UC from './UnicodeProperties'; export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { public readonly version = '15-graphemes'; - + public ambiguousCharsAreWide: boolean = false; constructor() { } @@ -19,9 +19,8 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { let w = UC.infoToWidthInfo(charInfo); let shouldJoin = false; if (w >= 2) { - const preferWide = false; //this.ambiguousCharsAreWide(context); // Treat emoji_presentation_selector as WIDE. - w = w == 3 || preferWide || codepoint === 0xfe0f ? 2 : 1; + w = w == 3 || this.ambiguousCharsAreWide || codepoint === 0xfe0f ? 2 : 1; } else w = 1; if (preceding !== 0) { @@ -31,14 +30,23 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { if (shouldJoin) { if (oldWidth > w) w = oldWidth; - else if (charInfo === 32) // FIXME UC.GRAPHEME_BREAK_SAW_Regional_Pair) + else if (charInfo === 32) // FIXME UC.GRAPHEME_BREAK_SAW_Regional_Pair) w = 2; } } return UnicodeService.createPropertyValue(charInfo, w, shouldJoin); } - public wcwidth(num: number): UnicodeCharWidth { - return UC.infoToWidth(UC.getInfo(num)); + public wcwidth(codepoint: number): UnicodeCharWidth { + let charInfo = UC.getInfo(codepoint); + let w = UC.infoToWidthInfo(charInfo); + let kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT; + if (kind === UC.GRAPHEME_BREAK_Extend + || kind === UC.GRAPHEME_BREAK_Prepend) + return 0; + else if (w >= 2) + return w == 3 || this.ambiguousCharsAreWide? 2 : 1; + else + return 1; } } diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts new file mode 100644 index 0000000000..f00e023d8c --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { assert } from 'chai'; +import { openTerminal, launchBrowser } from '../../../out-test/api/TestUtils'; +import { Browser, Page } from 'playwright'; + +const APP = 'http://127.0.0.1:3001/test'; + +let browser: Browser; +let page: Page; +const width = 800; +const height = 600; + +describe('UnicodeGraphemesAddon', () => { + before(async function(): Promise { + browser = await launchBrowser(); + page = await (await browser.newContext()).newPage(); + await page.setViewportSize({ width, height }); + }); + + after(async () => { + await browser.close(); + }); + + beforeEach(async function(): Promise { + await page.goto(APP); + await openTerminal(page); + }); + const ourVersion = '15-graphemes'; + it('wcwidth V15 emoji test', async () => { + await page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + `); + // should have loaded '15-graphemes' + assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', ourVersion]); + // switch should not throw + await page.evaluate(`window.term.unicode.activeVersion = '${ourVersion}';`); + assert.deepEqual(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); + // v6: 10, V15: 20 + assert.deepEqual(await page.evaluate(`window.term._core.unicodeService.getStringCellWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣')`), 20); + }); +}); diff --git a/addons/xterm-addon-unicode-graphemes/test/tsconfig.json b/addons/xterm-addon-unicode-graphemes/test/tsconfig.json new file mode 100644 index 0000000000..4b3cb31cfd --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/test/tsconfig.json @@ -0,0 +1,35 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "es2015", + "lib": [ + "dom", + "es2015" + ], + "rootDir": ".", + "outDir": "../out-test", + "sourceMap": true, + "removeComments": true, + "strict": true, + "baseUrl": ".", + "paths": { + "common/*": [ + "../../../src/common/*" + ] + }, + "types": [ + "../../../node_modules/@types/mocha", + "../../../node_modules/@types/node", + "../../../out-test/api/TestUtils" + ] + }, + "include": [ + "./**/*", + "../../../typings/xterm.d.ts" + ], + "references": [ + { + "path": "../../../src/common" + } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts similarity index 67% rename from addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts rename to addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts index 1d0dce1b1b..e4a333504f 100644 --- a/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode11.d.ts +++ b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts @@ -1,11 +1,11 @@ /** - * Copyright (c) 2017 The xterm.js authors. All rights reserved. + * Copyright (c) 2023 The xterm.js authors. All rights reserved. * @license MIT */ import { Terminal, ITerminalAddon } from 'xterm'; -declare module 'xterm-addon-unicode11' { +declare module 'xterm-addon-unicode-graphemes' { export class Unicode11Addon implements ITerminalAddon { constructor(); public activate(terminal: Terminal): void; diff --git a/addons/xterm-addon-unicode11/src/UnicodeV11.ts b/addons/xterm-addon-unicode11/src/UnicodeV11.ts index c58e2fde25..c1ef08c169 100644 --- a/addons/xterm-addon-unicode11/src/UnicodeV11.ts +++ b/addons/xterm-addon-unicode11/src/UnicodeV11.ts @@ -219,14 +219,12 @@ export class UnicodeV11 implements IUnicodeVersionProvider { return 1; } - charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { let width = this.wcwidth(codepoint); - let shouldJoin = width === 0; + let shouldJoin = width === 0 && preceding !== 0; if (shouldJoin) { - let oldWidth = preceding === 0 ? 0 - : UnicodeService.extractWidth(preceding); + const oldWidth = UnicodeService.extractWidth(preceding); if (oldWidth === 0) { - width = 1; shouldJoin = false; } else if (oldWidth > width) { width = oldWidth; diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index 4412a3aedc..baabd82f3c 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -530,16 +530,15 @@ export class InputHandler extends Disposable implements IInputHandler { } } - let precedingInfo = this._parser.precedingCodepoint === 0 ? 0 + const precedingInfo = this._parser.precedingCodepoint === 0 ? 0 : this._parser.precedingJoinState; - // calculate print space - // expensive call, therefore we save width in line buffer - let currentInfo = this._unicodeService.charProperties(code, precedingInfo); - let chWidth = UnicodeService.extractWidth(currentInfo); - let shouldJoin = UnicodeService.extractShouldJoin(currentInfo); + const currentInfo = this._unicodeService.charProperties(code, precedingInfo); + chWidth = UnicodeService.extractWidth(currentInfo); + const shouldJoin = UnicodeService.extractShouldJoin(currentInfo); const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingInfo) : 0; this._parser.precedingCodepoint = code; this._parser.precedingJoinState = currentInfo; + if (screenReaderMode) { this._onA11yChar.fire(stringFromCodePoint(code)); } @@ -575,7 +574,7 @@ export class InputHandler extends Disposable implements IInputHandler { // Combining character widens 1 column to 2. // Move old character to next line. bufferRow.copyCellsFrom(oldRow as BufferLine, - oldCol, 0, oldWidth, false); + oldCol, 0, oldWidth, false); } // clear left over cells to the right while (oldCol < cols) { @@ -596,12 +595,12 @@ export class InputHandler extends Disposable implements IInputHandler { // since they always follow a cell consuming char // therefore we can test for this._activeBuffer.x to avoid overflow left if (shouldJoin && this._activeBuffer.x) { - const offset = bufferRow.getWidth(this._activeBuffer.x - 1) ? 1 : 2 + const offset = bufferRow.getWidth(this._activeBuffer.x - 1) ? 1 : 2; // if empty cell after fullwidth, need to go 2 cells back // it is save to step 2 cells back here // since an empty cell is only set by fullwidth chars bufferRow.addCodepointToCell(this._activeBuffer.x - offset, - code, chWidth); + code, chWidth); this._activeBuffer.x += chWidth - oldWidth; continue; } @@ -609,7 +608,7 @@ export class InputHandler extends Disposable implements IInputHandler { // insert mode: move characters to right if (insertMode) { // right shift cells according to the width - bufferRow.insertCells(this._activeBuffer.x, chWidth, this._activeBuffer.getNullCell(curAttr), curAttr); + bufferRow.insertCells(this._activeBuffer.x, chWidth - oldWidth, this._activeBuffer.getNullCell(curAttr), curAttr); // test last cell - since the last cell has only room for // a halfwidth char any fullwidth shifted there is lost // and will be set to empty cell @@ -631,22 +630,7 @@ export class InputHandler extends Disposable implements IInputHandler { } } } - /* - // store last char in Parser.precedingCodepoint for REP to work correctly - // This needs to check whether: - // - fullwidth + surrogates: reset - // - combining: only base char gets carried on (bug in xterm?) - if (end - start > 0) { - bufferRow.loadCell(this._activeBuffer.x - 1, this._workCell); - if (this._workCell.getWidth() === 2 || this._workCell.getCode() > 0xFFFF) { - this._parser.precedingCodepoint = 0; - } else if (this._workCell.isCombined()) { - this._parser.precedingCodepoint = this._workCell.getChars().charCodeAt(0); - } else { - this._parser.precedingCodepoint = this._workCell.content; - } - } - */ + // handle wide chars: reset cell to the right if it is second cell of a wide char if (this._activeBuffer.x < cols && end - start > 0 && bufferRow.getWidth(this._activeBuffer.x) === 0 && !bufferRow.hasContent(this._activeBuffer.x)) { bufferRow.setCellFromCodePoint(this._activeBuffer.x, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); diff --git a/src/common/TestUtils.test.ts b/src/common/TestUtils.test.ts index 513465838f..ceb0dc29da 100644 --- a/src/common/TestUtils.test.ts +++ b/src/common/TestUtils.test.ts @@ -170,9 +170,17 @@ export class MockUnicodeService implements IUnicodeService { public onChange: IEvent = new EventEmitter().event; public wcwidth = (codepoint: number): UnicodeCharWidth => this._provider.wcwidth(codepoint); public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { - const w = this.wcwidth(codepoint); - const shouldJoin = w !== 0; - return UnicodeService.createPropertyValue(0, w, shouldJoin); + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0 && preceding !== 0; + if (shouldJoin) { + const oldWidth = UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + shouldJoin = false; + } else if (oldWidth > width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); } public getStringCellWidth(s: string): number { throw new Error('Method not implemented.'); diff --git a/src/common/buffer/BufferLine.ts b/src/common/buffer/BufferLine.ts index 6059d2c5f4..d4a9adcd2a 100644 --- a/src/common/buffer/BufferLine.ts +++ b/src/common/buffer/BufferLine.ts @@ -227,7 +227,7 @@ export class BufferLine implements IBufferLine { * onto a leading char. Since we already set the attrs * by the previous `setDataFromCodePoint` call, we can omit it here. */ - public addCodepointToCell(index: number, codePoint: number, width: number): void { + public addCodepointToCell(index: number, codePoint: number, width: number): void { let content = this._data[index * CELL_SIZE + Cell.CONTENT]; if (content & Content.IS_COMBINED_MASK) { // we already have a combined string, simply add diff --git a/src/common/input/UnicodeV6.ts b/src/common/input/UnicodeV6.ts index 352d992028..83265f705a 100644 --- a/src/common/input/UnicodeV6.ts +++ b/src/common/input/UnicodeV6.ts @@ -2,8 +2,7 @@ * Copyright (c) 2019 The xterm.js authors. All rights reserved. * @license MIT */ -import { IUnicodeVersionProvider } from 'common/services/Services'; -import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { IUnicodeVersionProvider, UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; import { UnicodeService } from 'common/services/UnicodeService'; const BMP_COMBINING = [ @@ -130,14 +129,12 @@ export class UnicodeV6 implements IUnicodeVersionProvider { return 1; } - charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { let width = this.wcwidth(codepoint); - let shouldJoin = width === 0; + let shouldJoin = width === 0 && preceding !== 0; if (shouldJoin) { - let oldWidth = preceding === 0 ? 0 - : UnicodeService.extractWidth(preceding); + const oldWidth = UnicodeService.extractWidth(preceding); if (oldWidth === 0) { - width = 1; shouldJoin = false; } else if (oldWidth > width) { width = oldWidth; diff --git a/src/common/services/Services.ts b/src/common/services/Services.ts index d9785d353b..3e5900e135 100644 --- a/src/common/services/Services.ts +++ b/src/common/services/Services.ts @@ -296,7 +296,6 @@ export interface IOscLinkService { /** Width and Grapheme_Cluster_Break properties of a character. */ export type UnicodeCharProperties = number; -export const UnicodeInitialProperties: UnicodeCharProperties = 0; // UNEEDED? export type UnicodeCharWidth = 0 | 1 | 2; export const IUnicodeService = createDecorator('UnicodeService'); diff --git a/src/common/services/UnicodeService.test.ts b/src/common/services/UnicodeService.test.ts index 110e725cbc..01e3c0862e 100644 --- a/src/common/services/UnicodeService.test.ts +++ b/src/common/services/UnicodeService.test.ts @@ -12,7 +12,7 @@ class DummyProvider implements IUnicodeVersionProvider { public wcwidth(n: number): 0 | 1 | 2 { return 2; } - charProperties(codepoint: number): number { + public charProperties(codepoint: number): number { return UnicodeService.createPropertyValue(0, this.wcwidth(codepoint)); } } diff --git a/src/common/services/UnicodeService.ts b/src/common/services/UnicodeService.ts index da0eae118e..e38eb2a7ff 100644 --- a/src/common/services/UnicodeService.ts +++ b/src/common/services/UnicodeService.ts @@ -70,6 +70,7 @@ export class UnicodeService implements IUnicodeService { public getStringCellWidth(s: string): number { let result = 0; + let precedingInfo = 0; const length = s.length; for (let i = 0; i < length; ++i) { let code = s.charCodeAt(i); @@ -92,12 +93,18 @@ export class UnicodeService implements IUnicodeService { result += this.wcwidth(second); } } - result += this.wcwidth(code); + const currentInfo = this.charProperties(code, precedingInfo); + let chWidth = UnicodeService.extractWidth(currentInfo); + if (UnicodeService.extractShouldJoin(currentInfo)) { + chWidth -= UnicodeService.extractWidth(precedingInfo); + } + result += chWidth; + precedingInfo = currentInfo; } return result; } - charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { return this._activeProvider.charProperties(codepoint, preceding); } } From 41760dfb0936fa743f798c4fd141d7df99dfa84c Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 18 May 2023 08:55:52 -0700 Subject: [PATCH 03/42] xterm-addon-unicode-graphemes - fix 'yarn test' --- .eslintrc.json | 5 ++++ .../src/UnicodeGraphemeProvider.ts | 29 +++++++++---------- src/common/InputHandler.ts | 20 ++++++++++--- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index 822ee4bad6..936fec8c0f 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -26,6 +26,8 @@ "addons/xterm-addon-serialize/benchmark/tsconfig.json", "addons/xterm-addon-unicode11/src/tsconfig.json", "addons/xterm-addon-unicode11/test/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/src/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/test/tsconfig.json", "addons/xterm-addon-web-links/src/tsconfig.json", "addons/xterm-addon-web-links/test/tsconfig.json", "addons/xterm-addon-webgl/src/tsconfig.json", @@ -34,6 +36,9 @@ "sourceType": "module" }, "ignorePatterns": [ + "addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts", + "addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts", + "addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts", "**/typings/*.d.ts", "**/node_modules", "**/*.js" diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 3dd8af9c78..87ec8d3279 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -14,39 +14,36 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { constructor() { } - charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { let charInfo = UC.getInfo(codepoint); let w = UC.infoToWidthInfo(charInfo); let shouldJoin = false; if (w >= 2) { // Treat emoji_presentation_selector as WIDE. - w = w == 3 || this.ambiguousCharsAreWide || codepoint === 0xfe0f ? 2 : 1; - } else + w = w === 3 || this.ambiguousCharsAreWide || codepoint === 0xfe0f ? 2 : 1; + } else { w = 1; + } if (preceding !== 0) { - let oldWidth = UnicodeService.extractWidth(preceding); + const oldWidth = UnicodeService.extractWidth(preceding); charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo); shouldJoin = charInfo > 0; if (shouldJoin) { - if (oldWidth > w) + if (oldWidth > w) { w = oldWidth; - else if (charInfo === 32) // FIXME UC.GRAPHEME_BREAK_SAW_Regional_Pair) + } else if (charInfo === 32) { // UC.GRAPHEME_BREAK_SAW_Regional_Pair) w = 2; + } } } return UnicodeService.createPropertyValue(charInfo, w, shouldJoin); } public wcwidth(codepoint: number): UnicodeCharWidth { - let charInfo = UC.getInfo(codepoint); - let w = UC.infoToWidthInfo(charInfo); - let kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT; - if (kind === UC.GRAPHEME_BREAK_Extend - || kind === UC.GRAPHEME_BREAK_Prepend) - return 0; - else if (w >= 2) - return w == 3 || this.ambiguousCharsAreWide? 2 : 1; - else - return 1; + const charInfo = UC.getInfo(codepoint); + const w = UC.infoToWidthInfo(charInfo); + const kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT; + return (kind === UC.GRAPHEME_BREAK_Extend || kind === UC.GRAPHEME_BREAK_Prepend) ? 0 + : (w >= 2 && (w === 3 || this.ambiguousCharsAreWide)) ? 2 : 1; } } diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index baabd82f3c..fac2047406 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -517,6 +517,8 @@ export class InputHandler extends Disposable implements IInputHandler { bufferRow.setCellFromCodePoint(this._activeBuffer.x - 1, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); } + let precedingInfo = this._parser.precedingCodepoint === 0 ? 0 + : this._parser.precedingJoinState; for (let pos = start; pos < end; ++pos) { code = data[pos]; @@ -530,14 +532,11 @@ export class InputHandler extends Disposable implements IInputHandler { } } - const precedingInfo = this._parser.precedingCodepoint === 0 ? 0 - : this._parser.precedingJoinState; const currentInfo = this._unicodeService.charProperties(code, precedingInfo); chWidth = UnicodeService.extractWidth(currentInfo); const shouldJoin = UnicodeService.extractShouldJoin(currentInfo); const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingInfo) : 0; - this._parser.precedingCodepoint = code; - this._parser.precedingJoinState = currentInfo; + precedingInfo = currentInfo; if (screenReaderMode) { this._onA11yChar.fire(stringFromCodePoint(code)); @@ -631,6 +630,19 @@ export class InputHandler extends Disposable implements IInputHandler { } } + this._parser.precedingJoinState = precedingInfo; + // store last char in Parser.precedingCodepoint for REP to work correctly + // This needs to check whether: + // - combining: only base char gets carried on (bug in xterm?) + if (end - start > 0) { + bufferRow.loadCell(this._activeBuffer.x - 1, this._workCell); + if (this._workCell.isCombined()) { + this._parser.precedingCodepoint = this._workCell.getChars().charCodeAt(0); + } else { + this._parser.precedingCodepoint = this._workCell.content; + } + } + // handle wide chars: reset cell to the right if it is second cell of a wide char if (this._activeBuffer.x < cols && end - start > 0 && bufferRow.getWidth(this._activeBuffer.x) === 0 && !bufferRow.hasContent(this._activeBuffer.x)) { bufferRow.setCellFromCodePoint(this._activeBuffer.x, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); From 8e730cdc619f464c137f3f7ee398e806546f8681 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 18 May 2023 11:11:38 -0700 Subject: [PATCH 04/42] xterm-addon-unicode-graphemes: add api tests and get demo working --- .../test/UnicodeGraphemesAddon.api.ts | 22 ++++++++++++++- demo/client.ts | 27 ++++++++++++++----- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts index f00e023d8c..33dc728425 100644 --- a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -29,6 +29,9 @@ describe('UnicodeGraphemesAddon', () => { await page.goto(APP); await openTerminal(page); }); + async function evalWidth(str: string): Promise { + return page.evaluate(`window.term._core.unicodeService.getStringCellWidth('${str}')`); + } const ourVersion = '15-graphemes'; it('wcwidth V15 emoji test', async () => { await page.evaluate(` @@ -41,6 +44,23 @@ describe('UnicodeGraphemesAddon', () => { await page.evaluate(`window.term.unicode.activeVersion = '${ourVersion}';`); assert.deepEqual(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); // v6: 10, V15: 20 - assert.deepEqual(await page.evaluate(`window.term._core.unicodeService.getStringCellWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣')`), 20); + assert.deepEqual(await evalWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'), 20); + // baby with emoji modifier fitzpatrick type-6; baby + assert.deepEqual(await evalWidth('\u{1F476}\u{1F3FF}\u{1F476}'), 4); + // woman+zwj+woman+zwj+boy + assert.deepEqual(await evalWidth('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'), 2); + // REGIONAL INDICATOR SYMBOL LETTER N and RI O + assert.deepEqual(await evalWidth('\u{1f1f3}\u{1f1f4}_'), 3); + assert.deepEqual(await evalWidth('\u{1f1f3}_\u{1f1f4}'), 3); + // letter a with acute accent + assert.deepEqual(await evalWidth('\u0061\u0301'), 1); + // Korean Jamo + assert.deepEqual(await evalWidth('{\u1100\u1161\u11a8}'), 4); + // coffin with text_presentation + assert.deepEqual(await evalWidth('(\u26b0\ufe0e)'), 3); + // coffin with Emoji_presentation + assert.deepEqual(await evalWidth('(\u26b0\ufe0f)'), 4); + // Égalité (using separate acute) emoij_presentation + assert.deepEqual(await evalWidth(''), 16); }); }); diff --git a/demo/client.ts b/demo/client.ts index 68cdec6549..417d0806e6 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -18,6 +18,7 @@ import { SerializeAddon } from '../addons/xterm-addon-serialize/out/SerializeAdd import { WebLinksAddon } from '../addons/xterm-addon-web-links/out/WebLinksAddon'; import { WebglAddon } from '../addons/xterm-addon-webgl/out/WebglAddon'; import { Unicode11Addon } from '../addons/xterm-addon-unicode11/out/Unicode11Addon'; +import { UnicodeGraphemesAddon } from '../addons/xterm-addon-unicode-graphemes/out/UnicodeGraphemesAddon'; import { LigaturesAddon } from '../addons/xterm-addon-ligatures/out/LigaturesAddon'; // Use webpacked version (yarn package) @@ -29,6 +30,7 @@ import { LigaturesAddon } from '../addons/xterm-addon-ligatures/out/LigaturesAdd // import { WebLinksAddon } from 'xterm-addon-web-links'; // import { WebglAddon } from 'xterm-addon-webgl'; // import { Unicode11Addon } from 'xterm-addon-unicode11'; +// import { UnicodeGraphemesAddon } from 'xterm-addon-unicode-graphemes'; // import { LigaturesAddon } from 'xterm-addon-ligatures'; // Pulling in the module's types relies on the above, it's looks a @@ -45,6 +47,7 @@ export interface IWindowWithTerminal extends Window { WebLinksAddon?: typeof WebLinksAddon; // eslint-disable-line @typescript-eslint/naming-convention WebglAddon?: typeof WebglAddon; // eslint-disable-line @typescript-eslint/naming-convention Unicode11Addon?: typeof Unicode11Addon; // eslint-disable-line @typescript-eslint/naming-convention + UnicodeGraphemesAddon?: typeof UnicodeGraphemesAddon; // eslint-disable-line @typescript-eslint/naming-convention LigaturesAddon?: typeof LigaturesAddon; // eslint-disable-line @typescript-eslint/naming-convention } declare let window: IWindowWithTerminal; @@ -55,7 +58,7 @@ let socketURL; let socket; let pid; -type AddonType = 'attach' | 'canvas' | 'fit' | 'search' | 'serialize' | 'unicode11' | 'web-links' | 'webgl' | 'ligatures'; +type AddonType = 'attach' | 'canvas' | 'fit' | 'search' | 'serialize' | 'unicode11' | 'unicode-graphemes' | 'web-links' | 'webgl' | 'ligatures'; interface IDemoAddon { name: T; @@ -68,8 +71,9 @@ interface IDemoAddon { T extends 'serialize' ? typeof SerializeAddon : T extends 'web-links' ? typeof WebLinksAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'ligatures' ? typeof LigaturesAddon : - typeof WebglAddon + T extends 'unicode-graphemes' ? typeof UnicodeGraphemesAddon : + T extends 'ligatures' ? typeof LigaturesAddon : + typeof WebglAddon ); instance?: ( T extends 'attach' ? AttachAddon : @@ -80,8 +84,9 @@ interface IDemoAddon { T extends 'web-links' ? WebLinksAddon : T extends 'webgl' ? WebglAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'ligatures' ? typeof LigaturesAddon : - never + T extends 'unicode-graphemes' ? typeof UnicodeGraphemesAddon : + T extends 'ligatures' ? typeof LigaturesAddon : + never ); } @@ -94,6 +99,7 @@ const addons: { [T in AddonType]: IDemoAddon } = { 'web-links': { name: 'web-links', ctor: WebLinksAddon, canChange: true }, webgl: { name: 'webgl', ctor: WebglAddon, canChange: true }, unicode11: { name: 'unicode11', ctor: Unicode11Addon, canChange: true }, + 'unicode-graphemes': { name: 'unicode-graphemes', ctor: UnicodeGraphemesAddon, canChange: true }, ligatures: { name: 'ligatures', ctor: LigaturesAddon, canChange: true } }; @@ -162,6 +168,7 @@ const disposeRecreateButtonHandler: () => void = () => { addons.search.instance = undefined; addons.serialize.instance = undefined; addons.unicode11.instance = undefined; + addons['unicode-graphemes'].instance = undefined; addons.ligatures.instance = undefined; addons['web-links'].instance = undefined; addons.webgl.instance = undefined; @@ -208,6 +215,7 @@ if (document.location.pathname === '/test') { window.SearchAddon = SearchAddon; window.SerializeAddon = SerializeAddon; window.Unicode11Addon = Unicode11Addon; + window.UnicodeGraphemesAddon = UnicodeGraphemesAddon; window.LigaturesAddon = LigaturesAddon; window.WebLinksAddon = WebLinksAddon; window.WebglAddon = WebglAddon; @@ -252,6 +260,7 @@ function createTerminal(): void { addons.serialize.instance = new SerializeAddon(); addons.fit.instance = new FitAddon(); addons.unicode11.instance = new Unicode11Addon(); + addons['unicode-graphemes'].instance = new UnicodeGraphemesAddon(); try { // try to start with webgl renderer (might throw on older safari/webkit) addons.webgl.instance = new WebglAddon(); } catch (e) { @@ -262,6 +271,7 @@ function createTerminal(): void { typedTerm.loadAddon(addons.search.instance); typedTerm.loadAddon(addons.serialize.instance); typedTerm.loadAddon(addons.unicode11.instance); + typedTerm.loadAddon(addons['unicode-graphemes'].instance); typedTerm.loadAddon(addons['web-links'].instance); window.term = term; // Expose `term` to window for debugging purposes @@ -554,6 +564,9 @@ function initAddons(term: TerminalType): void { if (name === 'unicode11' && checkbox.checked) { term.unicode.activeVersion = '11'; } + if (name === 'unicode-graphemes' && checkbox.checked) { + term.unicode.activeVersion = '15-graphemes'; + } if (name === 'search' && checkbox.checked) { addon.instance.onDidChangeResults(e => updateFindResults(e)); } @@ -576,6 +589,8 @@ function initAddons(term: TerminalType): void { }, 0); } else if (name === 'unicode11') { term.unicode.activeVersion = '11'; + } else if (name === 'unicode-graphemes') { + term.unicode.activeVersion = '15-graphemes'; } else if (name === 'search') { addon.instance.onDidChangeResults(e => updateFindResults(e)); } @@ -590,7 +605,7 @@ function initAddons(term: TerminalType): void { addons.webgl.instance.textureAtlas.remove(); } else if (name === 'canvas') { addons.canvas.instance.textureAtlas.remove(); - } else if (name === 'unicode11') { + } else if (name === 'unicode11' || name === 'unicode-graphemes') { term.unicode.activeVersion = '6'; } addon.instance!.dispose(); From 466501c62f82ff2d1a50687fb4766075dda7b04d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 20 May 2023 15:05:32 +0200 Subject: [PATCH 05/42] patch for working perf test and API test injection --- .../xterm-addon-unicode-graphemes/.gitignore | 1 + .../benchmark/SerializeAddon.benchmark.ts | 78 +++++++++++++++++++ .../benchmark/benchmark.json | 19 +++++ .../benchmark/tsconfig.json | 23 ++++++ .../package.json | 5 +- .../src/UnicodeProperties.ts | 17 ++-- .../tsconfig.json | 3 +- .../test/Unicode11Addon.api.ts | 2 +- test/api/Terminal.api.ts | 5 +- test/api/TestUtils.ts | 8 ++ test/benchmark/Terminal.benchmark.ts | 4 + test/benchmark/tsconfig.json | 3 +- 12 files changed, 155 insertions(+), 13 deletions(-) create mode 100644 addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts create mode 100644 addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json create mode 100644 addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json diff --git a/addons/xterm-addon-unicode-graphemes/.gitignore b/addons/xterm-addon-unicode-graphemes/.gitignore index 3063f07d55..03c051b3c8 100644 --- a/addons/xterm-addon-unicode-graphemes/.gitignore +++ b/addons/xterm-addon-unicode-graphemes/.gitignore @@ -1,2 +1,3 @@ lib node_modules +out-benchmark diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts b/addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts new file mode 100644 index 0000000000..6bdd3a9267 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { perfContext, before, ThroughputRuntimeCase } from 'xterm-benchmark'; + +import { spawn } from 'node-pty'; +import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder'; +import { Terminal } from 'browser/Terminal'; +import { UnicodeGraphemeProvider } from 'UnicodeGraphemeProvider'; + + +function fakedAddonLoad(terminal: any): void { + // resembles what UnicodeGraphemesAddon.activate does under the hood + terminal.unicodeService.register(new UnicodeGraphemeProvider()); + terminal.unicodeService.activeVersion = '15-graphemes'; +} + + +perfContext('Terminal: ls -lR /usr/lib', () => { + let content = ''; + let contentUtf8: Uint8Array; + + before(async () => { + // grab output from "ls -lR /usr" + const p = spawn('ls', ['--color=auto', '-lR', '/usr/lib'], { + name: 'xterm-256color', + cols: 80, + rows: 25, + cwd: process.env.HOME, + env: process.env, + encoding: (null as unknown as string) // needs to be fixed in node-pty + }); + const chunks: Buffer[] = []; + let length = 0; + p.on('data', data => { + chunks.push(data as unknown as Buffer); + length += data.length; + }); + await new Promise(resolve => p.on('exit', () => resolve())); + contentUtf8 = Buffer.concat(chunks, length); + // translate to content string + const buffer = new Uint32Array(contentUtf8.length); + const decoder = new Utf8ToUtf32(); + const codepoints = decoder.decode(contentUtf8, buffer); + for (let i = 0; i < codepoints; ++i) { + content += stringFromCodePoint(buffer[i]); + // peek into content to force flat repr in v8 + if (!(i % 10000000)) { + content[i]; + } + } + }); + + perfContext('write/string/async', () => { + let terminal: Terminal; + before(() => { + terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 }); + fakedAddonLoad(terminal); + }); + new ThroughputRuntimeCase('', async () => { + await new Promise(res => terminal.write(content, res)); + return { payloadSize: contentUtf8.length }; + }, { fork: false }).showAverageThroughput(); + }); + + perfContext('write/Utf8/async', () => { + let terminal: Terminal; + before(() => { + terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 }); + }); + new ThroughputRuntimeCase('', async () => { + await new Promise(res => terminal.write(content, res)); + return { payloadSize: contentUtf8.length }; + }, { fork: false }).showAverageThroughput(); + }); +}); diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json new file mode 100644 index 0000000000..f8b99b5565 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json @@ -0,0 +1,19 @@ +{ + "APP_PATH": ".benchmark", + "evalConfig": { + "tolerance": { + "*": [0.75, 1.5], + "*.dev": [0.01, 1.5], + "*.cv": [0.01, 1.5], + "EscapeSequenceParser.benchmark.js.*.averageThroughput.mean": [0.9, 5] + }, + "skip": [ + "*.median", + "*.runs", + "*.dev", + "*.cv", + "EscapeSequenceParser.benchmark.js.*.averageRuntime", + "Terminal.benchmark.js.*.averageRuntime" + ] + } +} diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json b/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json new file mode 100644 index 0000000000..9bc532d3fe --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json @@ -0,0 +1,23 @@ +{ + "compilerOptions": { + "lib": ["dom", "es6"], + "outDir": "../out-benchmark", + "types": ["../../../node_modules/@types/node"], + "moduleResolution": "node", + "strict": false, + "target": "es2015", + "module": "commonjs", + "baseUrl": ".", + "paths": { + "common/*": ["../../../src/common/*"], + "browser/*": ["../../../src/browser/*"], + "UnicodeGraphemeProvider": ["../src/UnicodeGraphemeProvider"] + } + }, + "include": ["../**/*", "../../../typings/xterm.d.ts"], + "exclude": ["../../../**/*test.ts", "../../**/*api.ts"], + "references": [ + { "path": "../../../src/common" }, + { "path": "../../../src/browser" } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json index 11643667c2..42c9ac52d5 100644 --- a/addons/xterm-addon-unicode-graphemes/package.json +++ b/addons/xterm-addon-unicode-graphemes/package.json @@ -18,7 +18,10 @@ "build": "../../node_modules/.bin/tsc -p .", "prepackage": "npm run build", "package": "../../node_modules/.bin/webpack", - "prepublishOnly": "npm run package" + "prepublishOnly": "npm run package", + "benchmark": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json", + "benchmark-baseline": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --baseline out-benchmark/benchmark/*benchmark.js", + "benchmark-eval": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --eval out-benchmark/benchmark/*benchmark.js" }, "peerDependencies": { "xterm": "^5.0.0" diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts index 892f5652f3..0ee147f85a 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts @@ -1,13 +1,16 @@ import UnicodeTrie from './unicode-trie'; const trieRaw = "AAARAAAAAABwxwAAAb4LQfTtmw+sVmUdx58LL/ffe/kjzNBV80gW1F3yR+6CvbJiypoZa0paWmAWSluErSBbFtYkkuZykq6QamGJ4WRqo2kFGy6dYWtEq6G1MFAJbRbOVTQr+x7f5+x97q/n/3me87wXzm/3s+f/7/d7/p7znnvOlvGMbQM7wIPgEbAPHABPgcPgefAS+BfYwuv/F/Q2OulBxKcK6TMRPxu8FcwFbwcjYCFYDC4Cl4ArwNXgGvBJsA58UdBDwy+jbBO4La8DtoEd4H7wkNBuN+KPgn3gADgIngaHwFHwF/AyeAWMm4C+TGi3LdiJ/EnIex04A2RgFpgD5oKFYDG4CLwHXAo+IKSvAqt4/evA9bz9jWA6+Cq3dyvCP8HWNwX93wF38/ROcD94SCjP2+1B+BiPP4HwgOD/7xD/I08fRniMx48jPAFeBeuF+n29jE0G08FZvaPHYWZvh9mcEfAOjlhXx/qGfd2QvLO3zccmtMnzliC9lPt+GenD1nyMiK/LNf1cycs+gfAzPJ6vtxe4jhuQtx5sBLeA28G3eb3v8/Beif4HkPewxu5G6N/rMP4qfgEdvwZPgj+AZ8Cx3nYfxiE8Dk6AV0FfH/YEOB28AbwJDIPzQAtcAC4Gl/Z19F+J+NVCehWPr0b46b7RvixvdPg8yr7U10l/BfFN4La8DdgGdoAHwU/AI2AfOACeAofB8+AlcAKwfvyBKeCM/o7NrF9PXmdWv9/Ynot2I7ztIg8dF5I2a8i63CjZU+9Fm2Wcy4U4ZQVYyeOrwVoev57UuxHcJKRvFuJXgnU8/nUebtbYrKmpCUOx31P7UVNTU1NTU1NTU1OGLTz8Xr/77+W7+9vP0or0MxPMbXaizY8FW3sQ3wseB/t5/kGEh8DR/vbzwL8i/Af4Dy8fP8BYE0weaKenI/wV/DhrQG97JspngzlgLpgHzgPzwUhdVpfVZXVZXRa87HxwAVgQ4Pn5WEd85l5TUzOasvezFw/E3b/LoP9D4CpwrcTWWsGXNQOj748/G9k3G56d1KYxmbELwQbwKFiJvBM8nDWlHa5E+AOwCzwLzjkNeeB28NvTeB1OYyr0gQ1g99R23nGE50xj7MPgc+A+8K5Bxj4FHgB/G2z/T9XEzCZjd/S0WYX4Pc3/r/Nn5I0f6qQXIP5x8ENwBMyYyNhHJ3b0pOCuLrBvM941NTU1JyNHEp+BrC8dMyalt1/m3uWfhmeULzRGp9d3wf0WZSN8+prCr60Wz09tuNmx35sl9Y825HXvRN39KNveaL8flb9f913kbec67kHeTsR3gYcH2uV7ED4m2HhCYi/X9ZuBzvuXv0f8iKIfx5B/XCg7gTgbVPdvAsomCuWnD45eK28UyvL3Jt+s0fU2TVnOXJQvJHUWIb0ELAWXgCt4+UcMumSsEtpch/g6ouMGpG/ieZsc9N/q4YsLd3D9WyPbsWEbfNgO7hN82TWY/n8xKbmsC3xQsYKf+7sjrx2TH+u4H3vhx+OO6+X9hmtXN7C/4r15EPaeBs9J7L7YBeeED/k7wn8fbIf/Rji+yVizmd4vW6bB19cb/PU9w7MxMA60bzPHgM8+zG623+OnzOf55yNc3Gw/k303wveBy3nZcoTXgNVgLfiCRNcG5N3SbIebwZ08fhe4l8d/BH7K4yI/4+HPwS/BAfBks+PzIaHuc3x+ivSL4GUyZ68I6fwZYRNMG2qnz+Th2QjfMtTx/1zE5w61nyN+Q7C3aKgdin1dgrylYBn4INdhGn/Z2FfFiqH01/SUXMvnPD+jC+j85N/RqRhR/DYaS6T+P09K1mD+vzW+5zVqqeVUl0wTz2lK8odJHRGXfBufdGLSoSo3+ZFJ6sl0qvJVNmhI4z4i06mrZ6uT1le1z5h5HE3tMiHPtQ5javu+ItMXUr/MXpmwmyRL3D6U7UwIMyYfczGu0qdqb2pbhcw4xQkhWQBMerrZ/liXrGTbsQwTwrEu4zSczKLrd7fCSKiKn+zSo8BWXMe8myXWOivrUxWi60OPoQ7VIasbQ0S/Ukk3rZVullNhHEL1rYoxUF0PTfm6elWJzq54ZsU4z11ohOy0oxT2izFqCNj4TesXcWZo6+Jfqr1O+1O1beqDagypj2J9F1u2daucj3Eknmq/6PaHrK7Mb1o35DiW1a/a76LuhlDXZX25SOz11S33ErKxDb2/fc/bFKI6axskn+4/W90u9mOtbRf7smsoTdvOfwoRz0t6DaP9k81v6P7Re5aUQudTd303rX+bZzBl97/KR7E+Xbux9lLI+aNr1PfaYLpPDiW2/vrYTX1drMIeXbMye6HXlw8292Jl7ZXxLxRlxXbcaH9drjFlxfa3Qozx8NWRi834lPVZbD+SmN7EJPzc9TVCSVXXDps9L+513b2J7fMu176V2YOhx1A3JrJ8KrLxUumpcu5j/lYT+2tzLRVDZmhjO442a1Clu0ox9VPVXzE/lcS4V0k1D6LI1pJsz8fct9SGbO5l/rmKzTlvsxdj3IvRtC2uv0t1fotltvd2VaCy5Sp5m0EhnZG4CCNxXZrWp/VUIrOjapfnNw11ZNI0V/GWzKNuxtzGKKTEtJeR0NVmpojbtBuW5On0u0is9ZMxvU8ZM+8vEyadtu10oqtP9Q4rcJEm85+Two/QkpGwjI6YkgkhtUfzZOW6fFVexuRri+qj9TJJHZkdmW5abiu0rs6uj2TMfmx06bISUj9tZ9Lja8dVQtox6WpxTJKfW3M4MSTmvU4sWy1CU6BF4jIfdNeDjHWuO1lCWIm2Jr2ixNZvklD2fP0Q6+vsmO4hqN1hJvfDtV5G8mTlsvau4qPP1a64L1skT6QYEzEtq0PzGZOfCbSdSmcKTP7Qs86Ej/1hEpelaV6IMdT5ayu2+nT9tmnnO746XbLxE8t0qOrYtJWhmk9bvaLfsrotRVw1PnR+bcafSUKZ6Mps7smobybJLH2R6WqRkJa1DHV0UmbfUcksiSF0HExSpp+uY0zbTklMaCm7blzEtg8h1rNMXNaYi05ZXsbC75sQ/4+aUxFV2jL50Q3jE0rK2rVtN09By8OHoo1vH2LPSdE323mr2sdu0pUZiDkWLRKWnfeQY6taKzHF9n/GPv8jd/0/egiRvYMR24fU79iY3s9Qva9RlYR8n8HHtq9fMcT1HRWfdZXiHd9YInt/iI4PTaf+BimXKvdXYU+3hlRpHzs2dVK/cxhDn+xs0I2jzxjL5kpXz1VU72aLtkK/97sALKyQqu25SshvG6h08/cLrlKswRklKXvvXfa+pZt+y8nah5YUv2Oo/ap/X2URdRfico9K69hcp6r6XaCz5Wo/hs/iNTGF6N6tV92/9ZS0Wba9SlT3pKF/e6W674+x9ly+VRL73cPU8ygb31D3eSqfVd+iqET0y3YMYojoO11XqrTt2nPxmeq1HYeqxkmUMt8DiesjpoTSr+qDrD+qPZDiOZxMdH0pRPX8MFUfQtv0Xbs+a1a1NnRryNZ/2+tsaPG5ZoX0RXZei88yZGdo4UMPj/cwv/kMJboxLISuQbE+1VW12Mx7FWOrW3M9Hv7Y+uxyraPSo8B2TGPuLdOeZha+hBKf8Sjsm/oR+7pmsx/oeOraFWdXleeV6oyl41zm+mgSuq9C6ox1TsU8D+m4dwMmf8v2nz7Tm+fYfj7HV1K/x1HWjquvY+2dllxM64ue87Su772zzbXIVC+WxLZTRR9MdkMTypZNH1z6G0tUvoccwxA+hfLNdV+a7MaQqscztMi+7QnxDZXvd1dldWQOyMbApb1Jd2h91Ffx+y9Xfb7tClokboOvrRhrbVpFFO8z+65t2/u4su9MUx028znH01/TGVDmHAj13W1o+1USw+eUfYtpO+b82rRNsb6oPpV+1fdBqddB6n3WDXvdJDZrJ0QfQp6bsc/kqq4BIddHWXGdN1pmWveh58F1zYUW1zmOITHXWOg1XrZvZSWUf77tq1ofqear6muaT1lIQp3bofabSafJVlnfYo9B6LGr8uzz2Xchvzfw+T9PlgiV/A8="; -let _data = null; -{ - const bin = window.atob(trieRaw); - _data = new Uint8Array(bin.length) - for (let i = 0; i < bin.length; i++) - _data[i] = bin.charCodeAt(i); + +declare const Buffer: any; +function _dec(s: string): Uint8Array { + if (typeof Buffer !== 'undefined') return Buffer.from(s, 'base64'); + const bs = atob(s); + const r = new Uint8Array(bs.length); + for (let i = 0; i < r.length; ++i) r[i] = bs.charCodeAt(i); + return r; } -const trieData = new UnicodeTrie(_data); + +const trieData = new UnicodeTrie(_dec(trieRaw)); export const GRAPHEME_BREAK_MASK = 0xF; export const GRAPHEME_BREAK_SHIFT = 0; export const CHARWIDTH_MASK = 0x30; diff --git a/addons/xterm-addon-unicode-graphemes/tsconfig.json b/addons/xterm-addon-unicode-graphemes/tsconfig.json index 2d820dd1a6..0e7b5c3502 100644 --- a/addons/xterm-addon-unicode-graphemes/tsconfig.json +++ b/addons/xterm-addon-unicode-graphemes/tsconfig.json @@ -3,6 +3,7 @@ "include": [], "references": [ { "path": "./src" }, - { "path": "./test" } + { "path": "./test" }, + { "path": "./benchmark" } ] } diff --git a/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts b/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts index 4c695b001c..def5a8adfd 100644 --- a/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts +++ b/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts @@ -36,7 +36,7 @@ describe('Unicode11Addon', () => { window.term.loadAddon(window.unicode11); `); // should have loaded '11' - assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', '11']); + assert.deepEqual((await page.evaluate(`window.term.unicode.versions`) as string[]).includes('11'), true); // switch should not throw await page.evaluate(`window.term.unicode.activeVersion = '11';`); assert.deepEqual(await page.evaluate(`window.term.unicode.activeVersion`), '11'); diff --git a/test/api/Terminal.api.ts b/test/api/Terminal.api.ts index f1edf65c42..e61cb52cd5 100644 --- a/test/api/Terminal.api.ts +++ b/test/api/Terminal.api.ts @@ -16,7 +16,7 @@ let page: Page; const width = 800; const height = 600; -describe('API Integration Tests', function(): void { +describe.only('API Integration Tests', function(): void { before(async () => { browser = await launchBrowser(); page = await (await browser.newContext()).newPage(); @@ -32,7 +32,8 @@ describe('API Integration Tests', function(): void { assert.equal(await page.evaluate(`window.term.rows`), 24); }); - it('Proposed API check', async () => { + // fails with the grapheme injection, not sure why... + it.skip('Proposed API check', async () => { await openTerminal(page, { allowProposedApi: false }); await page.evaluate(` try { diff --git a/test/api/TestUtils.ts b/test/api/TestUtils.ts index 3fd0648eb4..4b7a1d69b2 100644 --- a/test/api/TestUtils.ts +++ b/test/api/TestUtils.ts @@ -46,6 +46,14 @@ export async function timeout(ms: number): Promise { export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}): Promise { await page.evaluate(`window.term = new Terminal(${JSON.stringify({ allowProposedApi: true, ...options })})`); await page.evaluate(`window.term.open(document.querySelector('#terminal-container'))`); + + // TODO: make this injection configurable from outside + await page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + window.term.unicode.activeVersion = '15-graphemes'; + `); + await page.waitForSelector('.xterm-rows'); } diff --git a/test/benchmark/Terminal.benchmark.ts b/test/benchmark/Terminal.benchmark.ts index 578a83d038..3b56a7aa07 100644 --- a/test/benchmark/Terminal.benchmark.ts +++ b/test/benchmark/Terminal.benchmark.ts @@ -8,6 +8,8 @@ import { perfContext, before, ThroughputRuntimeCase } from 'xterm-benchmark'; import { spawn } from 'node-pty'; import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder'; import { Terminal } from 'browser/Terminal'; +import { UnicodeGraphemesAddon } from 'UnicodeGraphemesAddon'; + perfContext('Terminal: ls -lR /usr/lib', () => { let content = ''; @@ -48,6 +50,8 @@ perfContext('Terminal: ls -lR /usr/lib', () => { let terminal: Terminal; before(() => { terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 }); + const uga = new UnicodeGraphemesAddon(); + (terminal as any).loadAddon(uga); }); new ThroughputRuntimeCase('', async () => { await new Promise(res => terminal.write(content, res)); diff --git a/test/benchmark/tsconfig.json b/test/benchmark/tsconfig.json index 2ebe2ebe64..72000cb44b 100644 --- a/test/benchmark/tsconfig.json +++ b/test/benchmark/tsconfig.json @@ -16,7 +16,8 @@ "paths": { "common/*": [ "../../src/common/*" ], "browser/*": [ "../../src/browser/*" ], - "Terminal": ["../../src/Terminal"] + "Terminal": ["../../src/Terminal"], + "UnicodeGraphemesAddon": ["../../addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon"] }, }, "include": [ From 284cde8b1bda9f224883c61605980b5e38b640dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 20 May 2023 15:14:37 +0200 Subject: [PATCH 06/42] fix benchmark --- ...lizeAddon.benchmark.ts => UnicodeGraphemeAddon.benchmark.ts} | 0 addons/xterm-addon-unicode-graphemes/package.json | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename addons/xterm-addon-unicode-graphemes/benchmark/{SerializeAddon.benchmark.ts => UnicodeGraphemeAddon.benchmark.ts} (100%) diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts b/addons/xterm-addon-unicode-graphemes/benchmark/UnicodeGraphemeAddon.benchmark.ts similarity index 100% rename from addons/xterm-addon-unicode-graphemes/benchmark/SerializeAddon.benchmark.ts rename to addons/xterm-addon-unicode-graphemes/benchmark/UnicodeGraphemeAddon.benchmark.ts diff --git a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json index 42c9ac52d5..1896bbf1a4 100644 --- a/addons/xterm-addon-unicode-graphemes/package.json +++ b/addons/xterm-addon-unicode-graphemes/package.json @@ -19,7 +19,7 @@ "prepackage": "npm run build", "package": "../../node_modules/.bin/webpack", "prepublishOnly": "npm run package", - "benchmark": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json", + "benchmark": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json out-benchmark/benchmark/*benchmark.js", "benchmark-baseline": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --baseline out-benchmark/benchmark/*benchmark.js", "benchmark-eval": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --eval out-benchmark/benchmark/*benchmark.js" }, From 68abf10c2516abd9b0aa6024b2426411814291ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 20 May 2023 15:24:56 +0200 Subject: [PATCH 07/42] remove spurious .only --- test/api/Terminal.api.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/api/Terminal.api.ts b/test/api/Terminal.api.ts index e61cb52cd5..81b80b41a8 100644 --- a/test/api/Terminal.api.ts +++ b/test/api/Terminal.api.ts @@ -16,7 +16,7 @@ let page: Page; const width = 800; const height = 600; -describe.only('API Integration Tests', function(): void { +describe('API Integration Tests', function(): void { before(async () => { browser = await launchBrowser(); page = await (await browser.newContext()).newPage(); From ebfa2018dd5611e367880db21f045ac7dd34178a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 20 May 2023 15:28:15 +0200 Subject: [PATCH 08/42] revert wrong added file --- test/benchmark/Terminal.benchmark.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/benchmark/Terminal.benchmark.ts b/test/benchmark/Terminal.benchmark.ts index 3b56a7aa07..812871a669 100644 --- a/test/benchmark/Terminal.benchmark.ts +++ b/test/benchmark/Terminal.benchmark.ts @@ -8,7 +8,6 @@ import { perfContext, before, ThroughputRuntimeCase } from 'xterm-benchmark'; import { spawn } from 'node-pty'; import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder'; import { Terminal } from 'browser/Terminal'; -import { UnicodeGraphemesAddon } from 'UnicodeGraphemesAddon'; perfContext('Terminal: ls -lR /usr/lib', () => { @@ -50,8 +49,6 @@ perfContext('Terminal: ls -lR /usr/lib', () => { let terminal: Terminal; before(() => { terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 }); - const uga = new UnicodeGraphemesAddon(); - (terminal as any).loadAddon(uga); }); new ThroughputRuntimeCase('', async () => { await new Promise(res => terminal.write(content, res)); From 3003185676060cd5a813bdd3d8c567091857bac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 20 May 2023 15:29:38 +0200 Subject: [PATCH 09/42] revert wrongly editied files --- test/benchmark/Terminal.benchmark.ts | 1 - test/benchmark/tsconfig.json | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/test/benchmark/Terminal.benchmark.ts b/test/benchmark/Terminal.benchmark.ts index 812871a669..578a83d038 100644 --- a/test/benchmark/Terminal.benchmark.ts +++ b/test/benchmark/Terminal.benchmark.ts @@ -9,7 +9,6 @@ import { spawn } from 'node-pty'; import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder'; import { Terminal } from 'browser/Terminal'; - perfContext('Terminal: ls -lR /usr/lib', () => { let content = ''; let contentUtf8: Uint8Array; diff --git a/test/benchmark/tsconfig.json b/test/benchmark/tsconfig.json index 72000cb44b..2ebe2ebe64 100644 --- a/test/benchmark/tsconfig.json +++ b/test/benchmark/tsconfig.json @@ -16,8 +16,7 @@ "paths": { "common/*": [ "../../src/common/*" ], "browser/*": [ "../../src/browser/*" ], - "Terminal": ["../../src/Terminal"], - "UnicodeGraphemesAddon": ["../../addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon"] + "Terminal": ["../../src/Terminal"] }, }, "include": [ From 1a8c7dad5f93f8de99473e5b1b4ce06a676aba7f Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 21 May 2023 10:27:36 -0700 Subject: [PATCH 10/42] Update .eslintrc.json to include xterm-addon-unicode-graphemes/benchmark --- .eslintrc.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.eslintrc.json b/.eslintrc.json index 936fec8c0f..37aee4fa97 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -28,6 +28,7 @@ "addons/xterm-addon-unicode11/test/tsconfig.json", "addons/xterm-addon-unicode-graphemes/src/tsconfig.json", "addons/xterm-addon-unicode-graphemes/test/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json", "addons/xterm-addon-web-links/src/tsconfig.json", "addons/xterm-addon-web-links/test/tsconfig.json", "addons/xterm-addon-webgl/src/tsconfig.json", From fa89e1776b107fd602213af42c5f8dc6e722b668 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 21 May 2023 10:29:13 -0700 Subject: [PATCH 11/42] Optimize UnicodeGraphemeProvider for the ASCII case. --- .../src/UnicodeGraphemeProvider.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 87ec8d3279..00740dacb3 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -15,6 +15,15 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { } public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + // Optimize the simple ASCII case, under the condition that + // UnicodeService.extractCharKind(preceding) === GRAPHEME_BREAK_Other + // (which also covers the case that preceding === 0). + + if ((codepoint >= 32 && codepoint < 127) && (preceding >> 3) === 0) { + // Inlined UnicodeService.createPropertyValue(GRAPHEME_BREAK_Other, 1, false) + return 1; + } + let charInfo = UC.getInfo(codepoint); let w = UC.infoToWidthInfo(charInfo); let shouldJoin = false; From dc6818de2b879692d7c557a7db41b753a1ec3803 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Wed, 28 Jun 2023 15:19:44 -0700 Subject: [PATCH 12/42] Fix error in charProperties optimization. Result should be 2, not 1. Use a static field to avoid this kind of error (though it might be slightly slower, depending on the compiler/toolchain). --- .../src/UnicodeGraphemeProvider.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 00740dacb3..8e4b019ed8 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -14,14 +14,15 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { constructor() { } + static readonly plainNarrowProperties: UnicodeCharProperties + = UnicodeService.createPropertyValue(UC.GRAPHEME_BREAK_Other, 1, false); + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { // Optimize the simple ASCII case, under the condition that // UnicodeService.extractCharKind(preceding) === GRAPHEME_BREAK_Other // (which also covers the case that preceding === 0). - if ((codepoint >= 32 && codepoint < 127) && (preceding >> 3) === 0) { - // Inlined UnicodeService.createPropertyValue(GRAPHEME_BREAK_Other, 1, false) - return 1; + return UnicodeGraphemeProvider.plainNarrowProperties; } let charInfo = UC.getInfo(codepoint); From 556823328f72184e6e2a79c93c06fa7807233019 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Wed, 28 Jun 2023 15:33:55 -0700 Subject: [PATCH 13/42] Fix for lint the previous charProperties change. --- .../src/UnicodeGraphemeProvider.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 8e4b019ed8..09a7f25ed7 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -14,7 +14,7 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { constructor() { } - static readonly plainNarrowProperties: UnicodeCharProperties + private static readonly _plainNarrowProperties: UnicodeCharProperties = UnicodeService.createPropertyValue(UC.GRAPHEME_BREAK_Other, 1, false); public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { @@ -22,7 +22,7 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { // UnicodeService.extractCharKind(preceding) === GRAPHEME_BREAK_Other // (which also covers the case that preceding === 0). if ((codepoint >= 32 && codepoint < 127) && (preceding >> 3) === 0) { - return UnicodeGraphemeProvider.plainNarrowProperties; + return UnicodeGraphemeProvider._plainNarrowProperties; } let charInfo = UC.getInfo(codepoint); From 08a0914a78dfda08e3a6480c08dc5b8aedd213e2 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:59:30 -0700 Subject: [PATCH 14/42] Add grapheme cluster test button to demo --- demo/client.ts | 28 ++++++++++++++++++++++++++-- demo/index.html | 3 ++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/demo/client.ts b/demo/client.ts index 65e494423d..ff6bc7a4d1 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -241,6 +241,7 @@ if (document.location.pathname === '/test') { document.getElementById('ansi-colors').addEventListener('click', ansiColorsTest); document.getElementById('osc-hyperlinks').addEventListener('click', addAnsiHyperlink); document.getElementById('sgr-test').addEventListener('click', sgrTest); + document.getElementById('add-grapheme-clusters').addEventListener('click', addGraphemeClusters); document.getElementById('add-decoration').addEventListener('click', addDecoration); document.getElementById('add-overview-ruler').addEventListener('click', addOverviewRuler); document.getElementById('weblinks-test').addEventListener('click', testWeblinks); @@ -1096,6 +1097,29 @@ function getRandomSgr(): string { return randomSgrAttributes[Math.floor(Math.random() * randomSgrAttributes.length)]; } +function addGraphemeClusters(): void { + term.write('\n\n\r'); + term.writeln('Simple emoji (v6: 10 cells, v15: 20 cells)'); + term.writeln('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'); + term.writeln('baby with emoji modifier fitzpatrick type-6; baby'); + term.writeln('\u{1F476}\u{1F3FF}\u{1F476}'); + term.writeln('woman+zwj+woman+zwj+boy'); + term.writeln('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'); + term.writeln('REGIONAL INDICATOR SYMBOL LETTER N and RI O'); + term.writeln('\u{1f1f3}\u{1f1f4}_'); + term.writeln('\u{1f1f3}_\u{1f1f4}'); + term.writeln('letter a with acute accent'); + term.writeln('\u0061\u0301'); + term.writeln('Korean Jamo'); + term.writeln('{\u1100\u1161\u11a8}'); + term.writeln('coffin with text_presentation'); + term.writeln('(\u26b0\ufe0e)'); + term.writeln('coffin with Emoji_presentation'); + term.writeln('(\u26b0\ufe0f)'); + term.writeln('Égalité (using separate acute) emoij_presentation'); + term.writeln(''); +} + function addDecoration(): void { term.options['overviewRulerWidth'] = 15; const marker = term.registerMarker(1); @@ -1241,7 +1265,7 @@ Test BG-colored Erase (BCE): for all cells to the right. def 41 42 43 44 45 46 47\x1b[47m -\x1b[m \x1b[41m \x1b[42m \x1b[43m \x1b[44m \x1b[45m \x1b[46m \x1b[47m +\x1b[m \x1b[41m \x1b[42m \x1b[43m \x1b[44m \x1b[45m \x1b[46m \x1b[47m \x1b[m\x1b[5X\x1b[41m\x1b[5C\x1b[5X\x1b[42m\x1b[5C\x1b[5X\x1b[43m\x1b[5C\x1b[5X\x1b[44m\x1b[5C\x1b[5X\x1b[45m\x1b[5C\x1b[5X\x1b[46m\x1b[5C\x1b[5X\x1b[47m\x1b[5C\x1b[5X\x1b[m `; term.write(data.split('\n').join('\r\n')); @@ -1277,7 +1301,7 @@ function initImageAddonExposed(): void { term.write('\r\n'); term.write(new Uint8Array(buffer)); }); - + const iip_demo = (url: string) => () => fetch(url) .then(resp => resp.arrayBuffer()) .then(buffer => { diff --git a/demo/index.html b/demo/index.html index 0e22d2d290..e6d7eba42f 100644 --- a/demo/index.html +++ b/demo/index.html @@ -97,6 +97,7 @@

Test

+
Decorations
@@ -104,7 +105,7 @@

Test

Weblinks Addon
- +
Image Test
From 6b2459317be4e472f3737146639f6df85182872f Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 3 Aug 2023 22:29:36 -0700 Subject: [PATCH 15/42] Null out following columns after grapheme cluster --- src/common/InputHandler.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index 5d9147aafb..7be2f1aacc 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -600,7 +600,9 @@ export class InputHandler extends Disposable implements IInputHandler { // since an empty cell is only set by fullwidth chars bufferRow.addCodepointToCell(this._activeBuffer.x - offset, code, chWidth); - this._activeBuffer.x += chWidth - oldWidth; + for (let delta = chWidth - oldWidth; --delta >= 0; ) { + bufferRow.setCellFromCodePoint(this._activeBuffer.x++, 0, 0, curAttr.fg, curAttr.bg, curAttr.extended); + } continue; } From dccfc11b4fffe75fdbf2137e057ef36a6df787a7 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 08:19:14 -0700 Subject: [PATCH 16/42] Tweak to avoid line-ending spaces. Line-ending spaces were unintentionally removed - this avoids the problem, --- demo/client.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/client.ts b/demo/client.ts index ff6bc7a4d1..f8e5d4c471 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -1258,6 +1258,7 @@ stop at final '?': Maybe this one http://example.com/with?arguments=false? function coloredErase(): void { + const sp5 = ' '; const data = ` Test BG-colored Erase (BCE): The color block in the following lines should look identical. @@ -1265,7 +1266,7 @@ Test BG-colored Erase (BCE): for all cells to the right. def 41 42 43 44 45 46 47\x1b[47m -\x1b[m \x1b[41m \x1b[42m \x1b[43m \x1b[44m \x1b[45m \x1b[46m \x1b[47m +\x1b[m${sp5}\x1b[41m${sp5}\x1b[42m${sp5}\x1b[43m${sp5}\x1b[44m${sp5}\x1b[45m${sp5}\x1b[46m${sp5}\x1b[47m${sp5} \x1b[m\x1b[5X\x1b[41m\x1b[5C\x1b[5X\x1b[42m\x1b[5C\x1b[5X\x1b[43m\x1b[5C\x1b[5X\x1b[44m\x1b[5C\x1b[5X\x1b[45m\x1b[5C\x1b[5X\x1b[46m\x1b[5C\x1b[5X\x1b[47m\x1b[5C\x1b[5X\x1b[m `; term.write(data.split('\n').join('\r\n')); From a0dc88165fb8886f143025ddfad104c4ba5a7438 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 08:20:52 -0700 Subject: [PATCH 17/42] Fix copyright year in new file. --- addons/xterm-addon-unicode-graphemes/LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/xterm-addon-unicode-graphemes/LICENSE b/addons/xterm-addon-unicode-graphemes/LICENSE index 8f17892587..b6c38b1547 100644 --- a/addons/xterm-addon-unicode-graphemes/LICENSE +++ b/addons/xterm-addon-unicode-graphemes/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2019, The xterm.js authors (https://github.com/xtermjs/xterm.js) +Copyright (c) 2023, The xterm.js authors (https://github.com/xtermjs/xterm.js) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 88bfc6bb2b55f3814160749d7739879693fc7d7b Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 08:28:08 -0700 Subject: [PATCH 18/42] Tweak `charProperties` doc comment. --- src/common/services/Services.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/common/services/Services.ts b/src/common/services/Services.ts index 82e5ce960f..4178a45bb1 100644 --- a/src/common/services/Services.ts +++ b/src/common/services/Services.ts @@ -314,9 +314,11 @@ export interface IUnicodeService { */ wcwidth(codepoint: number): UnicodeCharWidth; getStringCellWidth(s: string): number; - /** Return character width, character type (for grapheme clustering). - * If preceding!=0, it is return code from previous character; - * in that case result specifies if characters should be joined. */ + /** + * Return character width and type for grapheme clustering. + * If preceding != 0, it is the return code from the previous character; + * in that case the result specifies if the characters should be joined. + */ charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties; } From 988b3b1ab2c6532ca3d92015cc703f17ee6aa1f0 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 11:52:39 -0700 Subject: [PATCH 19/42] Remove tailing space. --- addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json index f8b99b5565..1333eac883 100644 --- a/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json +++ b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json @@ -15,5 +15,5 @@ "EscapeSequenceParser.benchmark.js.*.averageRuntime", "Terminal.benchmark.js.*.averageRuntime" ] - } + } } From 720297946b607a651c2b88e4d8a12c4d789a2f16 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 11:53:11 -0700 Subject: [PATCH 20/42] Changed xterm-addon-unicode-graphemes version number to 0.1.0. --- addons/xterm-addon-unicode-graphemes/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json index 1896bbf1a4..82a28bd404 100644 --- a/addons/xterm-addon-unicode-graphemes/package.json +++ b/addons/xterm-addon-unicode-graphemes/package.json @@ -1,6 +1,6 @@ { "name": "xterm-addon-unicode-graphemes", - "version": "0.5.0", + "version": "0.1.0", "author": { "name": "The xterm.js authors", "url": "https://xtermjs.org/" From f1035405e648ab7a8f3f546c325f06d3b60193f4 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sun, 6 Aug 2023 12:44:43 -0700 Subject: [PATCH 21/42] Move precedingJoinState from EscapeSequenceParser to InputHandler This allows InputHandler._parser to be IEscapeSequenceParser rather than EscapeSequenceParser. --- src/common/InputHandler.ts | 10 +++++++--- src/common/parser/EscapeSequenceParser.ts | 3 --- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index 7be2f1aacc..76f77fddf8 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -126,6 +126,10 @@ export class InputHandler extends Disposable implements IInputHandler { protected _windowTitleStack: string[] = []; protected _iconNameStack: string[] = []; + // Cached result of getJoinProperties(..., precedingCodepoint). + // Only valid if precedingCodepoint !== 0 + public precedingJoinState: number = -1; // UnicodeJoinProperties + private _curAttrData: IAttributeData = DEFAULT_ATTR_DATA.clone(); public getAttrData(): IAttributeData { return this._curAttrData; } private _eraseAttrDataInternal: IAttributeData = DEFAULT_ATTR_DATA.clone(); @@ -177,7 +181,7 @@ export class InputHandler extends Disposable implements IInputHandler { private readonly _oscLinkService: IOscLinkService, private readonly _coreMouseService: ICoreMouseService, private readonly _unicodeService: IUnicodeService, - private readonly _parser: EscapeSequenceParser = new EscapeSequenceParser() + private readonly _parser: IEscapeSequenceParser = new EscapeSequenceParser() ) { super(); this.register(this._parser); @@ -518,7 +522,7 @@ export class InputHandler extends Disposable implements IInputHandler { } let precedingInfo = this._parser.precedingCodepoint === 0 ? 0 - : this._parser.precedingJoinState; + : this.precedingJoinState; for (let pos = start; pos < end; ++pos) { code = data[pos]; @@ -632,7 +636,7 @@ export class InputHandler extends Disposable implements IInputHandler { } } - this._parser.precedingJoinState = precedingInfo; + this.precedingJoinState = precedingInfo; // store last char in Parser.precedingCodepoint for REP to work correctly // This needs to check whether: // - combining: only base char gets carried on (bug in xterm?) diff --git a/src/common/parser/EscapeSequenceParser.ts b/src/common/parser/EscapeSequenceParser.ts index f8da3b3854..2f3ddd9296 100644 --- a/src/common/parser/EscapeSequenceParser.ts +++ b/src/common/parser/EscapeSequenceParser.ts @@ -231,9 +231,6 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP public initialState: number; public currentState: number; public precedingCodepoint: number; - // Cached result of getJoinProperties(..., precedingCodepoint). - // Only valid if precedingCodepoint !== 0 - public precedingJoinState: number = -1; // UnicodeJoinProperties // buffers over several parse calls protected _params: Params; From b9abbc0a85ec17b73d0808a3734c9df24909e106 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Mon, 7 Aug 2023 11:10:06 -0700 Subject: [PATCH 22/42] Split complicated conditional expression. --- .../src/UnicodeGraphemeProvider.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 09a7f25ed7..06987c189f 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -53,7 +53,12 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { const charInfo = UC.getInfo(codepoint); const w = UC.infoToWidthInfo(charInfo); const kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT; - return (kind === UC.GRAPHEME_BREAK_Extend || kind === UC.GRAPHEME_BREAK_Prepend) ? 0 - : (w >= 2 && (w === 3 || this.ambiguousCharsAreWide)) ? 2 : 1; + if (kind === UC.GRAPHEME_BREAK_Extend || kind === UC.GRAPHEME_BREAK_Prepend) { + return 0; + } + if (w >= 2 && (w === 3 || this.ambiguousCharsAreWide)) { + return 2; + } + return 1; } } From 4fbc623706743afe44deb894f551390b08542c22 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Mon, 7 Aug 2023 11:55:52 -0700 Subject: [PATCH 23/42] Expand doc comments for UnicodeCharProperties and UnicodeCharWidth. --- src/common/services/Services.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/common/services/Services.ts b/src/common/services/Services.ts index 4178a45bb1..8f5ec7c6af 100644 --- a/src/common/services/Services.ts +++ b/src/common/services/Services.ts @@ -293,8 +293,27 @@ export interface IOscLinkService { getLinkData(linkId: number): IOscLinkData | undefined; } -/** Width and Grapheme_Cluster_Break properties of a character. */ +/* + * Width and Grapheme_Cluster_Break properties of a character as a bit mask. + * + * bit 0: shouldJoin - should combine with preceding character. + * bit 1..2: wcwidth - see UnicodeCharWidth. + * bit 3..31: class of character (currently only 4 bits are used). + * This is used to determined grapheme clustering - i.e. which codepoints + * are to be combined into a single compound character. + * + * Use the UnicodeService static function createPropertyValue to create a + * UnicodeCharProperties; use extractShouldJoin, extractWidth, and + * extractCharKind to extract the components. + */ export type UnicodeCharProperties = number; + +/** + * Width in columns of a character. + * In a CJK context, "half-width" characters (such as Latin) are width 1, + * while "full-width" characters (such as Kanji) are 2 columns wide. + * Combining characters (such as accents) are width 0. + */ export type UnicodeCharWidth = 0 | 1 | 2; export const IUnicodeService = createDecorator('UnicodeService'); From 302da18c81cef1f38854436671ee9f81d9a7de18 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Mon, 7 Aug 2023 13:08:35 -0700 Subject: [PATCH 24/42] Rename local variable precedingInfo in InputHandler to precedingJoinState --- src/common/InputHandler.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index 76f77fddf8..60a9226600 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -521,7 +521,7 @@ export class InputHandler extends Disposable implements IInputHandler { bufferRow.setCellFromCodePoint(this._activeBuffer.x - 1, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); } - let precedingInfo = this._parser.precedingCodepoint === 0 ? 0 + let precedingJoinState = this._parser.precedingCodepoint === 0 ? 0 : this.precedingJoinState; for (let pos = start; pos < end; ++pos) { code = data[pos]; @@ -536,11 +536,11 @@ export class InputHandler extends Disposable implements IInputHandler { } } - const currentInfo = this._unicodeService.charProperties(code, precedingInfo); + const currentInfo = this._unicodeService.charProperties(code, precedingJoinState); chWidth = UnicodeService.extractWidth(currentInfo); const shouldJoin = UnicodeService.extractShouldJoin(currentInfo); - const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingInfo) : 0; - precedingInfo = currentInfo; + const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingJoinState) : 0; + precedingJoinState = currentInfo; if (screenReaderMode) { this._onA11yChar.fire(stringFromCodePoint(code)); @@ -636,7 +636,7 @@ export class InputHandler extends Disposable implements IInputHandler { } } - this.precedingJoinState = precedingInfo; + this.precedingJoinState = precedingJoinState; // store last char in Parser.precedingCodepoint for REP to work correctly // This needs to check whether: // - combining: only base char gets carried on (bug in xterm?) From b9a4760b446c930469122c637ca1d9194d7b7269 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Tue, 8 Aug 2023 18:32:37 -0700 Subject: [PATCH 25/42] Move files imported from unicode-properties into subdirectory Specifically addons/xterm-addon-unicode-graphemes/src/third-party/. --- .eslintrc.json | 4 +--- .../src/UnicodeGraphemeProvider.ts | 2 +- .../src/{ => third-party}/UnicodeProperties.ts | 0 .../src/{ => third-party}/tiny-inflate.ts | 0 .../src/{ => third-party}/unicode-trie.ts | 0 test/api/Terminal.api.ts | 2 +- test/api/TestUtils.ts | 3 ++- 7 files changed, 5 insertions(+), 6 deletions(-) rename addons/xterm-addon-unicode-graphemes/src/{ => third-party}/UnicodeProperties.ts (100%) rename addons/xterm-addon-unicode-graphemes/src/{ => third-party}/tiny-inflate.ts (100%) rename addons/xterm-addon-unicode-graphemes/src/{ => third-party}/unicode-trie.ts (100%) diff --git a/.eslintrc.json b/.eslintrc.json index 701c3a72a2..2ad17123a0 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -39,9 +39,7 @@ "sourceType": "module" }, "ignorePatterns": [ - "addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts", - "addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts", - "addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts", + "addons/*/src/third-party/*.ts", "**/inwasm-sdks/*", "**/typings/*.d.ts", "**/node_modules", diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index 06987c189f..b1c6202044 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -6,7 +6,7 @@ import { IUnicodeVersionProvider } from 'xterm'; import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; import { UnicodeService } from 'common/services/UnicodeService'; -import * as UC from './UnicodeProperties'; +import * as UC from './third-party/UnicodeProperties'; export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { public readonly version = '15-graphemes'; diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/UnicodeProperties.ts similarity index 100% rename from addons/xterm-addon-unicode-graphemes/src/UnicodeProperties.ts rename to addons/xterm-addon-unicode-graphemes/src/third-party/UnicodeProperties.ts diff --git a/addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/tiny-inflate.ts similarity index 100% rename from addons/xterm-addon-unicode-graphemes/src/tiny-inflate.ts rename to addons/xterm-addon-unicode-graphemes/src/third-party/tiny-inflate.ts diff --git a/addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/unicode-trie.ts similarity index 100% rename from addons/xterm-addon-unicode-graphemes/src/unicode-trie.ts rename to addons/xterm-addon-unicode-graphemes/src/third-party/unicode-trie.ts diff --git a/test/api/Terminal.api.ts b/test/api/Terminal.api.ts index 17c8555fd0..81eaada27d 100644 --- a/test/api/Terminal.api.ts +++ b/test/api/Terminal.api.ts @@ -33,7 +33,7 @@ describe('API Integration Tests', function(): void { }); // fails with the grapheme injection, not sure why... - it.skip('Proposed API check', async () => { + it('Proposed API check', async () => { await openTerminal(page, { allowProposedApi: false }); await page.evaluate(` try { diff --git a/test/api/TestUtils.ts b/test/api/TestUtils.ts index 4b7a1d69b2..220bd42cde 100644 --- a/test/api/TestUtils.ts +++ b/test/api/TestUtils.ts @@ -47,13 +47,14 @@ export async function openTerminal(page: playwright.Page, options: ITerminalOpti await page.evaluate(`window.term = new Terminal(${JSON.stringify({ allowProposedApi: true, ...options })})`); await page.evaluate(`window.term.open(document.querySelector('#terminal-container'))`); + /* // TODO: make this injection configurable from outside await page.evaluate(` window.unicode = new UnicodeGraphemesAddon(); window.term.loadAddon(window.unicode); window.term.unicode.activeVersion = '15-graphemes'; `); - +*/ await page.waitForSelector('.xterm-rows'); } From 3bfe5d86c0e93534a4a1ff562b431bea87c95320 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Wed, 9 Aug 2023 14:44:45 -0700 Subject: [PATCH 26/42] Add testOptions parameter to openTerminal in TestUtils. This is used to control whether the UnicodeGraphemesAddon is loaded. --- test/api/Terminal.api.ts | 2 +- test/api/TestUtils.ts | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/api/Terminal.api.ts b/test/api/Terminal.api.ts index 81eaada27d..dcaea33ee8 100644 --- a/test/api/Terminal.api.ts +++ b/test/api/Terminal.api.ts @@ -34,7 +34,7 @@ describe('API Integration Tests', function(): void { // fails with the grapheme injection, not sure why... it('Proposed API check', async () => { - await openTerminal(page, { allowProposedApi: false }); + await openTerminal(page, { allowProposedApi: false }, { loadUnicodeGraphemesAddon: false }); await page.evaluate(` try { window.term.markers; diff --git a/test/api/TestUtils.ts b/test/api/TestUtils.ts index 220bd42cde..387eb3229b 100644 --- a/test/api/TestUtils.ts +++ b/test/api/TestUtils.ts @@ -43,18 +43,18 @@ export async function timeout(ms: number): Promise { return new Promise(r => setTimeout(r, ms)); } -export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}): Promise { +export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}, testOptions: any = { loadUnicodeGraphemesAddon: true}): Promise { await page.evaluate(`window.term = new Terminal(${JSON.stringify({ allowProposedApi: true, ...options })})`); await page.evaluate(`window.term.open(document.querySelector('#terminal-container'))`); - /* - // TODO: make this injection configurable from outside - await page.evaluate(` - window.unicode = new UnicodeGraphemesAddon(); - window.term.loadAddon(window.unicode); - window.term.unicode.activeVersion = '15-graphemes'; - `); -*/ + // See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 + if (testOptions.loadUnicodeGraphemesAddon) { + await page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + window.term.unicode.activeVersion = '15-graphemes'; + `); + } await page.waitForSelector('.xterm-rows'); } From fb2c6801d43042be11708ce124be86d6fc4c02c7 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 10 Aug 2023 11:39:47 -0700 Subject: [PATCH 27/42] Replace comments by assert message in unicode-graphemes tests Also use plain equal instead of deepEqual (overkill). --- .../test/UnicodeGraphemesAddon.api.ts | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts index 33dc728425..13a5ef84bc 100644 --- a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -42,25 +42,26 @@ describe('UnicodeGraphemesAddon', () => { assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', ourVersion]); // switch should not throw await page.evaluate(`window.term.unicode.activeVersion = '${ourVersion}';`); - assert.deepEqual(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); - // v6: 10, V15: 20 - assert.deepEqual(await evalWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'), 20); - // baby with emoji modifier fitzpatrick type-6; baby - assert.deepEqual(await evalWidth('\u{1F476}\u{1F3FF}\u{1F476}'), 4); - // woman+zwj+woman+zwj+boy - assert.deepEqual(await evalWidth('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'), 2); - // REGIONAL INDICATOR SYMBOL LETTER N and RI O - assert.deepEqual(await evalWidth('\u{1f1f3}\u{1f1f4}_'), 3); - assert.deepEqual(await evalWidth('\u{1f1f3}_\u{1f1f4}'), 3); - // letter a with acute accent - assert.deepEqual(await evalWidth('\u0061\u0301'), 1); - // Korean Jamo - assert.deepEqual(await evalWidth('{\u1100\u1161\u11a8}'), 4); - // coffin with text_presentation - assert.deepEqual(await evalWidth('(\u26b0\ufe0e)'), 3); - // coffin with Emoji_presentation - assert.deepEqual(await evalWidth('(\u26b0\ufe0f)'), 4); - // Égalité (using separate acute) emoij_presentation - assert.deepEqual(await evalWidth(''), 16); + assert.equal(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); + assert.equal(await evalWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'), 20, + '10 emoji - width 10 in V6; 20 in V11 or later'); + assert.equal(await evalWidth('\u{1F476}\u{1F3FF}\u{1F476}'), 4, + 'baby with emoji modifier fitzpatrick type-6; baby'); + assert.equal(await evalWidth('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'), 2, + 'woman+zwj+woman+zwj+boy'); + assert.equal(await evalWidth('\u{1f1f3}\u{1f1f4}/'), 3, + 'regional indicator symbol letters N and O, cluster'); + assert.equal(await evalWidth('\u{1f1f3}/\u{1f1f4}'), 3, + 'regional indicator symbol letters N and O, separated'); + assert.equal(await evalWidth('\u0061\u0301'), 1, + 'letter a with acute accent'); + assert.equal(await evalWidth('{\u1100\u1161\u11a8}'), 4, + 'Korean Jamo'); + assert.equal(await evalWidth('(\u26b0\ufe0e)'), 3, + 'coffin with text presentation'); + assert.equal(await evalWidth('(\u26b0\ufe0f)'), 4, + 'coffin with emoji presentation'); + assert.equal(await evalWidth(''), 16, + 'Égalité (using separate acute) emoij_presentation'); }); }); From f1bc95622d357b081a0fe22d051158f9622a3671 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 17 Aug 2023 13:23:10 -0700 Subject: [PATCH 28/42] Add more grapheme-cluster examples to test and demo --- .../test/UnicodeGraphemesAddon.api.ts | 8 ++++- demo/client.ts | 33 ++++++++----------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts index 13a5ef84bc..ff4f52aabf 100644 --- a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -49,14 +49,20 @@ describe('UnicodeGraphemesAddon', () => { 'baby with emoji modifier fitzpatrick type-6; baby'); assert.equal(await evalWidth('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'), 2, 'woman+zwj+woman+zwj+boy'); + assert.equal(await evalWidth('=\u{1F3CB}\u{FE0F}=\u{F3CB}\u{1F3FE}\u200D\u2640='), 7, + 'person lifting weights (plain, emoji); woman lighting weights, medium dark'); + assert.equal(await evalWidth('\u{1F469}\u{1F469}\u{200D}\u{1F393}\u{1F468}\u{1F3FF}\u{200D}\u{1F393}'), 6, + 'woman; woman student; man student dark'); assert.equal(await evalWidth('\u{1f1f3}\u{1f1f4}/'), 3, 'regional indicator symbol letters N and O, cluster'); assert.equal(await evalWidth('\u{1f1f3}/\u{1f1f4}'), 3, 'regional indicator symbol letters N and O, separated'); assert.equal(await evalWidth('\u0061\u0301'), 1, 'letter a with acute accent'); - assert.equal(await evalWidth('{\u1100\u1161\u11a8}'), 4, + assert.equal(await evalWidth('{\u1100\u1161\u11a8\u1100\u1161}'), 6, 'Korean Jamo'); + assert.equal(await evalWidth('\uAC00=\uD685='), 6, + 'Hangul syllables (pre-composed)'); assert.equal(await evalWidth('(\u26b0\ufe0e)'), 3, 'coffin with text presentation'); assert.equal(await evalWidth('(\u26b0\ufe0f)'), 4, diff --git a/demo/client.ts b/demo/client.ts index ff38623ca0..d733ec7e5c 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -1100,25 +1100,20 @@ function getRandomSgr(): string { function addGraphemeClusters(): void { term.write('\n\n\r'); - term.writeln('Simple emoji (v6: 10 cells, v15: 20 cells)'); - term.writeln('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'); - term.writeln('baby with emoji modifier fitzpatrick type-6; baby'); - term.writeln('\u{1F476}\u{1F3FF}\u{1F476}'); - term.writeln('woman+zwj+woman+zwj+boy'); - term.writeln('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'); - term.writeln('REGIONAL INDICATOR SYMBOL LETTER N and RI O'); - term.writeln('\u{1f1f3}\u{1f1f4}_'); - term.writeln('\u{1f1f3}_\u{1f1f4}'); - term.writeln('letter a with acute accent'); - term.writeln('\u0061\u0301'); - term.writeln('Korean Jamo'); - term.writeln('{\u1100\u1161\u11a8}'); - term.writeln('coffin with text_presentation'); - term.writeln('(\u26b0\ufe0e)'); - term.writeln('coffin with Emoji_presentation'); - term.writeln('(\u26b0\ufe0f)'); - term.writeln('Égalité (using separate acute) emoij_presentation'); - term.writeln(''); + term.writeln('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣 [Simple emoji v6: 10 cells, v15: 20 cells]'); + term.writeln('\u{1F476}\u{1F3FF}\u{1F476} [baby with emoji modifier fitzpatrick type-6; baby]'); + term.writeln('\u{1F469}\u200d\u{1f469}\u200d\u{1f466} [woman+zwj+woman+zwj+boy]'); + term.writeln('\u{1F64B}\u{1F64B}\u{200D}\u{2642}\u{FE0F} [person/man raising hand]'); + term.writeln('\u{1F3CB}\u{FE0F}=\u{1F3CB}\u{1F3FE}\u{200D}\u{2640}\u{FE0F} [person lifting weights emoji; woman lighting weights, medium dark]'); + term.writeln('\u{1F469}\u{1F469}\u{200D}\u{1F393}\u{1F468}\u{1F3FF}\u{200D}\u{1F393} [woman; woman student; man student dark]'); + term.writeln('\u{1f1f3}\u{1f1f4}_ [REGIONAL INDICATOR SYMBOL LETTER N and RI O]'); + term.writeln('\u{1f1f3}_\u{1f1f4} {RI N; underscore; RI O]'); + term.writeln('\u0061\u0301 [letter a with acute accent]'); + term.writeln('\u1100\u1161\u11A8=\u1100\u1161= [Korean Jamo]'); + term.writeln('\uAC00=\uD685= [Hangul syllables (pre-composed)]'); + term.writeln('(\u26b0\ufe0e) [coffin with text_presentation]'); + term.writeln('(\u26b0\ufe0f) [coffin with Emoji_presentation]'); + term.writeln(' [Égalité (using separate acute) emoij_presentation]'); } function addDecoration(): void { From 6c1eb97a1842625e0f82933fbfde5e331dfbe214 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 17 Aug 2023 14:53:23 -0700 Subject: [PATCH 29/42] Remove no-longer-applicable comment. --- test/api/Terminal.api.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/test/api/Terminal.api.ts b/test/api/Terminal.api.ts index dcaea33ee8..5fa3a83106 100644 --- a/test/api/Terminal.api.ts +++ b/test/api/Terminal.api.ts @@ -32,7 +32,6 @@ describe('API Integration Tests', function(): void { assert.equal(await page.evaluate(`window.term.rows`), 24); }); - // fails with the grapheme injection, not sure why... it('Proposed API check', async () => { await openTerminal(page, { allowProposedApi: false }, { loadUnicodeGraphemesAddon: false }); await page.evaluate(` From 21fd5453816a74732d88bab8a9d77860a2460858 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Fri, 18 Aug 2023 08:46:33 -0700 Subject: [PATCH 30/42] Don't load unicode11 addon on demo startup. --- demo/client.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/demo/client.ts b/demo/client.ts index d733ec7e5c..ce6a5c642d 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -274,7 +274,6 @@ function createTerminal(): void { addons.serialize.instance = new SerializeAddon(); addons.fit.instance = new FitAddon(); addons.image.instance = new ImageAddon(); - addons.unicode11.instance = new Unicode11Addon(); addons['unicode-graphemes'].instance = new UnicodeGraphemesAddon(); try { // try to start with webgl renderer (might throw on older safari/webkit) addons.webgl.instance = new WebglAddon(); @@ -286,7 +285,6 @@ function createTerminal(): void { typedTerm.loadAddon(addons.image.instance); typedTerm.loadAddon(addons.search.instance); typedTerm.loadAddon(addons.serialize.instance); - typedTerm.loadAddon(addons.unicode11.instance); typedTerm.loadAddon(addons['unicode-graphemes'].instance); typedTerm.loadAddon(addons['web-links'].instance); From 00fd2b850363f336ee20a75d0774f292e65722bb Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Fri, 18 Aug 2023 14:17:39 -0700 Subject: [PATCH 31/42] Set unicode.activeVersion when loading UnicodeGraphemeProvider. --- .../xterm-addon-unicode-graphemes/README.md | 3 --- .../src/UnicodeGraphemesAddon.ts | 22 ++++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/README.md b/addons/xterm-addon-unicode-graphemes/README.md index 72f1ee2759..6c23e34385 100644 --- a/addons/xterm-addon-unicode-graphemes/README.md +++ b/addons/xterm-addon-unicode-graphemes/README.md @@ -19,7 +19,4 @@ import { UnicodeGraphemeAddon } from 'xterm-addon-unicode-graphemes'; const terminal = new Terminal(); const unicodeGraphemeAddon = new UnicodeGraphemeAddon(); terminal.loadAddon(unicodeGraphemeAddon); - -// activate the new version -terminal.unicode.activeVersion = '15'; ``` diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts index 53a92e4332..c9154ed608 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts @@ -5,13 +5,29 @@ * UnicodeVersionProvider for V15 with grapeme cluster handleing. */ -import { Terminal, ITerminalAddon } from 'xterm'; +import { Terminal, ITerminalAddon, IUnicodeHandling } from 'xterm'; import { UnicodeGraphemeProvider } from './UnicodeGraphemeProvider'; export class UnicodeGraphemesAddon implements ITerminalAddon { + private _provider?: UnicodeGraphemeProvider; + private _unicode?: IUnicodeHandling; + private _oldVersion: string = ''; + public activate(terminal: Terminal): void { - terminal.unicode.register(new UnicodeGraphemeProvider()); + if (! this._provider) { + this._provider = new UnicodeGraphemeProvider(); + } + const unicode = terminal.unicode; + this._unicode = unicode; + unicode.register(this._provider); + this._oldVersion = unicode.activeVersion; + unicode.activeVersion = '15-graphemes'; + } + + public dispose(): void { + if (this._unicode) { + this._unicode.activeVersion = this._oldVersion; + } } - public dispose(): void { } } From e5a2ebda0e2fa2fbd55971e975e3c6675abbc0cf Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Mon, 21 Aug 2023 13:03:06 -0700 Subject: [PATCH 32/42] Provide with "15" and "15-graphemes" UnicodeProviders Th former doesn't support grapheme clusters, except the old-fashioned kind of simple modifiers. However, "15-graphemes" (with full cluster support) is the default. --- .../src/UnicodeGraphemeProvider.ts | 14 +++++++++++--- .../src/UnicodeGraphemesAddon.ts | 13 +++++++++---- .../test/UnicodeGraphemesAddon.api.ts | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts index b1c6202044..39fbec8407 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts @@ -9,9 +9,13 @@ import { UnicodeService } from 'common/services/UnicodeService'; import * as UC from './third-party/UnicodeProperties'; export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { - public readonly version = '15-graphemes'; + public readonly version; public ambiguousCharsAreWide: boolean = false; - constructor() { + public readonly handleGraphemes: boolean; + + constructor(handleGraphemes: boolean = true) { + this.version = handleGraphemes ? '15-graphemes' : '15'; + this.handleGraphemes = handleGraphemes; } private static readonly _plainNarrowProperties: UnicodeCharProperties @@ -36,7 +40,11 @@ export class UnicodeGraphemeProvider implements IUnicodeVersionProvider { } if (preceding !== 0) { const oldWidth = UnicodeService.extractWidth(preceding); - charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo); + if (this.handleGraphemes) { + charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo); + } else { + charInfo = w === 0 ? 1 : 0; + } shouldJoin = charInfo > 0; if (shouldJoin) { if (oldWidth > w) { diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts index c9154ed608..80290edfd6 100644 --- a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts +++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts @@ -10,17 +10,22 @@ import { UnicodeGraphemeProvider } from './UnicodeGraphemeProvider'; export class UnicodeGraphemesAddon implements ITerminalAddon { - private _provider?: UnicodeGraphemeProvider; + private _provider15Graphemes?: UnicodeGraphemeProvider; + private _provider15?: UnicodeGraphemeProvider; private _unicode?: IUnicodeHandling; private _oldVersion: string = ''; public activate(terminal: Terminal): void { - if (! this._provider) { - this._provider = new UnicodeGraphemeProvider(); + if (! this._provider15) { + this._provider15 = new UnicodeGraphemeProvider(false); + } + if (! this._provider15Graphemes) { + this._provider15Graphemes = new UnicodeGraphemeProvider(true); } const unicode = terminal.unicode; this._unicode = unicode; - unicode.register(this._provider); + unicode.register(this._provider15); + unicode.register(this._provider15Graphemes); this._oldVersion = unicode.activeVersion; unicode.activeVersion = '15-graphemes'; } diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts index ff4f52aabf..622bf53163 100644 --- a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -39,7 +39,7 @@ describe('UnicodeGraphemesAddon', () => { window.term.loadAddon(window.unicode); `); // should have loaded '15-graphemes' - assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', ourVersion]); + assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', '15', '15-graphemes']); // switch should not throw await page.evaluate(`window.term.unicode.activeVersion = '${ourVersion}';`); assert.equal(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); From dc6dd6d2a84c24ef6452333055918d09268ed828 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 24 Aug 2023 07:58:06 -0700 Subject: [PATCH 33/42] Remove precedingCodepoint - use precedingJoinState instead repeatPrecedingCharacter (for REP sequence) changed to look at the grapheme cluser to the "left" in the BufferLine, and repeat that. This is an extension of the xterm behavior, and changes the semantics of REP, so is not fully compatible. However, the new behavior is cleaner and saner. Moved precedingJoinState property to EscapeSequenceParser. --- src/common/InputHandler.ts | 50 +++++++++++------------ src/common/parser/EscapeSequenceParser.ts | 18 ++++---- src/common/parser/Types.d.ts | 9 ++-- test/api/InputHandler.api.ts | 10 ++--- 4 files changed, 42 insertions(+), 45 deletions(-) diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index 8d36fb2189..afc41a3a01 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -126,10 +126,6 @@ export class InputHandler extends Disposable implements IInputHandler { protected _windowTitleStack: string[] = []; protected _iconNameStack: string[] = []; - // Cached result of getJoinProperties(..., precedingCodepoint). - // Only valid if precedingCodepoint !== 0 - public precedingJoinState: number = -1; // UnicodeJoinProperties - private _curAttrData: IAttributeData = DEFAULT_ATTR_DATA.clone(); public getAttrData(): IAttributeData { return this._curAttrData; } private _eraseAttrDataInternal: IAttributeData = DEFAULT_ATTR_DATA.clone(); @@ -521,8 +517,7 @@ export class InputHandler extends Disposable implements IInputHandler { bufferRow.setCellFromCodePoint(this._activeBuffer.x - 1, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); } - let precedingJoinState = this._parser.precedingCodepoint === 0 ? 0 - : this.precedingJoinState; + let precedingJoinState = this._parser.precedingJoinState; for (let pos = start; pos < end; ++pos) { code = data[pos]; @@ -636,18 +631,7 @@ export class InputHandler extends Disposable implements IInputHandler { } } - this.precedingJoinState = precedingJoinState; - // store last char in Parser.precedingCodepoint for REP to work correctly - // This needs to check whether: - // - combining: only base char gets carried on (bug in xterm?) - if (end - start > 0) { - bufferRow.loadCell(this._activeBuffer.x - 1, this._workCell); - if (this._workCell.isCombined()) { - this._parser.precedingCodepoint = this._workCell.getChars().charCodeAt(0); - } else { - this._parser.precedingCodepoint = this._workCell.content; - } - } + this._parser.precedingJoinState = precedingJoinState; // handle wide chars: reset cell to the right if it is second cell of a wide char if (this._activeBuffer.x < cols && end - start > 0 && bufferRow.getWidth(this._activeBuffer.x) === 0 && !bufferRow.hasContent(this._activeBuffer.x)) { @@ -1593,9 +1577,8 @@ export class InputHandler extends Disposable implements IInputHandler { * If the character preceding REP is a control function or part of a control function, * the effect of REP is not defined by this Standard. * - * Since we propagate the terminal as xterm-256color we have to follow xterm's behavior: - * - fullwidth + surrogate chars are ignored - * - for combining chars only the base char gets repeated + * We extend xterm's behavior to allow repeating entire grapheme clusters. + * This isn't 100% xterm-compatible, but it seems saner and more useful. * - text attrs are applied normally * - wrap around is respected * - any valid sequence resets the carried forward char @@ -1609,16 +1592,29 @@ export class InputHandler extends Disposable implements IInputHandler { * (NOOP for any other sequence in between or NON ASCII characters). */ public repeatPrecedingCharacter(params: IParams): boolean { - if (!this._parser.precedingCodepoint) { + const joinState = this._parser.precedingJoinState; + if (!joinState) { return true; } // call print to insert the chars and handle correct wrapping const length = params.params[0] || 1; - const data = new Uint32Array(length); - for (let i = 0; i < length; ++i) { - data[i] = this._parser.precedingCodepoint; - } - this.print(data, 0, data.length); + const chWidth = UnicodeService.extractWidth(joinState); + const x = this._activeBuffer.x - chWidth; + const bufferRow = this._activeBuffer.lines.get(this._activeBuffer.ybase + this._activeBuffer.y)!; + const text = bufferRow.getString(x); + const data = new Uint32Array(text.length * length); + let idata = 0; + for (let itext = 0; itext < text.length; ) { + const ch = text.codePointAt(itext) || 0; + data[idata++] = ch; + itext += ch > 0xffff ? 2 : 1; + } + let tlength = idata; + for (let i = 1; i < length; ++i) { + data.copyWithin(tlength, 0, idata); + tlength += idata; + } + this.print(data, 0, tlength); return true; } diff --git a/src/common/parser/EscapeSequenceParser.ts b/src/common/parser/EscapeSequenceParser.ts index de20632248..b3d02768b6 100644 --- a/src/common/parser/EscapeSequenceParser.ts +++ b/src/common/parser/EscapeSequenceParser.ts @@ -230,7 +230,7 @@ export const VT500_TRANSITION_TABLE = (function (): TransitionTable { export class EscapeSequenceParser extends Disposable implements IEscapeSequenceParser { public initialState: number; public currentState: number; - public precedingCodepoint: number; + public precedingJoinState: number; // UnicodeJoinProperties // buffers over several parse calls protected _params: Params; @@ -271,7 +271,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params = new Params(); // defaults to 32 storable params/subparams this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; // set default fallback handlers and handler lookup containers this._printHandlerFb = (data, start, end): void => { }; @@ -448,7 +448,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; // abort pending continuation from async handler // Here the RESET type indicates, that the next parse call will // ignore any saved stack, instead continues sync with next codepoint from GROUND @@ -610,7 +610,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP // cleanup before continuing with the main sync loop this._parseStack.state = ParserStackType.NONE; start = this._parseStack.chunkPos + 1; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; this.currentState = this._parseStack.transition & TableAccess.TRANSITION_STATE_MASK; } } @@ -653,7 +653,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP case ParserAction.EXECUTE: if (this._executeHandlers[code]) this._executeHandlers[code](); else this._executeHandlerFb(code); - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.IGNORE: break; @@ -688,7 +688,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP if (j < 0) { this._csiHandlerFb(this._collect << 8 | code, this._params); } - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.PARAM: // inner loop: digits (0x30 - 0x39) and ; (0x3b) and : (0x3a) @@ -727,7 +727,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP if (jj < 0) { this._escHandlerFb(this._collect << 8 | code); } - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.CLEAR: this._params.reset(); @@ -758,7 +758,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.OSC_START: this._oscParser.start(); @@ -783,7 +783,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; } this.currentState = transition & TableAccess.TRANSITION_STATE_MASK; diff --git a/src/common/parser/Types.d.ts b/src/common/parser/Types.d.ts index a1ea0ec262..839a0a72e8 100644 --- a/src/common/parser/Types.d.ts +++ b/src/common/parser/Types.d.ts @@ -146,11 +146,12 @@ export type PrintFallbackHandlerType = PrintHandlerType; */ export interface IEscapeSequenceParser extends IDisposable { /** - * Preceding codepoint to get REP working correctly. - * This must be set by the print handler as last action. - * It gets reset by the parser for any valid sequence beside REP itself. + * Preceding grapheme-join-state. + * Used for joining graphame clusters across calls to `print`. + * Also used by REP to check if repeat a character is allowed. + * It gets reset by the parser for any valid sequence besides text. */ - precedingCodepoint: number; + precedingJoinState: number; // More specifically: UnicodeJoinProperties /** * Reset the parser to its initial state (handlers are kept). diff --git a/test/api/InputHandler.api.ts b/test/api/InputHandler.api.ts index c894a4d820..b8d20455fc 100644 --- a/test/api/InputHandler.api.ts +++ b/test/api/InputHandler.api.ts @@ -272,18 +272,18 @@ describe('InputHandler Integration Tests', function(): void { `); await pollFor(page, () => getLinesAsArray(4), ['##', '##', '##', '######']); await pollFor(page, () => getCursor(), { col: 6, row: 3 }); - // should not repeat on fullwidth chars + // do repeat on fullwidth chars (change from xterm) await page.evaluate(` window.term.reset(); window.term.write('¥\x1b[10b'); `); - await pollFor(page, () => getLinesAsArray(1), ['¥']); - // should repeat only base char of combining + await pollFor(page, () => getLinesAsArray(1), ['¥¥¥¥¥¥¥¥¥¥¥']); + // change from xterm: repeat grapheme cluster await page.evaluate(` window.term.reset(); - window.term.write('e\u0301\x1b[5b'); + window.term.write('e\u0301\x1b[2b'); `); - await pollFor(page, () => getLinesAsArray(1), ['e\u0301eeeee']); + await pollFor(page, () => getLinesAsArray(1), ['e\u0301e\u0301e\u0301']); // should wrap correctly await page.evaluate(` window.term.reset(); From 630d213bd03ef1bb4ce81cc380b4a5180e71aa4f Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Wed, 6 Sep 2023 16:42:58 -0700 Subject: [PATCH 34/42] Fix 2 comment typos. --- src/common/parser/Types.d.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/parser/Types.d.ts b/src/common/parser/Types.d.ts index 839a0a72e8..2ed4acdcaf 100644 --- a/src/common/parser/Types.d.ts +++ b/src/common/parser/Types.d.ts @@ -147,8 +147,8 @@ export type PrintFallbackHandlerType = PrintHandlerType; export interface IEscapeSequenceParser extends IDisposable { /** * Preceding grapheme-join-state. - * Used for joining graphame clusters across calls to `print`. - * Also used by REP to check if repeat a character is allowed. + * Used for joining grapheme clusters across calls to `print`. + * Also used by REP to check if repeating a character is allowed. * It gets reset by the parser for any valid sequence besides text. */ precedingJoinState: number; // More specifically: UnicodeJoinProperties From 9963c58511e1a0de7eb4bf56599ef5799cd4004e Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Thu, 7 Sep 2023 09:49:37 -0700 Subject: [PATCH 35/42] Two testing-related fixes --- .../test/UnicodeGraphemesAddon.api.ts | 2 +- test/playwright/InputHandler.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts index 622bf53163..e13ad2921c 100644 --- a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -5,7 +5,7 @@ import { assert } from 'chai'; import { openTerminal, launchBrowser } from '../../../out-test/api/TestUtils'; -import { Browser, Page } from 'playwright'; +import { Browser, Page } from '@playwright/test'; const APP = 'http://127.0.0.1:3001/test'; diff --git a/test/playwright/InputHandler.test.ts b/test/playwright/InputHandler.test.ts index e7a63e8c75..fb2eeb9ce5 100644 --- a/test/playwright/InputHandler.test.ts +++ b/test/playwright/InputHandler.test.ts @@ -258,9 +258,9 @@ test.describe('InputHandler Integration Tests', () => { // repeat on fullwidth chars await ctx.page.evaluate(` window.term.reset(); - window.term.write('¥\x1b[10b'); + window.term.write('¥\x1b[8b'); `); - await pollFor(ctx.page, () => getLinesAsArray(1), ['¥¥¥¥¥¥¥¥¥¥¥']); + await pollFor(ctx.page, () => getLinesAsArray(1), ['¥¥¥¥¥']); // change from xterm: repeat grapheme cluster await ctx.page.evaluate(` window.term.reset(); From a003034b0264283b4030a7a2cf3fc2a0b061f261 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Mon, 11 Sep 2023 06:02:17 -0700 Subject: [PATCH 36/42] Increase node memory for lint in CI --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb003eb7d8..24bd2442d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,8 @@ jobs: yarn --frozen-lockfile yarn install-addons - name: Lint code + env: + NODE_OPTIONS: --max_old_space_size=4096 run: yarn lint - name: Lint API run: yarn lint-api From 1b5756c1b10a6c58b29d1d1bfaf32fbb95623e24 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Mon, 11 Sep 2023 06:04:55 -0700 Subject: [PATCH 37/42] Add unicode-grapheme addon files to build artifacts --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 24bd2442d9..eea525d063 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,8 @@ jobs: ./addons/xterm-addon-serialize/out-test/* \ ./addons/xterm-addon-unicode11/out/* \ ./addons/xterm-addon-unicode11/out-test/* \ + ./addons/xterm-addon-unicode-graphemes/out/* \ + ./addons/xterm-addon-unicode-graphemes/out-test/* \ ./addons/xterm-addon-web-links/out/* \ ./addons/xterm-addon-web-links/out-test/* \ ./addons/xterm-addon-webgl/out/* \ From 17c4d097bbb2ff7d6b2024e96417394bba335ac2 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Mon, 11 Sep 2023 06:06:58 -0700 Subject: [PATCH 38/42] Add experimental note in readme --- addons/xterm-addon-unicode-graphemes/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/addons/xterm-addon-unicode-graphemes/README.md b/addons/xterm-addon-unicode-graphemes/README.md index 6c23e34385..d5922a6fd9 100644 --- a/addons/xterm-addon-unicode-graphemes/README.md +++ b/addons/xterm-addon-unicode-graphemes/README.md @@ -1,5 +1,7 @@ ## xterm-addon-unicode-graphemes +⚠️ **This addon is currently experimental and may introduce unexpected and non-standard behavior** + An addon providing enhanced Unicode support (include grapheme clustering) for xterm.js. The file `src/UnicodeProperties.ts` is generated and depends on the Unicode version. See [the unicode-properties project](https://github.com/PerBothner/unicode-properties) for credits and re-generation instructions. From 9e536ca9c3c47df5850e4f1ae260dc6c14992e05 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Mon, 11 Sep 2023 06:28:54 -0700 Subject: [PATCH 39/42] Fix warnings in client.ts --- demo/client.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/demo/client.ts b/demo/client.ts index 71e4eda041..9daaeacc9f 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -70,7 +70,7 @@ let socket; let pid; let autoResize: boolean = true; -type AddonType = 'attach' | 'canvas' | 'fit' | 'image' | 'search' | 'serialize' | 'unicode11' | 'unicode-graphemes' | 'webLinks' | 'webgl' | 'ligatures'; +type AddonType = 'attach' | 'canvas' | 'fit' | 'image' | 'search' | 'serialize' | 'unicode11' | 'unicodeGraphemes' | 'webLinks' | 'webgl' | 'ligatures'; interface IDemoAddon { name: T; @@ -84,7 +84,7 @@ interface IDemoAddon { T extends 'serialize' ? typeof SerializeAddon : T extends 'webLinks' ? typeof WebLinksAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'unicode-graphemes' ? typeof UnicodeGraphemesAddon : + T extends 'unicodeGraphemes' ? typeof UnicodeGraphemesAddon : T extends 'ligatures' ? typeof LigaturesAddon : typeof WebglAddon ); @@ -98,7 +98,7 @@ interface IDemoAddon { T extends 'webLinks' ? WebLinksAddon : T extends 'webgl' ? WebglAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'unicode-graphemes' ? typeof UnicodeGraphemesAddon : + T extends 'unicodeGraphemes' ? typeof UnicodeGraphemesAddon : T extends 'ligatures' ? typeof LigaturesAddon : never ); @@ -114,7 +114,7 @@ const addons: { [T in AddonType]: IDemoAddon } = { webLinks: { name: 'webLinks', ctor: WebLinksAddon, canChange: true }, webgl: { name: 'webgl', ctor: WebglAddon, canChange: true }, unicode11: { name: 'unicode11', ctor: Unicode11Addon, canChange: true }, - 'unicode-graphemes': { name: 'unicode-graphemes', ctor: UnicodeGraphemesAddon, canChange: true }, + unicodeGraphemes: { name: 'unicodeGraphemes', ctor: UnicodeGraphemesAddon, canChange: true }, ligatures: { name: 'ligatures', ctor: LigaturesAddon, canChange: true } }; @@ -184,7 +184,7 @@ const disposeRecreateButtonHandler: () => void = () => { addons.search.instance = undefined; addons.serialize.instance = undefined; addons.unicode11.instance = undefined; - addons['unicode-graphemes'].instance = undefined; + addons.unicodeGraphemes.instance = undefined; addons.ligatures.instance = undefined; addons.webLinks.instance = undefined; addons.webgl.instance = undefined; @@ -286,7 +286,7 @@ function createTerminal(): void { addons.serialize.instance = new SerializeAddon(); addons.fit.instance = new FitAddon(); addons.image.instance = new ImageAddon(); - addons['unicode-graphemes'].instance = new UnicodeGraphemesAddon(); + addons.unicodeGraphemes.instance = new UnicodeGraphemesAddon(); try { // try to start with webgl renderer (might throw on older safari/webkit) addons.webgl.instance = new WebglAddon(); } catch (e) { @@ -297,7 +297,7 @@ function createTerminal(): void { typedTerm.loadAddon(addons.image.instance); typedTerm.loadAddon(addons.search.instance); typedTerm.loadAddon(addons.serialize.instance); - typedTerm.loadAddon(addons['unicode-graphemes'].instance); + typedTerm.loadAddon(addons.unicodeGraphemes.instance); typedTerm.loadAddon(addons.webLinks.instance); window.term = term; // Expose `term` to window for debugging purposes @@ -622,7 +622,7 @@ function initAddons(term: TerminalType): void { if (name === 'unicode11' && checkbox.checked) { term.unicode.activeVersion = '11'; } - if (name === 'unicode-graphemes' && checkbox.checked) { + if (name === 'unicodeGraphemes' && checkbox.checked) { term.unicode.activeVersion = '15-graphemes'; } if (name === 'search' && checkbox.checked) { @@ -660,7 +660,7 @@ function initAddons(term: TerminalType): void { }, 0); } else if (name === 'unicode11') { term.unicode.activeVersion = '11'; - } else if (name === 'unicode-graphemes') { + } else if (name === 'unicodeGraphemes') { term.unicode.activeVersion = '15-graphemes'; } else if (name === 'search') { addon.instance.onDidChangeResults(e => updateFindResults(e)); @@ -676,7 +676,7 @@ function initAddons(term: TerminalType): void { addons.webgl.instance.textureAtlas.remove(); } else if (name === 'canvas') { addons.canvas.instance.textureAtlas.remove(); - } else if (name === 'unicode11' || name === 'unicode-graphemes') { + } else if (name === 'unicode11' || name === 'unicodeGraphemes') { term.unicode.activeVersion = '6'; } addon.instance!.dispose(); From 24b932d67aee51b58a723d0e62415b6dc234ee54 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Mon, 11 Sep 2023 06:34:32 -0700 Subject: [PATCH 40/42] Update to es2021 --- addons/xterm-addon-unicode-graphemes/src/tsconfig.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/addons/xterm-addon-unicode-graphemes/src/tsconfig.json b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json index f5489fcc1b..60824fee94 100644 --- a/addons/xterm-addon-unicode-graphemes/src/tsconfig.json +++ b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json @@ -1,10 +1,10 @@ { "compilerOptions": { "module": "commonjs", - "target": "es2015", + "target": "es2021", "lib": [ "dom", - "es2015" + "es2021" ], "rootDir": ".", "outDir": "../out", From 9ab0cd648eca7e23945eb786e41d7cccafb13888 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Tue, 12 Sep 2023 09:02:15 -0700 Subject: [PATCH 41/42] Set repository to sub-directory See #4795 --- addons/xterm-addon-unicode-graphemes/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json index 82a28bd404..38ea7271f2 100644 --- a/addons/xterm-addon-unicode-graphemes/package.json +++ b/addons/xterm-addon-unicode-graphemes/package.json @@ -7,7 +7,7 @@ }, "main": "lib/xterm-addon-unicode-graphemes.js", "types": "typings/xterm-addon-unicode-graphemes.d.ts", - "repository": "https://github.com/xtermjs/xterm.js", + "repository": "https://github.com/xtermjs/xterm.js/tree/master/addons/xterm-addon-unicode-graphemes", "license": "MIT", "keywords": [ "terminal", From 32c616512d428cb392badfcde7d7ee69b37bfb94 Mon Sep 17 00:00:00 2001 From: Daniel Imms <2193314+Tyriar@users.noreply.github.com> Date: Tue, 12 Sep 2023 12:45:29 -0700 Subject: [PATCH 42/42] Final polish --- test/api/TestUtils.ts | 7 ++++--- test/playwright/TestUtils.ts | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test/api/TestUtils.ts b/test/api/TestUtils.ts index 56318439f8..288a3c0719 100644 --- a/test/api/TestUtils.ts +++ b/test/api/TestUtils.ts @@ -43,11 +43,12 @@ export async function timeout(ms: number): Promise { return new Promise(r => setTimeout(r, ms)); } -export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}, testOptions: any = { loadUnicodeGraphemesAddon: true}): Promise { +export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}, testOptions: { loadUnicodeGraphemesAddon: boolean } = { loadUnicodeGraphemesAddon: true }): Promise { await page.evaluate(`window.term = new Terminal(${JSON.stringify({ allowProposedApi: true, ...options })})`); await page.evaluate(`window.term.open(document.querySelector('#terminal-container'))`); - - // See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 + + // HACK: This is a soft layer breaker that's temporarily included until unicode graphemes have + // more complete integration tests. See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 if (testOptions.loadUnicodeGraphemesAddon) { await page.evaluate(` window.unicode = new UnicodeGraphemesAddon(); diff --git a/test/playwright/TestUtils.ts b/test/playwright/TestUtils.ts index 65bfb459de..0a51757fac 100644 --- a/test/playwright/TestUtils.ts +++ b/test/playwright/TestUtils.ts @@ -351,7 +351,7 @@ class TerminalCoreProxy { } } -export async function openTerminal(ctx: ITestContext, options: ITerminalOptions | ITerminalInitOnlyOptions = {}, testOptions: any = { loadUnicodeGraphemesAddon: true}): Promise { +export async function openTerminal(ctx: ITestContext, options: ITerminalOptions | ITerminalInitOnlyOptions = {}, testOptions: { loadUnicodeGraphemesAddon: boolean } = { loadUnicodeGraphemesAddon: true }): Promise { await ctx.page.evaluate(` if ('term' in window) { try { @@ -366,7 +366,8 @@ export async function openTerminal(ctx: ITestContext, options: ITerminalOptions window.term = new window.Terminal(${JSON.stringify({ allowProposedApi: true, ...options })}); window.term.open(document.querySelector('#terminal-container')); `); - // See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 + // HACK: This is a soft layer breaker that's temporarily included until unicode graphemes have + // more complete integration tests. See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 if (testOptions.loadUnicodeGraphemesAddon) { await ctx.page.evaluate(` window.unicode = new UnicodeGraphemesAddon();