Skip to content

Commit

Permalink
fix(pass-style): only well-formed strings are passable
Browse files Browse the repository at this point in the history
  • Loading branch information
erights committed Jan 24, 2024
1 parent 5b2537c commit 47d0bb9
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 7 deletions.
6 changes: 6 additions & 0 deletions packages/pass-style/NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
User-visible changes in `@endo/pass-style`:

# next release

- Previously, all JavaScript strings were considered Passable with `passStyleOf(str) === 'string'`. Now, only well-formed Unicode strings are considered Passable. For all others, `passStyleOf(str)` throws a diagnostic error. This brings us into closer conformance to the OCapN standard, which prohibits sending non-well-formed strings, and requires non-well-formed strings to be rejected when received.
- Exports `isWellFormedString` and `assertWellFormedString`. Unfortunately the [standard `String.prototype.isWellFormed`](https://tc39.es/proposal-is-usv-string/) first coerces its input to string, leading it to claim that some non-strings are well-formed strings. By contrast, `isWellFormedString` and `assertWellFormedString` will not judge any non-strings to be well-formed strings.
7 changes: 6 additions & 1 deletion packages/pass-style/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ export {
passableSymbolForName,
} from './src/symbol.js';

export { passStyleOf, assertPassable } from './src/passStyleOf.js';
export {
isWellFormedString,
assertWellFormedString,
passStyleOf,
assertPassable,
} from './src/passStyleOf.js';

export { makeTagged } from './src/makeTagged.js';
export {
Expand Down
60 changes: 59 additions & 1 deletion packages/pass-style/src/passStyleOf.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,61 @@ const { details: X, Fail, quote: q } = assert;
const { ownKeys } = Reflect;
const { isFrozen } = Object;

// @ts-expect-error TS builtin `String` type does not yet
// know about`isWellFormed`
const hasWellFormedStringMethod = !!String.prototype.isWellFormed;

/**
* Is the argument a well-formed string?
*
* Unfortunately, the
* [standard built-in `String.prototype.isWellFormed`](https://github.com/tc39/proposal-is-usv-string)
* does a ToString on its input, causing it to judge non-strings to be
* well-formed strings if they coerce to a well-formed strings. This
* recapitulates the mistake in having the global `isNaN` coerce its inputs,
* causing it to judge non-string to be NaN if they coerce to NaN.
*
* This `isWellFormedString` function only judges well-formed strings to be
* well-formed strings. For all non-strings it returns false.
*
* @param {unknown} str
* @returns {str is boolean}
*/
export const isWellFormedString = hasWellFormedStringMethod
? // @ts-expect-error TS does not yet know about `isWellFormed`
str => typeof str === 'string' && str.isWellFormed()
: str => {
if (typeof str !== 'string') {
return false;
}
for (const ch of str) {
// The string iterator iterates by Unicode code point, not
// UTF16 code unit. But if it encounters an unpaired surrogate,
// it will produce it.
const cp = /** @type {number} */ (ch.codePointAt(0));
if (cp >= 0xd800 && cp <= 0xdfff) {
// All surrogates are in this range. The string iterator only
// produces a character in this range for unpaired surrogates,
// which only happens if the string is not well-formed.
return false;
}
}
return true;
};
harden(isWellFormedString);

/**
* Returns normally when `isWellFormedString(str)` would return true.
* Throws a diagnostic error when `isWellFormedString(str)` would return false.
*
* @param {unknown} str
* @returns {asserts str is string}
*/
export const assertWellFormedString = str => {
isWellFormedString(str) || Fail`Expected well-formed unicode string: ${str}`;
};
harden(assertWellFormedString);

/**
* @param {PassStyleHelper[]} passStyleHelpers
* @returns {Record<HelperPassStyle, PassStyleHelper> }
Expand Down Expand Up @@ -124,12 +179,15 @@ const makePassStyleOf = passStyleHelpers => {
const typestr = typeof inner;
switch (typestr) {
case 'undefined':
case 'string':
case 'boolean':
case 'number':
case 'bigint': {
return typestr;
}
case 'string': {
assertWellFormedString(inner);
return 'string';
}
case 'symbol': {
assertPassableSymbol(inner);
return 'symbol';
Expand Down
5 changes: 0 additions & 5 deletions packages/pass-style/test/test-pass-style.js

This file was deleted.

49 changes: 49 additions & 0 deletions packages/pass-style/test/test-passable-string.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* eslint-disable no-useless-concat */
import { test } from './prepare-test-env-ava.js';

import { passStyleOf, isWellFormedString, assertWellFormedString } from '../src/passStyleOf.js';

test('test string well formedness behaviors', t => {
const gcleff1 = '\u{1D11E}';
const gcleff2 = '\u{D834}\u{DD1E}';
const gcleff3 = '\u{D834}' + '\u{DD1E}';
const badcleff1 = '\u{D834}\u{D834}\u{DD1E}';
const badcleff2 = '\u{D834}\u{DD1E}\u{D834}';
const badcleff3 = '\u{D834}' + '\u{DD1E}\u{D834}';

// This test block ensures that the underlying platform behaves as we expect
t.is(gcleff1, gcleff2);
t.is(gcleff1, gcleff3);
t.is(gcleff1.length, 2);
t.is(gcleff2.length, 2);
t.is(gcleff3.length, 2);
// Uses string iterator, which iterates code points if possible, not
// UTF16 code units
t.deepEqual([...gcleff1], [gcleff1]);
t.not(badcleff1, badcleff2);
t.is(badcleff2, badcleff3);
t.is(badcleff1.length, 3);
// But if the string contains lone surrogates, the string iterator will
// produce those as characters
t.deepEqual([...badcleff1], ['\u{D834}', gcleff1]);
t.deepEqual([...badcleff2], [gcleff1, '\u{D834}']);

t.is(passStyleOf(gcleff1), 'string');
t.true(isWellFormedString(gcleff1));
t.notThrows(() => assertWellFormedString(gcleff1));

t.throws(() => passStyleOf(badcleff1), {
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
});
t.throws(() => passStyleOf(badcleff2), {
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
});
t.false(isWellFormedString(badcleff1));
t.false(isWellFormedString(badcleff2));
t.throws(() => assertWellFormedString(badcleff1), {
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
});
t.throws(() => assertWellFormedString(badcleff2), {
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
});
});

0 comments on commit 47d0bb9

Please sign in to comment.