-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(marshal)!: compare strings by codepoint
- Loading branch information
Showing
7 changed files
with
176 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import { test } from './prepare-test-env-ava.js'; | ||
|
||
import { compareRank } from '../src/rankOrder.js'; | ||
import { encodePassable } from './test-encodePassable.js'; | ||
|
||
/** | ||
* Essentially a ponyfill for Array.prototype.toSorted, for use before | ||
* we can always rely on the platform to provide it. | ||
* | ||
* Spreads `strings` into a fresh array and sorts that copy with `comp`, | ||
* so the array passed in is never sorted in place. | ||
* | ||
* @param {string[]} strings The strings to sort; only read, never mutated. | ||
* @param {( | ||
* left: string, | ||
* right: string | ||
* ) => import('../src/types.js').RankComparison} comp | ||
* Comparator handed directly to `Array.prototype.sort`. | ||
* @returns {string[]} A new array holding the same strings in sorted order. | ||
*/ | ||
const sorted = (strings, comp) => [...strings].sort(comp); | ||
|
||
// Sorts the same four strings with the native `<` / `>` operators and with | ||
// `compareRank`, asserting a different expected order for each, and then | ||
// asserts that comparing the `encodePassable` encodings of the strings | ||
// yields the same order as comparing the raw strings, per comparator. | ||
test('unicode code point order', t => { | ||
// Test case from | ||
// https://icu-project.org/docs/papers/utf16_code_point_order.html | ||
const str0 = '\u{ff61}'; | ||
// A high surrogate followed by a low surrogate: the UTF-16 encoding of | ||
// the single supplementary code point U+10002. | ||
const str3 = '\u{d800}\u{dc02}'; | ||
|
||
// str1 and str2 become impossible examples once we prohibit | ||
// non-well-formed strings. | ||
// See https://github.com/endojs/endo/pull/2002 | ||
// Both start with a high surrogate that is not followed by a low | ||
// surrogate. | ||
const str1 = '\u{d800}X'; | ||
const str2 = '\u{d800}\u{ff61}'; | ||
|
||
// harden to ensure it is not sorted in place, just for sanity | ||
const strs = harden([str0, str1, str2, str3]); | ||
|
||
/** | ||
* Three-way comparison built from the native `<` and `>` string | ||
* operators. | ||
* | ||
* @param {string} left | ||
* @param {string} right | ||
* @returns {import('../src/types.js').RankComparison} | ||
*/ | ||
const nativeComp = (left, right) => | ||
// eslint-disable-next-line no-nested-ternary | ||
left < right ? -1 : left > right ? 1 : 0; | ||
|
||
const nativeSorted = sorted(strs, nativeComp); | ||
|
||
// Expected native order: str3 (code units d800, dc02) lands between str1 | ||
// and str2, ahead of str0 (code unit ff61). | ||
t.deepEqual(nativeSorted, [str1, str3, str2, str0]); | ||
|
||
const rankSorted = sorted(strs, compareRank); | ||
|
||
// Expected `compareRank` order: str3 now sorts last, after str0. | ||
t.deepEqual(rankSorted, [str1, str2, str0, str3]); | ||
|
||
// Same native comparison, applied to the encodings instead of the raw | ||
// strings. | ||
const nativeEncComp = (left, right) => | ||
nativeComp(encodePassable(left), encodePassable(right)); | ||
|
||
const nativeEncSorted = sorted(strs, nativeEncComp); | ||
|
||
// Natively comparing the encodings must give the native order of the raw | ||
// strings. | ||
t.deepEqual(nativeEncSorted, nativeSorted); | ||
|
||
// `compareRank` applied to the encodings instead of the raw strings. | ||
const rankEncComp = (left, right) => | ||
compareRank(encodePassable(left), encodePassable(right)); | ||
|
||
const rankEncSorted = sorted(strs, rankEncComp); | ||
|
||
// Rank-comparing the encodings must give the rank order of the raw | ||
// strings. | ||
t.deepEqual(rankEncSorted, rankSorted); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
User-visible changes in `@endo/patterns`: | ||
|
||
# next release | ||
|
||
- JavaScript's relational comparison operators like `<` compare strings by lexicographic UTF16 code unit order, which exposes an internal representational detail not relevant to the string's meaning as a Unicode string. Previously, `compareKeys` and associated functions compared strings using this JavaScript-native comparison. Now `compareKeys` and associated functions compare strings by lexicographic Unicode Code Point order. ***This change only affects strings containing so-called supplementary characters, i.e., those whose Unicode character code does not fit in 16 bits***. | ||
- See the NEWS.md of @endo/marshal for more on this change. | ||
|
||
# v0.2.6 (2023-09-11) | ||
|
||
- Adds support for CopyMap patterns (e.g., `matches(specimen, makeCopyMap([]))`). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// modeled on test-string-rank-order.js | ||
|
||
import { test } from './prepare-test-env-ava.js'; | ||
|
||
import { compareKeys } from '../src/keys/compareKeys.js'; | ||
|
||
/** | ||
* Essentially a ponyfill for Array.prototype.toSorted, for use before | ||
* we can always rely on the platform to provide it. | ||
* | ||
* Spreads `strings` into a fresh array and sorts that copy with `comp`, | ||
* so the array passed in is never sorted in place. | ||
* | ||
* @param {string[]} strings The strings to sort; only read, never mutated. | ||
* @param {( | ||
* left: string, | ||
* right: string | ||
* ) => import('@endo/marshal').RankComparison} comp | ||
* Comparator handed directly to `Array.prototype.sort`. | ||
* @returns {string[]} A new array holding the same strings in sorted order. | ||
*/ | ||
const sorted = (strings, comp) => [...strings].sort(comp); | ||
|
||
// Sorts the same four strings with the native `<` / `>` operators and with | ||
// `compareKeys`, asserting a different expected order for each. | ||
test('unicode code point order', t => { | ||
// Test case from | ||
// https://icu-project.org/docs/papers/utf16_code_point_order.html | ||
const str0 = '\u{ff61}'; | ||
// A high surrogate followed by a low surrogate: the UTF-16 encoding of | ||
// the single supplementary code point U+10002. | ||
const str3 = '\u{d800}\u{dc02}'; | ||
|
||
// str1 and str2 become impossible examples once we prohibit | ||
// non-well-formed strings. | ||
// See https://github.com/endojs/endo/pull/2002 | ||
// Both start with a high surrogate that is not followed by a low | ||
// surrogate. | ||
const str1 = '\u{d800}X'; | ||
const str2 = '\u{d800}\u{ff61}'; | ||
|
||
// harden to ensure it is not sorted in place, just for sanity | ||
const strs = harden([str0, str1, str2, str3]); | ||
|
||
/** | ||
* Three-way comparison built from the native `<` and `>` string | ||
* operators. | ||
* | ||
* @param {string} left | ||
* @param {string} right | ||
* @returns {import('@endo/marshal').RankComparison} | ||
*/ | ||
const nativeComp = (left, right) => | ||
// eslint-disable-next-line no-nested-ternary | ||
left < right ? -1 : left > right ? 1 : 0; | ||
|
||
const nativeSorted = sorted(strs, nativeComp); | ||
|
||
// Expected native order: str3 (code units d800, dc02) lands between str1 | ||
// and str2, ahead of str0 (code unit ff61). | ||
t.deepEqual(nativeSorted, [str1, str3, str2, str0]); | ||
|
||
// @ts-expect-error We know that for strings, `compareKeys` never returns | ||
// NaN because it never judges strings to be incomparable. Thus, the | ||
// KeyComparison it returns happens to also be a RankComparison we can | ||
// sort with. | ||
const keySorted = sorted(strs, compareKeys); | ||
|
||
// Expected `compareKeys` order: str3 now sorts last, after str0. | ||
t.deepEqual(keySorted, [str1, str2, str0, str3]); | ||
}); |