Skip to content

Commit

Permalink
[JSC] Intl.Collator should take collation option
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=216529

Reviewed by Ross Kirsling.

JSTests:

* stress/intl-collator-co-extension.js:
(shouldThrow):

Source/JavaScriptCore:

This patch adds "collation" option to Intl.Collator. We are already getting consensus[1], and will be integrated into the spec.
Previously, passing "collation" is only available through "-u-co-" unicode extension in the passed locale. The proposal exposes
collation option as an option to Intl.Collator so that we can set it easily.
"collation" is used only when "usage" is "sort". "search" usage will filter out collation options since "search" itself is one of
the "collation" option.

[1]: tc39/ecma402#459

* runtime/IntlCollator.cpp:
(JSC::IntlCollator::sortLocaleData):
(JSC::IntlCollator::initializeCollator):

Canonical link: https://commits.webkit.org/229383@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@267102 268f45cc-cd09-0410-ab3c-d52691b4dbfc
  • Loading branch information
Constellation committed Sep 15, 2020
1 parent ed8484a commit 3d81389
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 11 deletions.
10 changes: 10 additions & 0 deletions JSTests/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
2020-09-14 Yusuke Suzuki <[email protected]>

[JSC] Intl.Collator should take collation option
https://bugs.webkit.org/show_bug.cgi?id=216529

Reviewed by Ross Kirsling.

* stress/intl-collator-co-extension.js:
(shouldThrow):

2020-09-14 Keith Miller <[email protected]>

BytecodeParser should GetLocal op_ret's value even if it's unused by the caller
Expand Down
38 changes: 38 additions & 0 deletions JSTests/stress/intl-collator-co-extension.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,26 @@ function shouldBeArray(actual, expected) {
}
}

function shouldThrow(func, errorMessage) {
var errorThrown = false;
var error = null;
try {
func();
} catch (e) {
errorThrown = true;
error = e;
}
if (!errorThrown)
throw new Error('not thrown');
if (String(error) !== errorMessage)
throw new Error(`bad error: ${String(error)}`);
}

shouldBeArray(["AE", "\u00C4"].sort(new Intl.Collator("de", {usage: "sort"}).compare), ["\u00C4", "AE"]);
shouldBeArray(["AE", "\u00C4"].sort(new Intl.Collator("de", {usage: "search"}).compare), ["AE", "\u00C4"]);
shouldBe(new Intl.Collator("de", {usage: "sort"}).resolvedOptions().locale, "de");
shouldBe(new Intl.Collator("de", {usage: "search"}).resolvedOptions().locale, "de");
shouldBe(new Intl.Collator("de", {usage: "sort", collation: "phonebk"}).resolvedOptions().locale, "de");
shouldBeArray(["2", "10"].sort(new Intl.Collator("de", {usage: "sort"}).compare), ["10", "2"]);
shouldBeArray(["2", "10"].sort(new Intl.Collator("de", {usage: "search"}).compare), ["10", "2"]);

Expand All @@ -40,6 +56,28 @@ shouldBe(new Intl.Collator("de-u-kn", {usage: "sort"}).resolvedOptions().locale,
shouldBe(new Intl.Collator("de-u-kn", {usage: "search"}).resolvedOptions().locale, "de-u-kn");

shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de-u-co-phonebk").compare), ["a", "ae", "ä", "æ"]);
shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de", { collation: 'phonebk' }).compare), ["a", "ae", "ä", "æ"]);
shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de").compare), ["a", "ä", "ae", "æ"]);
shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de-u-co-phonebk", { usage: 'search' }).compare), ["a", "ae", "ä", "æ"]);
shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de", { usage: 'search', collation: 'phonebk' }).compare), ["a", "ae", "ä", "æ"]);
shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de", { usage: 'search' }).compare), ["a", "ae", "ä", "æ"]);

// We cannot set sort / search / standard though "collation" option. Use "usage" instead.
shouldBe(new Intl.Collator("de-u-kn", {collation: "search"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-kn", {collation: "sort"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-kn", {collation: "standard"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-kn", {usage: "search", collation: "search"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-kn", {usage: "search", collation: "sort"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-kn", {usage: "search", collation: "standard"}).resolvedOptions().collation, "default");

shouldThrow(() => {
new Intl.Collator("de-u-kn", {"collation": "phonebook"});
}, `RangeError: collation is not a well-formed collation value`);

shouldBe(new Intl.Collator("de-u-kn", {collation: "phonebk"}).resolvedOptions().collation, "phonebk");
shouldBe(new Intl.Collator("de-u-kn", {usage: "sort", collation: "phonebk"}).resolvedOptions().collation, "phonebk");
shouldBe(new Intl.Collator("de-u-kn", {usage: "search", collation: "phonebk"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-co-dict", {collation: "phonebk"}).resolvedOptions().collation, "phonebk");
shouldBe(new Intl.Collator("de-u-co-dict", {collation: "phonebk"}).resolvedOptions().locale, "de");
shouldBe(new Intl.Collator("de-u-co-dict", {usage: "search", collation: "phonebk"}).resolvedOptions().collation, "default");
shouldBe(new Intl.Collator("de-u-co-dict", {usage: "search", collation: "phonebk"}).resolvedOptions().locale, "de");
19 changes: 19 additions & 0 deletions Source/JavaScriptCore/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
2020-09-14 Yusuke Suzuki <[email protected]>

[JSC] Intl.Collator should take collation option
https://bugs.webkit.org/show_bug.cgi?id=216529

Reviewed by Ross Kirsling.

This patch adds "collation" option to Intl.Collator. We are already getting consensus[1], and will be integrated into the spec.
Previously, passing "collation" is only available through "-u-co-" unicode extension in the passed locale. The proposal exposes
collation option as an option to Intl.Collator so that we can set it easily.
"collation" is used only when "usage" is "sort". "search" usage will filter out collation options since "search" itself is one of
the "collation" option.

[1]: https://github.com/tc39/ecma402/pull/459

* runtime/IntlCollator.cpp:
(JSC::IntlCollator::sortLocaleData):
(JSC::IntlCollator::initializeCollator):

2020-09-15 Joonghun Park <[email protected]>

Unreviewed. Remove the build warning below since r228533.
Expand Down
37 changes: 26 additions & 11 deletions Source/JavaScriptCore/runtime/IntlCollator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,27 +85,26 @@ Vector<String> IntlCollator::sortLocaleData(const String& locale, RelevantExtens
keyLocaleData.append({ });

UErrorCode status = U_ZERO_ERROR;
UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status);
auto enumeration = std::unique_ptr<UEnumeration, ICUDeleter<uenum_close>>(ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status));
if (U_SUCCESS(status)) {
const char* collation;
while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) {
while ((collation = uenum_next(enumeration.get(), nullptr, &status)) && U_SUCCESS(status)) {
// 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array."
if (!strcmp(collation, "standard") || !strcmp(collation, "search"))
continue;

// Map keyword values to BCP 47 equivalents.
if (!strcmp(collation, "dictionary"))
collation = "dict";
keyLocaleData.append("dict"_s);
else if (!strcmp(collation, "gb2312han"))
collation = "gb2312";
keyLocaleData.append("gb2312"_s);
else if (!strcmp(collation, "phonebook"))
collation = "phonebk";
keyLocaleData.append("phonebk"_s);
else if (!strcmp(collation, "traditional"))
collation = "trad";

keyLocaleData.append(collation);
keyLocaleData.append("trad"_s);
else
keyLocaleData.append(collation);
}
uenum_close(enumeration);
}
break;
}
Expand Down Expand Up @@ -178,6 +177,18 @@ void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue loca
LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
RETURN_IF_EXCEPTION(scope, void());

{
String collation = intlStringOption(globalObject, options, vm.propertyNames->collation, { }, nullptr, nullptr);
RETURN_IF_EXCEPTION(scope, void());
if (!collation.isNull()) {
if (!isUnicodeLocaleIdentifierType(collation)) {
throwRangeError(globalObject, scope, "collation is not a well-formed collation value"_s);
return;
}
localeOptions[static_cast<unsigned>(RelevantExtensionKey::Co)] = WTFMove(collation);
}
}

TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric);
RETURN_IF_EXCEPTION(scope, void());
if (numeric != TriState::Indeterminate)
Expand Down Expand Up @@ -220,16 +231,20 @@ void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue loca
CString dataLocaleWithExtensions;
switch (m_usage) {
case Usage::Sort:
dataLocaleWithExtensions = m_locale.utf8();
if (collation.isNull())
dataLocaleWithExtensions = resolved.dataLocale.utf8();
else
dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-", m_collation).utf8();
break;
case Usage::Search:
// searchLocaleData filters out "co" unicode extension. However, we need to pass "co" to ICU when Usage::Search is specified.
// So we need to pass "co" unicode extension through locale. Since the other relevant extensions are handled via ucol_setAttribute,
// we can just use dataLocale
// Since searchLocaleData filters out "co" unicode extension, "collation" option is just ignored.
dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-search").utf8();
break;
}
dataLogLnIf(IntlCollatorInternal::verbose, "dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")");
dataLogLnIf(IntlCollatorInternal::verbose, "locale:(", resolved.locale, "),dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")");

UErrorCode status = U_ZERO_ERROR;
m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(dataLocaleWithExtensions.data(), &status));
Expand Down

0 comments on commit 3d81389

Please sign in to comment.