Skip to content

Commit

Permalink
PICARD-2953: Use strxfrm for sorting on Windows again
Browse files Browse the repository at this point in the history
On Windows QCollator.sortKey is broken and results in wrong ordering of
numbers, both in numeric and normal alphabetic mode.
  • Loading branch information
phw committed Aug 24, 2024
1 parent 7d79d41 commit 3090c1e
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 7 deletions.
40 changes: 33 additions & 7 deletions picard/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,6 @@ def gettext_ctxt(gettext_, message, context=None):
return translated


def _digit_replace(matchobj):
    """Return the matched text, converting a decimal digit to its ASCII form.

    Intended as a replacement callback for ``re.sub``.

    Args:
        matchobj: A regular expression match object.

    Returns:
        ``str(int(text))`` when the matched text consists only of decimal
        characters, otherwise the matched text unchanged.
    """
    s = matchobj.group(0)
    # str.isdigit() is also true for characters such as superscript digits,
    # which int() rejects with ValueError. str.isdecimal() only accepts
    # characters int() can actually parse.
    return str(int(s)) if s.isdecimal() else s


def sort_key(string, numeric=False):
"""Transforms a string to one that can be used in locale-aware comparisons.
Expand All @@ -240,13 +235,44 @@ def sort_key(string, numeric=False):
Returns: An object that can be compared locale-aware
"""
# QCollator.sortKey is broken, see https://bugreports.qt.io/browse/QTBUG-128170
if IS_WIN:
return _sort_key_strxfrm(string, numeric)
else:
return _sort_key_qt(string, numeric)


# Matches runs of decimal digits. The capturing group makes RE_NUMBER.split()
# keep the digit runs in its result, which _sort_key_strxfrm relies on to
# turn them into integers.
RE_NUMBER = re.compile(r'(\d+)')


def _digits_replace(matchobj):
    """Normalize a matched run of decimal digits through ``int``.

    Intended as a replacement callback for ``re.sub``. Converting through
    ``int`` yields ASCII digits and drops leading zeros.

    Args:
        matchobj: A regular expression match object.

    Returns:
        The normalized number as a string if the matched text is entirely
        decimal, otherwise the matched text unchanged.
    """
    matched = matchobj.group(0)
    if not matched.isdecimal():
        return matched
    return str(int(matched))


def _sort_key_qt(string, numeric=False):
    """Build a sort key for ``string`` with QCollator.

    Args:
        string: The string to create the sort key for.
        numeric: If True, ``_qcollator_numeric`` is used instead of
            ``_qcollator`` and, on macOS and Windows, digit runs are
            normalized before the key is computed.

    Returns:
        The value returned by the collator's ``sortKey``.
    """
    collator = _qcollator_numeric if numeric else _qcollator
    # On macOS / Windows the numeric sorting does not work reliably with
    # non-latin scripts. Replace numbers in the sort string with their latin
    # equivalent. One pass over whole digit runs is sufficient, as
    # _digits_replace converts each run through int().
    if numeric and (IS_MACOS or IS_WIN):
        string = RE_NUMBER.sub(_digits_replace, string)

    # On macOS numeric sorting of strings entirely consisting of numeric
    # characters fails and always sorts alphabetically (002 < 1). Always
    # prefix with an alphabetic character to work around that.
    # Null characters are removed before the key is computed.
    return collator.sortKey('a' + string.replace('\0', ''))


def _sort_key_strxfrm(string, numeric=False):
    """Build a sort key for ``string`` based on ``locale.strxfrm``.

    Args:
        string: The value to create the sort key for. It is converted with
            ``str()`` and null characters are removed.
        numeric: If True, the string is split into digit runs and the text
            between them, so that numbers compare by value.

    Returns:
        If ``numeric`` is False, the transformed string. If ``numeric`` is
        True, a list alternating transformed text parts (even indexes) and
        ``int`` values (odd indexes).
    """
    # Normalize the input the same way in both modes. Embedded null
    # characters are rejected by locale.strxfrm, which would otherwise push
    # such strings onto the lowercase fallback of _strxfrm and make their
    # keys inconsistent with all other keys.
    string = str(string).replace('\0', '')
    if not numeric:
        return _strxfrm(string)
    # RE_NUMBER has a capturing group, so split() keeps the digit runs.
    return [int(part) if part.isdecimal() else _strxfrm(part)
            for part in RE_NUMBER.split(string)]


def _strxfrm(string):
    """Transform ``string`` with ``locale.strxfrm``, falling back to lowercase.

    Args:
        string: The string to transform.

    Returns:
        The result of ``locale.strxfrm(string)``, or ``string.lower()`` if
        the transformation raises ``OSError`` or ``ValueError``.
    """
    try:
        transformed = locale.strxfrm(string)
    except (OSError, ValueError):
        # Best-effort fallback when the locale transformation fails.
        transformed = string.lower()
    return transformed
2 changes: 2 additions & 0 deletions test/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,12 @@ def test_sort_key(self):
self.assertTrue(i18n.sort_key('äb') < i18n.sort_key('ac'))
self.assertTrue(i18n.sort_key('foo002') < i18n.sort_key('foo1'))
self.assertTrue(i18n.sort_key('002 foo') < i18n.sort_key('1 foo'))
self.assertTrue(i18n.sort_key('1') < i18n.sort_key('C'))
self.assertTrue(i18n.sort_key('foo1', numeric=True) < i18n.sort_key('foo002', numeric=True))
self.assertTrue(i18n.sort_key('004', numeric=True) < i18n.sort_key('5', numeric=True))
self.assertTrue(i18n.sort_key('0042', numeric=True) < i18n.sort_key('50', numeric=True))
self.assertTrue(i18n.sort_key('5', numeric=True) < i18n.sort_key('0042', numeric=True))
self.assertTrue(i18n.sort_key('99', numeric=True) < i18n.sort_key('100', numeric=True))

def test_sort_key_numbers_different_scripts(self):
i18n.setup_gettext(localedir, 'en')
Expand Down

0 comments on commit 3090c1e

Please sign in to comment.