diff --git a/NEWS b/NEWS index 44cc5ed33d35..d55fd75fa36d 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,12 @@ For full details, see the git log at: https://github.com/ksh93/ksh Any uppercase BUG_* names are modernish shell bug IDs. +2020-07-05: + +- In UTF-8 locales, fix corruption of the shell's internal string quoting + algorithm (as used by xtrace, 'printf %q', and more) that occurred when + the processing of a multibyte character was interrupted. + 2020-07-03: - Backslashes are no longer escaped in the raw Bourne Shell-like editing diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index 0f0a4fb8c52d..2ee19b9b1b31 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -17,4 +17,4 @@ * David Korn * * * ***********************************************************************/ -#define SH_RELEASE "93u+m 2020-07-02" +#define SH_RELEASE "93u+m 2020-07-05" diff --git a/src/cmd/ksh93/sh/string.c b/src/cmd/ksh93/sh/string.c index 79b37f482209..5eb124b75b23 100644 --- a/src/cmd/ksh93/sh/string.c +++ b/src/cmd/ksh93/sh/string.c @@ -336,6 +336,9 @@ char *sh_fmtq(const char *string) int offset; if(!cp) return((char*)0); +#if SHOPT_MULTIBYTE + mbinit(); +#endif /* SHOPT_MULTIBYTE */ offset = staktell(); state = ((c= mbchar(cp))==0); if(isaletter(c)) diff --git a/src/cmd/ksh93/tests/builtins.sh b/src/cmd/ksh93/tests/builtins.sh index 470644be4852..fd093c66bf15 100755 --- a/src/cmd/ksh93/tests/builtins.sh +++ b/src/cmd/ksh93/tests/builtins.sh @@ -302,7 +302,23 @@ fi if [[ $(printf '%..*s\n' : abc def) != abc:def ]] then err_exit "printf '%..*s' not working" fi + +# ====== +# shell-quoting using printf %q (same algorithm used for xtrace and output of 'set', 'trap', ...) + [[ $(printf '%q\n') == '' ]] || err_exit 'printf "%q" with missing arguments' + +# the following fails on 2012-08-01 in UTF-8 locales +expect="'shell-quoted string'" +actual=$( + print -nr $'\303\274' | read -n1 foo # interrupt processing of 2-byte UTF-8 char after reading 1 byte + printf '%q\n' "shell-quoted string" +) +LC_CTYPE=POSIX true # on buggy ksh, a locale re-init via temp assignment restores correct shellquoting +[[ $actual == "$expect" ]] || err_exit 'shell-quoting corrupted after interrupted processing of UTF-8 char' \ + "(expected $expect; got $actual)" + +# ====== # we won't get hit by the one second boundary twice, right? expect= actual= { expect=$(LC_ALL=C date | sed 's/ GMT / UTC /') && actual=$(LC_ALL=C printf '%T\n' now) && [[ $actual == "$expect" ]]; } || diff --git a/src/lib/libast/comp/setlocale.c b/src/lib/libast/comp/setlocale.c index ba31fe914478..65553f8bb108 100644 --- a/src/lib/libast/comp/setlocale.c +++ b/src/lib/libast/comp/setlocale.c @@ -600,6 +600,8 @@ utf8_mbtowc(wchar_t* wp, const char* str, size_t n) register int c; register wchar_t w = 0; + if (!wp && !sp) + ast.mb_sync = 0; /* assume call from mbinit() macro: reset global multibyte sync state */ if (!sp || !n) return 0; if ((m = utf8tab[*sp]) > 0)