diff --git a/src/cmd/ksh93/meson.build b/src/cmd/ksh93/meson.build
index 4419647c51b3..43afa8312ba7 100644
--- a/src/cmd/ksh93/meson.build
+++ b/src/cmd/ksh93/meson.build
@@ -88,7 +88,7 @@ all_tests = [
     ['alias'], ['append'], ['arith'], ['arrays'], ['arrays2'], ['attributes'],
     ['basic', 90], ['bracket'], ['builtins'], ['case'], ['comvar'], ['comvario'],
     ['coprocess', 50], ['cubetype'], ['directoryfd'], ['enum'],
-    ['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'],
+    ['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'], ['ifs'],
     ['io'], ['leaks'], ['locale'], ['math', 50], ['nameref'], ['namespace'],
     ['modifiers'], ['options'], ['path'], ['pointtype'], ['quoting'], ['quoting2'],
     ['readcsv'], ['recttype'], ['restricted'], ['return'], ['select'],
diff --git a/src/cmd/ksh93/sh/macro.c b/src/cmd/ksh93/sh/macro.c
index a526e189319f..c60fa052601f 100644
--- a/src/cmd/ksh93/sh/macro.c
+++ b/src/cmd/ksh93/sh/macro.c
@@ -1663,7 +1663,13 @@ static_fn bool varsub(Mac_t *mp) {
             int match[2 * (MATCH_MAX + 1)], index;
             int nmatch, nmatch_prev, vsize_last, tsize;
             char *vlast = NULL, *oldv;
-            d = (mode == '@' ? ' ' : mp->ifs);
+            char *ifs_bytes = NULL;
+            if (mode == '@') {
+                d = ' ';
+            } else {
+                d = mp->ifs;
+                ifs_bytes = mp->ifsp;
+            }
             while (1) {
                 if (!v) v = "";
                 if (c == '/' || c == '#' || c == '%') {
@@ -1792,10 +1798,22 @@ static_fn bool varsub(Mac_t *mp) {
                     mp->atmode = mode == '@';
                     mp->pattern = oldpat;
                 } else if (d) {
-                    if (mp->sp) {
-                        sfputc(mp->sp, d);
+                    Sfio_t *sfio_ptr = (mp->sp) ? mp->sp : stkp;
+                    // Measure the IFS character once rather than on every loop
+                    // iteration. mbsize() is expected to follow mblen()
+                    // semantics and so may yield a value <= 0 (e.g. bytes that
+                    // are not a valid character in the current locale); in that
+                    // case write the single byte `d` so a separator is never
+                    // silently dropped.
+                    int ifs_len = ifs_bytes ? mbsize(ifs_bytes) : 0;
+
+                    // Handle multi-byte characters being used for the internal
+                    // field separator (IFS).
+                    if (ifs_len > 0) {
+                        for (int i = 0; i < ifs_len; i++) {
+                            sfputc(sfio_ptr, ifs_bytes[i]);
+                        }
                     } else {
-                        sfputc(stkp, d);
+                        sfputc(sfio_ptr, d);
                     }
                 }
             }
diff --git a/src/cmd/ksh93/tests/ifs.sh b/src/cmd/ksh93/tests/ifs.sh
new file mode 100644
index 000000000000..559726d91bfe
--- /dev/null
+++ b/src/cmd/ksh93/tests/ifs.sh
@@ -0,0 +1,30 @@
+# These are the tests for the internal field separator (IFS).
+
+IFS=e
+set : :
+[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*"
+
+IFS='|' read -r first second third <<< 'one|two|three'
+[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
+[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
+[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
+
+# Multi-byte character checks will only work if UTF-8 inputs are enabled
+if [ "${LANG}" = "C.UTF-8" ]
+then
+    # 2 byte latin accented e character
+    IFS=é
+    set : :
+    [[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*"
+
+    # 4 byte roman sestertius character
+    IFS=𐆘 read -r first second third <<< 'one𐆘two𐆘three'
+    [[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
+    [[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
+    [[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
+
+    # Ensure subshells don't get corrupted when IFS becomes multibyte character
+    expected_output=$(printf ":é:\\ntrap -- 'echo end' EXIT\\nend")
+    output=$(LANG=C.UTF-8; IFS=é; set : :; echo "$*"; trap "echo end" EXIT; LC_ALL=C; unset LC_ALL; trap)
+    [[ "${output}" == "${expected_output}" ]] || log_error "IFS subshell failed" "${expected_output}" "${output}"
+fi