Skip to content

Commit

Permalink
Fix expansion of multibyte IFS characters
Browse files Browse the repository at this point in the history
Closes #13. Previously, the `varsub` method used for the macro expansion of
`$param`, `${param}`, and `${param op word}` would incorrectly expand the
internal field separator (IFS) if it was a multibyte character. This was due to
truncation based on the incorrect assumption that the IFS would never be larger
than a single byte.

This change fixes this issue by carefully tracking the number of bytes that
should be persisted in the IFS case and ensuring that all bytes are written
during expansion and substitution.
  • Loading branch information
etscrivner authored and krader1961 committed Jul 29, 2018
1 parent f66d382 commit a063631
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/cmd/ksh93/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ all_tests = [
['alias'], ['append'], ['arith'], ['arrays'], ['arrays2'], ['attributes'],
['basic', 90], ['bracket'], ['builtins'], ['case'], ['comvar'],
['comvario'], ['coprocess', 50], ['cubetype'], ['directoryfd'], ['enum'],
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'],
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'], ['ifs'],
['io'], ['leaks'], ['locale'], ['math', 50], ['nameref'], ['namespace'],
['modifiers'], ['options'], ['path'], ['pointtype'], ['quoting'],
['quoting2'], ['readcsv'], ['recttype'], ['restricted'], ['return'], ['select'],
Expand Down
17 changes: 14 additions & 3 deletions src/cmd/ksh93/sh/macro.c
Original file line number Diff line number Diff line change
Expand Up @@ -1792,10 +1792,21 @@ static_fn bool varsub(Mac_t *mp) {
mp->atmode = mode == '@';
mp->pattern = oldpat;
} else if (d) {
if (mp->sp) {
sfputc(mp->sp, d);
Sfio_t *sfio_ptr = (mp->sp) ? mp->sp : stkp;

// We know from above that if we are not performing @-expansion then
// `d` was assigned the value of `mp->ifs`. Here we check whether we
// actually have a valid string of IFS characters to write, because it
// is possible for `d` to be set to `mp->ifs` while `mp->ifsp` is
// still NULL.
if (mode != '@' && mp->ifsp) {
// Handle multi-byte characters being used for the internal
// field separator (IFS).
for (int i = 0; i < mbsize(mp->ifsp); i++) {
sfputc(sfio_ptr, mp->ifsp[i]);
}
} else {
sfputc(stkp, d);
sfputc(sfio_ptr, d);
}
}
}
Expand Down
31 changes: 31 additions & 0 deletions src/cmd/ksh93/tests/ifs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# These are the tests for the internal field separator (IFS).
#
# Each check compares an actual expansion against the expected text and calls
# `log_error` on mismatch. `log_error` is not defined in this file; presumably
# it is supplied by the test harness -- confirm.

# Single-byte IFS: with two positional parameters (":" and ":"), "$*" is
# expected to join them using the IFS character, giving ":e:".
# Note that this is a plain assignment, so IFS stays set to `e` for the
# statements that follow (until it is reassigned below).
IFS=e
set : :
[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*"

# Single-byte IFS given as a prefix assignment on `read`: the here-string is
# expected to be split on '|' into three fields.
IFS='|' read -r first second third <<< 'one|two|three'
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"

# Multi-byte (wide) character checks will only work if UTF-8 inputs are enabled. We can't just set
# LC_ALL here because the literal UTF-8 strings will have already been read.
if [[ $LC_ALL == en_US.UTF-8 ]]
then
# 2 byte latin accented e character
# Same "$*" join check as the single-byte case above, but the separator is
# more than one byte long, so every byte of it must appear in the result.
IFS=é
set : :
[[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*"

# 4 byte roman sestertius character
# Same `read` splitting check as the '|' case above, with a 4 byte separator.
IFS=𐆘 read -r first second third <<< 'one𐆘two𐆘three'
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"

# Ensure subshells don't get corrupted when IFS becomes multibyte character
# The expected output is three lines: the joined parameters, the listing
# printed by the bare `trap` command, and the text echoed by the EXIT trap.
# NOTE(review): LC_ALL is en_US.UTF-8 inside this branch and LC_ALL normally
# takes precedence over LANG, so the LANG assignment below presumably does not
# change the effective locale -- confirm.
expect=$(printf ":é:\\ntrap -- 'echo end' EXIT\\nend")
actual=$(LANG=C.UTF-8; IFS=é; set : :; echo "$*"; trap "echo end" EXIT; trap)
[[ "$expect" == "$actual" ]] || log_error "IFS subshell failed" "$expect" "$actual"
fi

0 comments on commit a063631

Please sign in to comment.