-
Notifications
You must be signed in to change notification settings - Fork 154
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix expansion of multibyte IFS characters
Closes #13. Previously, the `varsub` method used for the macro expansion of `$param`, `${param}`, and `${param op word}` would incorrectly expand the internal field separator (IFS) if it was a multibyte character. This was due to truncation based on the incorrect assumption that the IFS would never be larger than a single byte. This change fixes this issue by carefully tracking the number of bytes that should be persisted in the IFS case and ensuring that all bytes are written during expansion and substitution.
- Loading branch information
1 parent
f66d382
commit a063631
Showing
3 changed files
with
46 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# These are the tests for the internal field separator (IFS). | ||
|
||
# Join check: with IFS set to the single-byte character "e", the quoted "$*" | ||
# expansion of the two positional parameters ":" and ":" is expected to be ":e:". | ||
IFS=e | ||
set : : | ||
[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*" | ||
|
||
# Split check: read, with IFS given as a prefix assignment set to the pipe | ||
# character, must break the here-string into the three fields verified below. | ||
IFS='|' read -r first second third <<< 'one|two|three' | ||
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}" | ||
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}" | ||
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}" | ||
|
||
# Multi-byte (wide) character checks will only work if UTF-8 inputs are enabled. We can't just set | ||
# LC_ALL here because the literal UTF-8 strings will have already been read. | ||
if [[ $LC_ALL == en_US.UTF-8 ]] | ||
then | ||
# 2 byte latin accented e character | ||
# Same join check as the single-byte "e" case, but the separator expected inside | ||
# "$*" is the complete multibyte character rather than a truncated byte. | ||
IFS=é | ||
set : : | ||
[[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*" | ||
|
||
# 4 byte roman sestertius character | ||
# Same split check as the single-byte read case, using a 4 byte separator given | ||
# as a prefix assignment on the read command. | ||
IFS=𐆘 read -r first second third <<< 'one𐆘two𐆘three' | ||
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}" | ||
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}" | ||
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}" | ||
|
||
# Ensure subshells don't get corrupted when IFS becomes multibyte character | ||
# The expected output has three lines: the joined "$*" value, the trap listing | ||
# printed by the bare trap command, and "end" written by the EXIT trap. | ||
# Inside double quotes \\n reaches printf as \n, which printf emits as a newline. | ||
expect=$(printf ":é:\\ntrap -- 'echo end' EXIT\\nend") | ||
# The command substitution sets LANG and IFS itself before joining, installing | ||
# the EXIT trap and listing the traps; all of its output is captured in actual. | ||
actual=$(LANG=C.UTF-8; IFS=é; set : :; echo "$*"; trap "echo end" EXIT; trap) | ||
[[ "$expect" == "$actual" ]] || log_error "IFS subshell failed" "$expect" "$actual" | ||
fi |