Skip to content

Commit

Permalink
Fix default justification width for non-printable/non-ASCII chars
Browse files Browse the repository at this point in the history
Reproducer:

    unset s
    s='コーンシェル'
    echo ${#s}
    typeset -L s
    typeset -p s
    echo ${#s}

Output:

    6
    typeset -L 18 s='コーンシェル      '
    12

Expected output:

    6
    typeset -L 12 s=コーンシェル
    6

Note that the typeset -L, -R or -Z option-argument values do not
represent numbers of characters or bytes. Their purpose is to
justify. They count horizontal terminal positions. Double-width
characters, like Japanese, occupy two horizontal terminal positions
each, instead of one, and ksh takes this into account. Thus, the
justification width above is 12 though the string length in
characters is 6, because each character occupies two terminal
positions.

Also, control characters have no width and do not count. This is
similarly broken.

src/cmd/ksh93/sh/name.c:
- nv_newattr(): The default justification width was calculated based
  on strlen(), which counts bytes, not terminal positions. Use a loop
  with mbchar() and mbwidth() (see ast.h) to calculate the correct
  default width in terminal positions.

Resolves: #189
  • Loading branch information
McDutchie committed Mar 10, 2024
1 parent a01ce5c commit 0460290
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 2 deletions.
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ This documents significant changes in the 1.0 branch of ksh 93u+m.
For full details, see the git log at: https://github.com/ksh93/ksh/tree/1.0
Uppercase BUG_* IDs are shell bug IDs as used by the Modernish shell library.

2024-03-10:

- Fixed a longstanding bug where the default terminal width for typeset -L, -R,
or -Z, if not given, was miscalculated for multibyte or control characters.

2024-03-07:

- Fixed a bug that caused some systems to corrupt the display of multibyte
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/ksh93/include/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.9-beta" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2024-03-07" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2024-03-10" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2024 Contributors to ksh " SH_RELEASE_FORK

/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */
Expand Down
13 changes: 13 additions & 0 deletions src/cmd/ksh93/sh/name.c
Original file line number Diff line number Diff line change
Expand Up @@ -2996,7 +2996,20 @@ void nv_newattr (Namval_t *np, unsigned newatts, int size)
ap->nelem |= ARRAY_SCAN;
}
if(size==0 && !(newatts&NV_INTEGER) && (newatts&NV_HOST)!=NV_HOST && (newatts&(NV_LJUST|NV_RJUST|NV_ZFILL)))
{ /*
* Calculate the default terminal width for -L, -R, -Z if no numeric option-argument was given.
* Note: we count terminal positions, not characters (double-width adds 2, control char adds 0)
*/
char *cq = cp;
wchar_t c;
int w;
n = 0;
mbinit();
while(c = mbchar(cq))
if ((w = mbwidth(c)) > 0)
n += w;
size = n;
}
}
else if(!trans)
_nv_unset(np,NV_EXPORT);
Expand Down
8 changes: 8 additions & 0 deletions src/cmd/ksh93/tests/attributes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -845,5 +845,13 @@ do read -r -N6 var
[[ $got == "$exp" ]] || err_exit "loop optimization bug with 'typeset -b' variables (expected '$exp', got '$got')"
done <<< 'twotowthreetfourro'

# ======
# Control characters occupy no terminal positions, so they should not be
# counted for the default justification width.
# https://github.com/ksh93/ksh/issues/189
# The test value has five printable characters (1 through 5) separated by the
# control characters \n, \a, \t and \v, so the expected default width is 5.
# The expected 'typeset -p' output spells the \v (0x0b) as \x[0b].
exp='typeset -L 5 s=$'\''1\n2\a3\t4\x[0b]5'\'
got=$(s=$'1\n2\a3\t4\v5'; typeset -L s; typeset -p s)
[[ $got == "$exp" ]] || err_exit "default terminal width for typeset -L incorrect" \
"(expected $(printf %q "$exp"); got $(printf %q "$got"))"

# ======
exit $((Errors<125?Errors:125))
16 changes: 15 additions & 1 deletion src/cmd/ksh93/tests/locale.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# #
# This software is part of the ast package #
# Copyright (c) 1982-2012 AT&T Intellectual Property #
# Copyright (c) 2020-2023 Contributors to ksh 93u+m #
# Copyright (c) 2020-2024 Contributors to ksh 93u+m #
# and is licensed under the #
# Eclipse Public License, Version 2.0 #
# #
Expand Down Expand Up @@ -454,5 +454,19 @@ then unset LANG "${!LC_@}" i
fi
fi
# ======
# Double-width characters should count as two terminal positions each for the
# default justification width.
# https://github.com/ksh93/ksh/issues/189
# This test only applies when the shell is built with multibyte support.
if ((SHOPT_MULTIBYTE))
# Clear s and every set LC_* variable; presumably so that the LANG assignment
# below alone determines the locale.
then unset s "${!LC_@}"
LANG=C.UTF-8
s='コーンシェル'
# No width argument is given, so the default width is derived from the current value.
typeset -L s
# Capture both the reported attribute/width and the length in characters.
got=$(typeset -p s; echo ${#s})
exp=$'typeset -L 12 s=コーンシェル\n6' # each double-width character counts for two terminal positions
[[ $got == "$exp" ]] || err_exit "default terminal width for typeset -L incorrect" \
"(expected $(printf %q "$exp"); got $(printf %q "$got"))"
fi
# ======
exit $((Errors<125?Errors:125))

0 comments on commit 0460290

Please sign in to comment.