Skip to content

Commit

Permalink
Fix BUG_BRACQUOT: quoting in glob bracket expression ineffective
Browse files Browse the repository at this point in the history
Example reproducers:

    case b in
    ( ['a-c'] )
	echo BUG ;;
    esac

Output: BUG, expected: none.

    touch b && echo ["a-c"]

Output: b; expected: [a-c]

In both reproducers, the - character in the bracket expression is
part of a quoted string, which should cause the shell to escape it,
disabling its processing as a range operator.

Previous discussion that explains why this is a bug:
https://www.mail-archive.com/[email protected]/msg02036.html

Analysis: The 'case' execution code (case TSW: in sh_exec()) calls
sh_macpat() (macro.c) to parse the pattern, which calls copyto(),
which (among many other things) translates single and double shell
quotes into corresponding backslash escapes where needed. These
backslash escapes are internally understood by the regex parser.

But in a bracket pattern like ["!"a"-"c], neither the quoted "!"
nor the quoted "-" gets such an internal backslash after quote
removal. This is the bug.

copyto() is effectively a lexer for expansions. It uses the ST_MACRO
lexical state table a.k.a. sh_lexstate9[]. Adding a new lexical
state for !, ^ and -, S_BRAOP, allows copyto() to keep track of
bracket expressions in glob patterns and add those escapes.

src/cmd/ksh93/include/lexstates.h,
src/cmd/ksh93/data/lexstates.c:
- Define new S_BRAOP state for bracket expression operator (!^-).
  We can re-use the value of S_RES (4) as it's unused in ST_MACRO.

src/cmd/ksh93/sh/macro.c: copyto():
- S_BRACT, S_ENDCH: Keep track of bracket expressions with a
  'bracketexpr' flag that is only set if we're processing a pattern
  that is not an ERE (only the glob pattern behaviour should be
  changed).
- S_ESC: Preserve any existing backslash if bracketexpr is set and
  the current character is an S_BRAOP character. This fixes the bug
  for bracket expressions like [\!N] and [\^N] (where the escaped !
  and ^ should not be negators).
- S_BRAOP: Added. Write a backslash (ESCAPE) on the stack if
  bracketexpr is set and the current character (a bracket
  expression operator) is double or single quoted. This fixes the
  rest of the bug.

Resolves: #488
  • Loading branch information
McDutchie committed Oct 7, 2022
1 parent d84067a commit 6c73c8c
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 11 deletions.
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ This documents significant changes in the dev branch of ksh 93u+m.
For full details, see the git log at: https://github.com/ksh93/ksh
Uppercase BUG_* IDs are shell bug IDs as used by the Modernish shell library.

2022-10-07:

- Fixed BUG_BRACQUOT: single and double shell quotes did not work to escape
the !, ^ or - operators in a bracket expression within a glob pattern.

2022-09-28:

- Fixed a bug where LINENO was reset to 1 after being temporarily changed in
Expand Down
6 changes: 3 additions & 3 deletions src/cmd/ksh93/data/lexstates.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,14 +365,14 @@ static const char sh_lexstate9[256] =
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, S_QUOTE,0, S_DOL, 0, S_PAT, S_LIT,
S_PAT, S_PAT, S_PAT, 0, S_COM, 0, S_DOT, S_SLASH,
0, S_BRAOP,S_QUOTE,0, S_DOL, 0, S_PAT, S_LIT,
S_PAT, S_PAT, S_PAT, 0, S_COM, S_BRAOP,S_DOT, S_SLASH,
S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG,
S_DIG, S_DIG, S_COLON,0, 0, S_EQ, 0, S_PAT,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, S_BRACT,S_ESC, S_ENDCH,0, 0,
0, 0, 0, S_BRACT,S_ESC, S_ENDCH,S_BRAOP,0,
S_GRAVE,0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
Expand Down
1 change: 1 addition & 0 deletions src/cmd/ksh93/include/lexstates.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#define S_DELIM S_RES /* IFS delimiter characters */
#define S_MBYTE S_NAME /* IFS first byte of multi-byte char */
#define S_BLNK 36 /* space or tab */
#define S_BRAOP S_RES /* potentially a glob pattern bracket expression operator (!, ^, -) */
/* The following must be the highest numbered states */
#define S_QUOTE 37 /* double quote character */
#define S_GRAVE 38 /* old comsub character */
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/ksh93/include/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.1.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2022-09-28" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2022-10-07" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2022 Contributors to ksh " SH_RELEASE_FORK

/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */
Expand Down
35 changes: 28 additions & 7 deletions src/cmd/ksh93/sh/macro.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,14 @@ static void copyto(register Mac_t *mp,int endch, int newquote)
register const char *state = sh_lexstates[ST_MACRO];
register char *cp,*first;
Lex_t *lp = (Lex_t*)sh.lex_context;
int tilde = -1;
int oldquote = mp->quote;
int ansi_c = 0;
int paren = 0;
int ere = 0;
int dotdot = 0;
int brace = 0;
int tilde = -1; /* offset for tilde expansion */
int dotdot = 0; /* offset for '..' in subscript */
int paren = 0; /* level of (parentheses) */
int brace = 0; /* level of {braces} */
char oldquote = mp->quote; /* save "double quoted" state */
char ansi_c = 0; /* set when processing ANSI C escape codes */
char ere = 0; /* set when processing an extended regular expression */
char bracketexpr = 0; /* set when in [brackets] within a non-ERE glob pattern */
Sfio_t *sp = mp->sp;
Stk_t *stkp = sh.stk;
char *resume = 0;
Expand Down Expand Up @@ -530,6 +531,9 @@ static void copyto(register Mac_t *mp,int endch, int newquote)
n = S_PAT;
if(mp->pattern)
{
/* preserve \ for escaping glob pattern bracket expression operators */
if(bracketexpr && n==S_BRAOP)
break;
/* preserve \digit for pattern matching */
/* also \alpha for extended patterns */
if(!mp->lit && !mp->quote)
Expand Down Expand Up @@ -631,6 +635,8 @@ static void copyto(register Mac_t *mp,int endch, int newquote)
mp->pattern = c;
break;
case S_ENDCH:
if(bracketexpr && cp[-1]==RBRACT)
bracketexpr--;
if((mp->lit || cp[-1]!=endch || mp->quote!=newquote))
goto pattern;
if(endch==RBRACE && mp->pattern && brace)
Expand Down Expand Up @@ -731,6 +737,13 @@ static void copyto(register Mac_t *mp,int endch, int newquote)
cp = first = fcseek(0);
break;
}
if(mp->pattern==1 && !ere && !bracketexpr)
{
bracketexpr++;
/* a ] following [, as in []abc], should not close the bracket expression */
if(cp[0]==RBRACT && cp[1])
bracketexpr++;
}
/* FALLTHROUGH */
case S_PAT:
if(mp->pattern && !(mp->quote || mp->lit))
Expand Down Expand Up @@ -845,6 +858,14 @@ static void copyto(register Mac_t *mp,int endch, int newquote)
cp = first = fcseek(c+2);
}
break;
case S_BRAOP:
/* escape a quoted !^- within a bracket expression */
if(c)
sfwrite(stkp,first,c);
first = fcseek(c);
if(bracketexpr && (mp->quote || mp->lit))
sfputc(stkp,ESCAPE);
break;
}
}
done:
Expand Down
34 changes: 34 additions & 0 deletions src/cmd/ksh93/tests/bracket.sh
Original file line number Diff line number Diff line change
Expand Up @@ -516,5 +516,39 @@ then [[ x -eq y ]] && err_exit "comparing long floats fails"
fi
unset x y

# ======
# Shell quoting within bracket expressions in glob patterns had no effect
# https://github.com/ksh93/ksh/issues/488

[[ b == *[a'-'c]* ]] && err_exit 'BUG_BRACQUOT: 1A'
[[ b == *['!'N]* ]] && err_exit 'BUG_BRACQUOT: 1B'
[[ b == *['^'N]* ]] && err_exit 'BUG_BRACQUOT: 1C'
[[ b == *[a$'-'c]* ]] && err_exit 'BUG_BRACQUOT: 2A'
[[ b == *[$'!'N]* ]] && err_exit 'BUG_BRACQUOT: 2B'
[[ b == *[$'^'N]* ]] && err_exit 'BUG_BRACQUOT: 2C'
[[ b == *[a"-"c]* ]] && err_exit 'BUG_BRACQUOT: 3A'
[[ b == *["!"N]* ]] && err_exit 'BUG_BRACQUOT: 3B'
[[ b == *["^"N]* ]] && err_exit 'BUG_BRACQUOT: 3C'
[[ b == *[a\-c]* ]] && err_exit 'BUG_BRACQUOT: 4A'
[[ b == *[\!N]* ]] && err_exit 'BUG_BRACQUOT: 4B'
[[ b == *[\^N]* ]] && err_exit 'BUG_BRACQUOT: 4C'
p='*[a\-c]*'; [[ b == $p ]] && err_exit 'BUG_BRACQUOT: 5A'
p='*[\!N]*'; [[ b == $p ]] && err_exit 'BUG_BRACQUOT: 5B'
p='*[\^N]*'; [[ b == $p ]] && err_exit 'BUG_BRACQUOT: 5C'
p='*[a\-c]*'; [[ - == $p ]] || err_exit 'BUG_BRACQUOT: 6A'
p='*[\!N]*'; [[ \! == $p ]] || err_exit 'BUG_BRACQUOT: 6B'
p='*[\^N]*'; [[ ^ == $p ]] || err_exit 'BUG_BRACQUOT: 6C'

# also test bracket expressions with ] as the first character, e.g. []abc]
[[ b == *[]a'-'c]* ]] && err_exit 'BUG_BRACQUOT: B1'
[[ b == *[]a$'-'c]* ]] && err_exit 'BUG_BRACQUOT: B2'
[[ b == *[]a"-"c]* ]] && err_exit 'BUG_BRACQUOT: B3'
[[ b == *[]a\-c]* ]] && err_exit 'BUG_BRACQUOT: B4'

# make sure we didn't break extended regular expressions
[[ \\ == [a"-"z] ]] && err_exit 'internal backslash escape incorrectly applied to glob [a"-"z]'
[[ \\ =~ [a"-"z] ]] && err_exit 'internal backslash escape incorrectly applied to ERE [a"-"z]'
[[ \\ == ~(E:[a"-"z]) ]] && err_exit 'internal backslash escape incorrectly applied to ERE via ksh glob ~(E:[a"-"z])'

# ======
exit $((Errors<125?Errors:125))
29 changes: 29 additions & 0 deletions src/cmd/ksh93/tests/case.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,34 @@ got=$(set +x; { "$SHELL" -c 'case x in [x[:bogus:]]) echo x ;; esac'; } 2>&1)
((!(e = $?))) && [[ -z $got ]] || err_exit 'use of invalid character class name' \
"(got status $e$( ((e>128)) && print -n /SIG && kill -l "$e"), $(printf %q "$got"))"

# ======
# Shell quoting within bracket expressions in glob patterns had no effect
# https://github.com/ksh93/ksh/issues/488

case b in *[a'-'c]*) err_exit 'BUG_BRACQUOT: 1a';; esac
case b in *['!'N]*) err_exit 'BUG_BRACQUOT: 1b';; esac
case b in *['^'N]*) err_exit 'BUG_BRACQUOT: 1c';; esac
case b in *[a$'-'c]*) err_exit 'BUG_BRACQUOT: 2a';; esac
case b in *[$'!'N]*) err_exit 'BUG_BRACQUOT: 2b';; esac
case b in *[$'^'N]*) err_exit 'BUG_BRACQUOT: 2c';; esac
case b in *[a"-"c]*) err_exit 'BUG_BRACQUOT: 3a';; esac
case b in *["!"N]*) err_exit 'BUG_BRACQUOT: 3b';; esac
case b in *["^"N]*) err_exit 'BUG_BRACQUOT: 3c';; esac
case b in *[a\-c]*) err_exit 'BUG_BRACQUOT: 4a';; esac
case b in *[\!N]*) err_exit 'BUG_BRACQUOT: 4b';; esac
case b in *[\^N]*) err_exit 'BUG_BRACQUOT: 4c';; esac
p='*[a\-c]*'; case b in $p) err_exit 'BUG_BRACQUOT: 5a';; esac
p='*[\!N]*'; case b in $p) err_exit 'BUG_BRACQUOT: 5b';; esac
p='*[\^N]*'; case b in $p) err_exit 'BUG_BRACQUOT: 5c';; esac
p='*[a\-c]*'; case - in $p) ;; *) err_exit 'BUG_BRACQUOT: 6a';; esac
p='*[\!N]*'; case \! in $p) ;; *) err_exit 'BUG_BRACQUOT: 6b';; esac
p='*[\^N]*'; case ^ in $p) ;; *) err_exit 'BUG_BRACQUOT: 6c';; esac

# also test bracket expressions with ] as the first character, e.g. []abc]
case b in *[]a'-'c]*) err_exit 'BUG_BRACQUOT: A1';; esac
case b in *[]a$'-'c]*) err_exit 'BUG_BRACQUOT: A2';; esac
case b in *[]a"-"c]*) err_exit 'BUG_BRACQUOT: A3';; esac
case b in *[]a\-c]*) err_exit 'BUG_BRACQUOT: A4';; esac

# ======
exit $((Errors<125?Errors:125))
21 changes: 21 additions & 0 deletions src/cmd/ksh93/tests/glob.sh
Original file line number Diff line number Diff line change
Expand Up @@ -410,5 +410,26 @@ test_glob '<d_un/d_sym//d_3> <d_un/d_sym//d_3/d_4> <d_un/d_sym//d_tres> <d_un/d_

set --noglobstar

# ======
# Shell quoting within bracket expressions in glob patterns had no effect
# https://github.com/ksh93/ksh/issues/488

mkdir BUG_BRACQUOT
cd BUG_BRACQUOT
: > b
test_glob '<[a-c]>' [a'-'c]
test_glob '<[!N]>' ['!'N]
test_glob '<[^N]>' ['^'N]
test_glob '<[a-c]>' [a$'-'c]
test_glob '<[!N]>' [$'!'N]
test_glob '<[^N]>' [$'^'N]
test_glob '<[a-c]>' [a"-"c]
test_glob '<[!N]>' ["!"N]
test_glob '<[^N]>' ["^"N]
test_glob '<[a-c]>' [a\-c]
test_glob '<[!N]>' [\!N]
test_glob '<[^N]>' [\^N]
cd ..

# ======
exit $((Errors<125?Errors:125))

0 comments on commit 6c73c8c

Please sign in to comment.