Skip to content

Commit

Permalink
<regex>: Correct characters not matched by special character dot (#5192)
Browse files Browse the repository at this point in the history

Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
muellerj2 and StephanTLavavej authored Jan 14, 2025
1 parent 3b6d29c commit 247c51f
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 13 deletions.
19 changes: 17 additions & 2 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ enum _Meta_type : int { // meta character representations for parser
_Meta_nl = '\n',
_Meta_cr = '\r',
_Meta_bsp = '\b',
_Meta_ls = L'\u2028',
_Meta_ps = L'\u2029',
_Meta_chr = 0,

_Esc_bsl = '\\',
Expand Down Expand Up @@ -3523,10 +3525,23 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Match_pat(_Node_base* _Nx) { // c
break;

case _N_dot:
if (_Tgt_state._Cur == _End || *_Tgt_state._Cur == _Meta_nl || *_Tgt_state._Cur == _Meta_cr) {
if (_Tgt_state._Cur == _End) {
_Failed = true;
} else {
++_Tgt_state._Cur;
const _Elem _Ch = *_Tgt_state._Cur;
if (_Sflags
& (regex_constants::basic | regex_constants::extended | regex_constants::grep
| regex_constants::egrep | regex_constants::awk)) {
if (_Ch == _Elem()) {
_Failed = true;
}
} else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript
_Failed = true;
}

if (!_Failed) {
++_Tgt_state._Cur;
}
}

break;
Expand Down
37 changes: 37 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,42 @@ void test_gh_5167() {
g_regexTester.should_throw(R"(abc\10)", error_backref, grep);
}

void test_gh_5192() {
// GH-5192: Correct characters not matched by special character dot
for (const syntax_option_type option : {
regex_constants::basic,
regex_constants::extended,
regex_constants::awk,
regex_constants::grep,
regex_constants::egrep,
}) {
const test_regex caretDotStar(&g_regexTester, "^.*", option);
caretDotStar.should_search_match("abc\nd\re\0f"s, "abc\nd\re"s);
caretDotStar.should_search_match("abcd\re\ngh\0i"s, "abcd\re\ngh"s);

const test_wregex wCaretDotStar(&g_regexTester, L"^.*", option);
wCaretDotStar.should_search_match(L"abc\nd\re\0f"s, L"abc\nd\re"s);
wCaretDotStar.should_search_match(L"abcd\re\ngh\0i"s, L"abcd\re\ngh"s);
wCaretDotStar.should_search_match(L"abc\u2028d\ne\0f"s, L"abc\u2028d\ne"s); // U+2028 LINE SEPARATOR
wCaretDotStar.should_search_match(L"abc\u2029d\ne\0f"s, L"abc\u2029d\ne"s); // U+2029 PARAGRAPH SEPARATOR
}

for (const syntax_option_type option : {
regex_constants::ECMAScript,
syntax_option_type(),
}) {
const test_regex caretDotStar(&g_regexTester, "^.*", option);
caretDotStar.should_search_match("ab\0c\nd\re\0f"s, "ab\0c"s);
caretDotStar.should_search_match("ab\0cd\re\ngh\0i"s, "ab\0cd"s);

const test_wregex wCaretDotStar(&g_regexTester, L"^.*", option);
wCaretDotStar.should_search_match(L"abc\0\nd\re\0f"s, L"abc\0"s);
wCaretDotStar.should_search_match(L"ab\0cd\re\ngh\0i"s, L"ab\0cd"s);
wCaretDotStar.should_search_match(L"ab\0c\u2028d\ne\0f"s, L"ab\0c"s); // U+2028 LINE SEPARATOR
wCaretDotStar.should_search_match(L"a\0bc\u2029d\ne\0f"s, L"a\0bc"s); // U+2029 PARAGRAPH SEPARATOR
}
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -714,6 +750,7 @@ int main() {
test_gh_5058();
test_gh_5160();
test_gh_5167();
test_gh_5192();

return g_regexTester.result();
}
17 changes: 6 additions & 11 deletions tests/tr1/tests/regex3/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,17 +321,12 @@ static void test_uncoveredgrammar() {
STDString str2(T("Prime number"));
STDString str3(T("aaaqxzbbb"));
STDString str4(T("aaa\nxzbbb"));
static const STD regex_constants::syntax_option_type flag[6] = {
STD regex_constants::ECMAScript,
STD regex_constants::basic,
STD regex_constants::extended,
STD regex_constants::awk,
STD regex_constants::grep,
STD regex_constants::egrep,
};
for (int i = 0; i <= 5; i++) {
rx1.assign(T(".*"), flag[i]);
rx2.assign(T("aaa...bbb"), flag[i]);
for (const STD regex_constants::syntax_option_type flag : {
STD regex_constants::ECMAScript,
STD regex_constants::syntax_option_type(),
}) {
rx1.assign(T(".*"), flag);
rx2.assign(T("aaa...bbb"), flag);
CHECK(!STD regex_match(str1, rx1));
CHECK(STD regex_match(str2, rx1));
CHECK(STD regex_match(str3, rx2));
Expand Down

0 comments on commit 247c51f

Please sign in to comment.