Skip to content

Commit

Permalink
<regex>: Always reject character ranges with set limits (#5158)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
muellerj2 and StephanTLavavej authored Dec 5, 2024
1 parent 79137b6 commit e3e65be
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 32 deletions.
51 changes: 26 additions & 25 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -4086,37 +4086,38 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas
return;
}

if (_Ret != _Prs_set) {
if (_Val == 0 && !(_L_flags & _L_bzr_chr)) {
_Error(regex_constants::error_escape);
}

if (_Mchar == _Meta_dash) { // check for valid range
_Next();
_Elem _Chr1 = static_cast<_Elem>(_Val);
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
_Nfa._Add_char_to_class(_Meta_dash);
return;
}
if (_Ret == _Prs_chr && _Val == 0 && !(_L_flags & _L_bzr_chr)) {
_Error(regex_constants::error_escape);
}

if (_Ret == _Prs_set) {
_Error(regex_constants::error_range); // set follows dash
if (_Mchar == _Meta_dash) { // check for valid range
_Next();
_Elem _Chr1 = static_cast<_Elem>(_Val);
const bool _Set_preceding = _Ret == _Prs_set;
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
if (!_Set_preceding) {
_Nfa._Add_char_to_class(_Chr1);
}
_Nfa._Add_char_to_class(_Meta_dash);
return;
}

if (_Flags & regex_constants::collate) { // translate ends of range
_Val = _Traits.translate(static_cast<_Elem>(_Val));
_Chr1 = _Traits.translate(_Chr1);
}
if (_Set_preceding || _Ret == _Prs_set) {
_Error(regex_constants::error_range); // set precedes or follows dash
}

if (static_cast<typename _RxTraits::_Uelem>(_Val) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
_Error(regex_constants::error_range);
}
if (_Flags & regex_constants::collate) { // translate ends of range
_Val = _Traits.translate(static_cast<_Elem>(_Val));
_Chr1 = _Traits.translate(_Chr1);
}

_Nfa._Add_range(_Chr1, static_cast<_Elem>(_Val));
} else {
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
if (static_cast<typename _RxTraits::_Uelem>(_Val) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
_Error(regex_constants::error_range);
}

_Nfa._Add_range(_Chr1, static_cast<_Elem>(_Val));
} else if (_Ret == _Prs_chr) {
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
}
}
}
Expand Down
1 change: 0 additions & 1 deletion tests/libcxx/expected_results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,6 @@ std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL
std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_backref.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_escape.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_range.pass.cpp FAIL
std/re/re.regex/re.regex.construct/default.pass.cpp FAIL
std/re/re.regex/re.regex.nonmemb/re.regex.nmswap/swap.pass.cpp FAIL
std/re/re.regex/re.regex.swap/swap.pass.cpp FAIL
Expand Down
14 changes: 8 additions & 6 deletions tests/std/include/test_regex_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,18 +160,20 @@ class regex_fixture {
}
}

void should_throw(const std::string& pattern, const std::regex_constants::error_type expectedCode) {
void should_throw(const std::string& pattern, const std::regex_constants::error_type expectedCode,
const std::regex_constants::syntax_option_type syntax = std::regex_constants::ECMAScript) {
try {
const std::regex r(pattern);
printf(R"(regex r("%s") succeeded (which is bad).)"
const std::regex r(pattern, syntax);
printf(R"(regex r("%s", 0x%X) succeeded (which is bad).)"
"\n",
pattern.c_str());
pattern.c_str(), static_cast<unsigned int>(syntax));
fail_regex();
} catch (const std::regex_error& e) {
if (e.code() != expectedCode) {
printf(R"(regex r("%s") threw 0x%X; expected 0x%X)"
printf(R"(regex r("%s", 0x%X) threw 0x%X; expected 0x%X)"
"\n",
pattern.c_str(), static_cast<unsigned int>(e.code()), static_cast<unsigned int>(expectedCode));
pattern.c_str(), static_cast<unsigned int>(syntax), static_cast<unsigned int>(e.code()),
static_cast<unsigned int>(expectedCode));
fail_regex();
}
}
Expand Down
31 changes: 31 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,36 @@ void test_gh_993() {
}
}

void test_gh_4995() {
    // GH-4995: R"([\d-e])" should be rejected.
    // More generally, a bracket range whose lower or upper limit is a character set
    // (class escape, named character class, or equivalence class) must raise error_range.
    const char* const set_limited_ranges[] = {
        R"([\d-e])",
        R"([e-\d])",
        R"([\w-\d])",
        "[[:digit:]-e]",
        "[e-[:digit:]]",
        "[[:alpha:]-[:digit:]]",
    };
    for (const char* const pattern : set_limited_ranges) {
        g_regexTester.should_throw(pattern, error_range);
    }

    // Equivalence classes as range limits are exercised with the collate option enabled.
    const char* const equivalence_limited_ranges[] = {
        "[[=a=]-e]",
        "[e-[=a=]]",
        "[[=a=]-[=b=]]",
    };
    for (const char* const pattern : equivalence_limited_ranges) {
        g_regexTester.should_throw(pattern, error_range, ECMAScript | regex::collate);
    }

    // Valid: a dash next to the opening or closing bracket is an ordinary character,
    // so these classes accept digits and '-' but not letters.
    const char* const digit_or_dash_classes[] = {
        R"([\d-])",
        R"([-\d])",
    };
    for (const char* const pattern : digit_or_dash_classes) {
        g_regexTester.should_not_match("b", pattern);
        g_regexTester.should_match("5", pattern);
        g_regexTester.should_match("-", pattern);
    }

    // Valid: a genuine a-c range placed beside \d accepts letters in the range and digits,
    // but the dash itself is not a member of the class.
    const char* const range_and_digit_classes[] = {
        R"([a-c\d])",
        R"([\da-c])",
    };
    for (const char* const pattern : range_and_digit_classes) {
        g_regexTester.should_match("b", pattern);
        g_regexTester.should_match("5", pattern);
        g_regexTester.should_not_match("-", pattern);
    }
}

void test_gh_5058() {
// GH-5058 "<regex>: Small cleanups" changed some default constructors to be defaulted.
// Verify that <regex> types are still const-default-constructible (N4993 [dcl.init.general]/8).
Expand Down Expand Up @@ -656,6 +686,7 @@ int main() {
test_VSO_225160_match_eol_flag();
test_VSO_226914_word_boundaries();
test_gh_993();
test_gh_4995();
test_gh_5058();

return g_regexTester.result();
Expand Down

0 comments on commit e3e65be

Please sign in to comment.