Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize find_first_of for one element needle #4563

Merged
merged 11 commits into from
Apr 9, 2024
44 changes: 24 additions & 20 deletions benchmarks/src/find_first_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,46 @@
#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <numeric>
#include <vector>

using namespace std;

// Benchmark body for find_first_of.
// Builds a haystack `h` of HSize copies of '.', a needle `n` of NSize consecutive
// values starting at 'a', stores n[Which] at h[Pos], and then times
// find_first_of(h, n) inside the benchmark loop.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so removed and
// added lines appear side by side. The two `template` headers and the
// static_assert / runtime-check pair below look like the before/after forms of the
// same code -- confirm against the actual file before compiling any of this.
template <class T, size_t Pos, size_t NSize, size_t HSize = Pos * 2, size_t Which = 0>
template <class T>
void bm(benchmark::State& state) {
// Match position and needle length are taken from the benchmark arguments.
const size_t Pos = static_cast<size_t>(state.range(0));
const size_t NSize = static_cast<size_t>(state.range(1));
// The haystack is twice as long as the match position; the planted needle
// element is the first one (index 0).
const size_t HSize = Pos * 2;
const size_t Which = 0;

vector<T> h(HSize, T{'.'});
vector<T> n(NSize);
iota(n.begin(), n.end(), T{'a'});

// NOTE(review): Pos/NSize/HSize read from state.range() are not constant
// expressions, so these static_asserts only fit the template-parameter form
// of bm; presumably they are the removed side of the diff.
static_assert(Pos < HSize);
static_assert(Which < NSize);
// Runtime form of the same checks: abort on an argument pair that would make
// h[Pos] or n[Which] index out of range.
if (Pos >= HSize || Which >= NSize) {
abort();
// The next two lines are review-UI text captured with the listing, not code.
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
}

// Make h[Pos] equal to a needle value so the search has something to find.
h[Pos] = n[Which];

for (auto _ : state) {
// DoNotOptimize consumes the returned iterator so the call is not discarded.
benchmark::DoNotOptimize(find_first_of(h.begin(), h.end(), n.begin(), n.end()));
}
}

// NOTE(review): the registrations below pass Pos and NSize as template arguments,
// which only matches the five-parameter `template` header of bm; presumably they
// are the removed side of the diff, superseded by the ARGS-based registrations.
BENCHMARK(bm<uint8_t, 2, 3>);
BENCHMARK(bm<uint16_t, 2, 3>);

BENCHMARK(bm<uint8_t, 7, 4>);
BENCHMARK(bm<uint16_t, 7, 4>);

BENCHMARK(bm<uint8_t, 9, 3>);
BENCHMARK(bm<uint16_t, 9, 3>);

BENCHMARK(bm<uint8_t, 22, 5>);
BENCHMARK(bm<uint16_t, 22, 5>);

BENCHMARK(bm<uint8_t, 3056, 7>);
BENCHMARK(bm<uint16_t, 3056, 7>);

BENCHMARK(bm<uint8_t, 1011, 11>);
BENCHMARK(bm<uint16_t, 1011, 11>);
// Argument pairs shared by both registrations. bm reads each pair as
// {Pos, NSize} through state.range(0) and state.range(1). The {325, 1} entry
// exercises a one-element needle.
// NOTE(review): the macro body already ends in ';', so `->ARGS;` at the use
// sites expands to two semicolons (the second is an empty declaration).
// Harmless, but dropping the ';' from the macro would be cleaner.
#define ARGS \
Args({2, 3}) \
->Args({7, 4}) \
->Args({9, 3}) \
->Args({22, 5}) \
->Args({58, 2}) \
->Args({102, 4}) \
->Args({325, 1}) \
->Args({1011, 11}) \
->Args({3056, 7});

// Register bm for 8-bit and 16-bit element types over the same argument list.
BENCHMARK(bm<uint8_t>)->ARGS;
BENCHMARK(bm<uint16_t>)->ARGS;

BENCHMARK_MAIN();
37 changes: 31 additions & 6 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -3379,10 +3379,26 @@ _NODISCARD _CONSTEXPR20 _FwdIt1 find_first_of(
// look for one of [_First2, _Last2) satisfying _Pred with element
_STD _Adl_verify_range(_First1, _Last1);
_STD _Adl_verify_range(_First2, _Last2);
auto _UFirst1 = _STD _Get_unwrapped(_First1);
const auto _ULast1 = _STD _Get_unwrapped(_Last1);
const auto _UFirst2 = _STD _Get_unwrapped(_First2);
const auto _ULast2 = _STD _Get_unwrapped(_Last2);
auto _UFirst1 = _STD _Get_unwrapped(_First1);
auto _ULast1 = _STD _Get_unwrapped(_Last1);
auto _UFirst2 = _STD _Get_unwrapped(_First2);
const auto _ULast2 = _STD _Get_unwrapped(_Last2);

constexpr bool _Is_predicate_equal = _Is_any_of_v<_Pr,
#if _HAS_CXX20
_RANGES equal_to,
#endif // _HAS_CXX20
_STD equal_to<>>;

if constexpr (_Is_ranges_random_iter_v<decltype(_UFirst2)> && _Is_predicate_equal) {
const auto _Count2 = _ULast2 - _UFirst2;
if (_Count2 == 1) {
_UFirst1 = _STD _Find_unchecked(_STD move(_UFirst1), _STD move(_ULast1), *_UFirst2);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
_STD _Seek_wrapped(_First1, _UFirst1);
return _First1;
}
}

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_find_first_of_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated() && _ULast1 - _UFirst1 >= _Threshold_find_first_of) {
Expand Down Expand Up @@ -3470,14 +3486,23 @@ namespace ranges {

private:
template <class _It1, class _Se1, class _It2, class _Se2, class _Pr, class _Pj1, class _Pj2>
_NODISCARD static constexpr _It1 _Find_first_of_unchecked(_It1 _First1, const _Se1 _Last1, const _It2 _First2,
const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
_NODISCARD static constexpr _It1 _Find_first_of_unchecked(
_It1 _First1, _Se1 _Last1, const _It2 _First2, const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
_STL_INTERNAL_STATIC_ASSERT(input_iterator<_It1>);
_STL_INTERNAL_STATIC_ASSERT(sentinel_for<_Se1, _It1>);
_STL_INTERNAL_STATIC_ASSERT(forward_iterator<_It2>);
_STL_INTERNAL_STATIC_ASSERT(sentinel_for<_Se2, _It2>);
_STL_INTERNAL_STATIC_ASSERT(indirectly_comparable<_It1, _It2, _Pr, _Pj1, _Pj2>);

if constexpr (_Is_ranges_random_iter_v<_It2> && sized_sentinel_for<_Se2, _It2>
&& _Is_any_of_v<_Pr, _STD equal_to<>, _RANGES equal_to>) {
const auto _Count2 = _Last2 - _First2;
if (_Count2 == 1) {
return _RANGES _Find_unchecked(
_STD move(_First1), _STD move(_Last1), _STD invoke(_Proj2, *_First2), _Proj1);
}
}

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_find_first_of_is_safe<_It1, _It2, _Pr> && sized_sentinel_for<_Se1, _It1>
&& sized_sentinel_for<_Se2, _It2> && is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>) {
Expand Down