Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<bitset>: optimize streaming operator >> #5008

Merged
merged 19 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 80 additions & 28 deletions benchmarks/src/bitset_from_string.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <array>
#include <benchmark/benchmark.h>
#include <bitset>
#include <cstddef>
#include <random>
#include <sstream>
#include <string>

using namespace std;

Expand Down Expand Up @@ -39,7 +42,7 @@ namespace {
const auto random_digits = random_digits_init<N, charT, 2048>();

template <length_type Length, size_t N, class charT>
void BM_bitset_from_string(benchmark::State& state) {
void bitset_from_string(benchmark::State& state) {
const auto& digit_array = random_digits<N, charT>;
for (auto _ : state) {
benchmark::DoNotOptimize(digit_array);
Expand All @@ -56,34 +59,83 @@ namespace {
}
}
}

// Builds a string of exactly Length characters, each one the charT form of '0' or '1'.
//
// The digits are drawn from a default-constructed (and therefore default-seeded) mt19937_64
// through a uniform_int_distribution over ['0', '1'], so repeated calls produce the same
// sequence on a given standard library implementation.
//
// Template parameters:
//   charT  - character type of the returned basic_string
//   Length - number of characters to generate
// Returns: the generated basic_string<charT>.
template <class charT, size_t Length>
basic_string<charT> random_digits_contiguous_string_init() {
    mt19937_64 rnd{};
    uniform_int_distribution<> dis('0', '1');

    basic_string<charT> result;

    // resize_and_overwrite hands out the string's own storage, so generate_n writes the
    // digits in place; the value returned from the callback becomes the string's size.
    result.resize_and_overwrite(Length, [&](charT* ptr, size_t) {
        generate_n(ptr, Length, [&] { return static_cast<charT>(dis(rnd)); });
        return Length;
    });

    return result;
}

// Variable template: for each <charT, Length> instantiation, holds the string returned by
// random_digits_contiguous_string_init<charT, Length>(). bitset_from_stream uses it as the
// initial contents of its input stream.
template <class charT, size_t Length>
const auto random_digits_contiguous_string = random_digits_contiguous_string_init<charT, Length>();

// Benchmark body for `stream >> bitset<N>`.
//
// One basic_istringstream<charT> is built over a 2048-character string of '0'/'1' digits
// and reused for the whole run. Each benchmark iteration performs 2048 / N extractions
// into the same bitset and then seeks the stream back to offset 0 for the next iteration.
//
// Template parameters:
//   N     - size of the bitset being extracted
//   charT - character type of the stream
template <size_t N, class charT>
void bitset_from_stream(benchmark::State& state) {
    constexpr size_t total_digits         = 2048;
    constexpr size_t extractions_per_pass = total_digits / N;

    basic_istringstream<charT> input(random_digits_contiguous_string<charT, total_digits>);
    bitset<N> parsed;

    for (auto _ : state) {
        benchmark::DoNotOptimize(input);

        for (size_t remaining = extractions_per_pass; remaining != 0; --remaining) {
            input >> parsed;
        }

        benchmark::DoNotOptimize(parsed);

        // rewind so the next iteration reads the same digits again
        input.seekg(0);
    }
}

} // namespace

// NOTE(review): the registrations in this group name BM_bitset_from_string, while the same
// instantiations are registered again further down as bitset_from_string. Presumably this
// group is the pre-rename spelling — confirm which set should remain.

// length_type::char_count, char
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, char>);

// length_type::char_count, wchar_t
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, wchar_t>);

// length_type::null_term, char
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, char>);

// length_type::null_term, wchar_t
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, wchar_t>);
// bitset_from_string: length_type::char_count, char
BENCHMARK(bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 2048, char>);

// bitset_from_string: length_type::char_count, wchar_t
BENCHMARK(bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 2048, wchar_t>);

// bitset_from_string: length_type::null_term, char
BENCHMARK(bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 2048, char>);

// bitset_from_string: length_type::null_term, wchar_t
BENCHMARK(bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 2048, wchar_t>);

// bitset_from_stream (operator>> into bitset<N>): char streams
BENCHMARK(bitset_from_stream<15, char>);
BENCHMARK(bitset_from_stream<16, char>);
BENCHMARK(bitset_from_stream<36, char>);
BENCHMARK(bitset_from_stream<64, char>);
BENCHMARK(bitset_from_stream<512, char>);
BENCHMARK(bitset_from_stream<2048, char>);

// bitset_from_stream (operator>> into bitset<N>): wchar_t streams
BENCHMARK(bitset_from_stream<15, wchar_t>);
BENCHMARK(bitset_from_stream<16, wchar_t>);
BENCHMARK(bitset_from_stream<36, wchar_t>);
BENCHMARK(bitset_from_stream<64, wchar_t>);
BENCHMARK(bitset_from_stream<512, wchar_t>);
BENCHMARK(bitset_from_stream<2048, wchar_t>);

// Google Benchmark entry point for this executable.
BENCHMARK_MAIN();
61 changes: 41 additions & 20 deletions stl/inc/bitset
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,34 @@ __declspec(noalias) bool __stdcall __std_bitset_from_string_2(void* _Dest, const
#endif // _USE_STD_VECTOR_ALGORITHMS

_STD_BEGIN
// Scratch buffer of _Bits characters used by the bitset stream operators.
//
// Primary template, used when _Dynamic is true, i.e. when _Bits * sizeof(_Elem) is greater
// than _STD_BITSET_TO_STREAM_STACK_RESERVATION. The storage is obtained with new[] in
// _Allocate() and released with delete[] in the destructor. The type is non-copyable.
template <class _Elem, size_t _Bits, bool _Dynamic = (_Bits * sizeof(_Elem) > _STD_BITSET_TO_STREAM_STACK_RESERVATION)>
struct _Buffer_for_bitset {
    _Elem* _Buf = nullptr; // stays null until _Allocate() is called

    _Buffer_for_bitset() noexcept = default;

    ~_Buffer_for_bitset() {
        delete[] _Buf; // no-op when _Allocate() was never reached
    }

    _Buffer_for_bitset(const _Buffer_for_bitset&)            = delete;
    _Buffer_for_bitset& operator=(const _Buffer_for_bitset&) = delete;

    // Acquires storage for _Bits elements; may throw if the allocation fails.
    void _Allocate() {
        _Buf = new _Elem[_Bits];
    }
};

// Partial specialization for _Dynamic == false: the buffer is an array member of the object
// itself, so _Allocate() has nothing to do. The type is non-copyable.
template <class _Elem, size_t _Bits>
struct _Buffer_for_bitset<_Elem, _Bits, false> {
    // one element when _Bits is 0, so the array bound is never zero
    _Elem _Buf[(_Bits == 0) ? 1 : _Bits];

    // User-provided and empty on purpose: _Buf is left uninitialized.
    _Buffer_for_bitset() noexcept {}

    _Buffer_for_bitset(const _Buffer_for_bitset&)            = delete;
    _Buffer_for_bitset& operator=(const _Buffer_for_bitset&) = delete;

    // Storage already exists as a member; kept so callers can treat both variants alike.
    void _Allocate() noexcept {}
};

_EXPORT_STD template <size_t _Bits>
class bitset { // store fixed-length sequence of Boolean elements
private:
Expand Down Expand Up @@ -590,14 +618,11 @@ basic_ostream<_Elem, _Tr>& operator<<(basic_ostream<_Elem, _Tr>& _Ostr, const bi
const _Elem _Elem0 = _Ctype_fac.widen('0');
const _Elem _Elem1 = _Ctype_fac.widen('1');

if constexpr (_Bits * sizeof(_Elem) <= _STD_BITSET_TO_STREAM_STACK_RESERVATION) {
_Elem _Buf[_Bits + 1];
_Right._To_string(_Buf, _Bits, _Elem0, _Elem1);
_Buf[_Bits] = _Elem{'\0'};
return _Ostr << _Buf;
} else {
return _Ostr << _Right.template to_string<_Elem, _Tr, allocator<_Elem>>(_Elem0, _Elem1);
}
_Buffer_for_bitset<_Elem, _Bits> _Buf;
_Buf._Allocate();
_Right._To_string(_Buf._Buf, _Bits, _Elem0, _Elem1);
_Ostr.write(_Buf._Buf, _Bits);
return _Ostr;
}

_EXPORT_STD template <class _Elem, class _Tr, size_t _Bits>
Expand All @@ -608,14 +633,15 @@ basic_istream<_Elem, _Tr>& operator>>(basic_istream<_Elem, _Tr>& _Istr, bitset<_
const _Elem _Elem0 = _Ctype_fac.widen('0');
const _Elem _Elem1 = _Ctype_fac.widen('1');
typename _Istr_t::iostate _State = _Istr_t::goodbit;
bool _Changed = false;
string _Str;
size_t _Count = 0;
_Buffer_for_bitset<_Elem, _Bits> _Buf;
const typename _Istr_t::sentry _Ok(_Istr);

if (_Ok) { // valid stream, extract elements
_TRY_IO_BEGIN
_Buf._Allocate();
typename _Tr::int_type _Meta = _Istr.rdbuf()->sgetc();
for (size_t _Count = _Right.size(); 0 < _Count; _Meta = _Istr.rdbuf()->snextc(), (void) --_Count) {
for (; _Count < _Bits; ++_Count, _Meta = _Istr.rdbuf()->snextc()) {
// test _Meta
if (_Tr::eq_int_type(_Tr::eof(), _Meta)) { // end of file, quit
_State |= _Istr_t::eofbit;
Expand All @@ -628,28 +654,23 @@ basic_istream<_Elem, _Tr>& operator>>(basic_istream<_Elem, _Tr>& _Istr, bitset<_
break; // invalid element
}

if (_Str.max_size() <= _Str.size()) { // no room in string, give up (unlikely)
_State |= _Istr_t::failbit;
break;
}

// valid, append '0' or '1'
_Str.push_back('0' + _Tr::eq(_Char, _Elem1));
_Changed = true;
_Buf._Buf[_Count] = '0' + _Tr::eq(_Char, _Elem1);
}

_CATCH_IO_(_Istr_t, _Istr)
}

constexpr bool _Has_bits = _Bits > 0;

if constexpr (_Has_bits) {
if (!_Changed) {
if (_Count == 0) {
_State |= _Istr_t::failbit;
}
}

_Istr.setstate(_State);
_Right = bitset<_Bits>(_Str); // convert string and store
_Right = bitset<_Bits>(_Buf._Buf, _Count); // convert string and store
return _Istr;
}

Expand Down