Skip to content

Commit

Permalink
<bitset>: optimize streaming operator >> (#5008)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
AlexGuteniev and StephanTLavavej authored Oct 21, 2024
1 parent a5d66bd commit aaca194
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 48 deletions.
108 changes: 80 additions & 28 deletions benchmarks/src/bitset_from_string.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <array>
#include <benchmark/benchmark.h>
#include <bitset>
#include <cstddef>
#include <random>
#include <sstream>
#include <string>

using namespace std;

Expand Down Expand Up @@ -39,7 +42,7 @@ namespace {
const auto random_digits = random_digits_init<N, charT, 2048>();

template <length_type Length, size_t N, class charT>
void BM_bitset_from_string(benchmark::State& state) {
void bitset_from_string(benchmark::State& state) {
const auto& digit_array = random_digits<N, charT>;
for (auto _ : state) {
benchmark::DoNotOptimize(digit_array);
Expand All @@ -56,34 +59,83 @@ namespace {
}
}
}

// Builds a string of exactly Length characters of type charT, each one a
// pseudo-random '0' or '1'.
//
// The engine is default-constructed (never reseeded here), so the generated
// digit sequence is the same on every call.
template <class charT, size_t Length>
basic_string<charT> random_digits_contiguous_string_init() {
    mt19937_64 engine{};
    uniform_int_distribution<> digit_dist('0', '1');

    // Draws the next digit and narrows/widens it to the requested character type.
    const auto next_digit = [&] { return static_cast<charT>(digit_dist(engine)); };

    basic_string<charT> digits;

    // Fill the storage in place; the callback reports how many characters it wrote.
    digits.resize_and_overwrite(Length, [&](charT* const out, size_t) {
        generate_n(out, Length, next_digit);
        return Length;
    });

    return digits;
}

// Variable template holding one digit string per <charT, Length> instantiation.
// Each instantiation is initialized from random_digits_contiguous_string_init<charT, Length>().
template <class charT, size_t Length>
const auto random_digits_contiguous_string = random_digits_contiguous_string_init<charT, Length>();

// Benchmark: extract bitset<N> values from an input string stream with operator>>.
//
// The stream wraps a 2048-character string of '0'/'1' digits. Every benchmark
// iteration performs 2048 / N extractions and then seeks back to the beginning
// so that the next iteration reads the same characters again.
template <size_t N, class charT>
void bitset_from_stream(benchmark::State& state) {
    constexpr size_t total_digits         = 2048;
    constexpr size_t extractions_per_pass = total_digits / N;

    basic_istringstream<charT> input(random_digits_contiguous_string<charT, total_digits>);
    bitset<N> parsed;

    for (auto _ : state) {
        benchmark::DoNotOptimize(input);

        size_t remaining = extractions_per_pass;
        while (remaining != 0) {
            input >> parsed;
            --remaining;
        }

        benchmark::DoNotOptimize(parsed);
        input.seekg(0); // rewind for the next iteration
    }
}

} // namespace

// Registrations under the BM_bitset_from_string name, one per (length_type, N, charT) combination.
// NOTE(review): the function template shown in this view is also spelled bitset_from_string and an
// equivalent set of registrations under that name follows; presumably these lines are the removed
// side of the diff - confirm before relying on them.
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, char>);

BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, wchar_t>);

BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, char>);

BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, wchar_t>);
// Register bitset_from_string for every combination of:
//   - length_type: char_count, null_term (NOTE(review): presumably selects whether an explicit
//     character count is passed or the input is treated as null-terminated - the function body
//     is not fully visible here; confirm)
//   - bit count N: 15, 16, 36, 64, 512, 2048
//   - character type: char, wchar_t
BENCHMARK(bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(bitset_from_string<length_type::char_count, 2048, char>);

BENCHMARK(bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(bitset_from_string<length_type::char_count, 2048, wchar_t>);

BENCHMARK(bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(bitset_from_string<length_type::null_term, 2048, char>);

BENCHMARK(bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(bitset_from_string<length_type::null_term, 2048, wchar_t>);

// Register the stream-extraction benchmark for the same bit counts as the string benchmarks
// (15, 16, 36, 64, 512, 2048), first for char streams and then for wchar_t streams.
BENCHMARK(bitset_from_stream<15, char>);
BENCHMARK(bitset_from_stream<16, char>);
BENCHMARK(bitset_from_stream<36, char>);
BENCHMARK(bitset_from_stream<64, char>);
BENCHMARK(bitset_from_stream<512, char>);
BENCHMARK(bitset_from_stream<2048, char>);

BENCHMARK(bitset_from_stream<15, wchar_t>);
BENCHMARK(bitset_from_stream<16, wchar_t>);
BENCHMARK(bitset_from_stream<36, wchar_t>);
BENCHMARK(bitset_from_stream<64, wchar_t>);
BENCHMARK(bitset_from_stream<512, wchar_t>);
BENCHMARK(bitset_from_stream<2048, wchar_t>);

// Google Benchmark macro that supplies the program's main() and runs the registered benchmarks.
BENCHMARK_MAIN();
61 changes: 41 additions & 20 deletions stl/inc/bitset
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,34 @@ __declspec(noalias) bool __stdcall __std_bitset_from_string_2(void* _Dest, const
#endif // _USE_STD_VECTOR_ALGORITHMS

_STD_BEGIN
// Scratch character buffer used by the bitset streaming operators.
//
// _Dynamic defaults to true when _Bits elements of _Elem occupy more than
// _STD_BITSET_TO_STREAM_STACK_RESERVATION bytes. This primary template is the
// _Dynamic == true flavor: it owns a new[]-allocated array of _Bits elements.
// The array is NOT allocated by the constructor; call _Allocate() before using _Buf.
// Non-copyable, because it owns the allocation.
template <class _Elem, size_t _Bits, bool _Dynamic = (_Bits * sizeof(_Elem) > _STD_BITSET_TO_STREAM_STACK_RESERVATION)>
struct _Buffer_for_bitset {
    _Elem* _Buf = nullptr; // stays null until _Allocate() succeeds

    _Buffer_for_bitset() noexcept = default;

    _Buffer_for_bitset(const _Buffer_for_bitset&)            = delete;
    _Buffer_for_bitset& operator=(const _Buffer_for_bitset&) = delete;

    // Obtains storage for _Bits elements; may throw if the allocation fails.
    void _Allocate() {
        _Buf = new _Elem[_Bits];
    }

    // Releases the array, if any (delete[] on a null pointer does nothing).
    ~_Buffer_for_bitset() {
        delete[] _Buf;
    }
};

// _Dynamic == false flavor of _Buffer_for_bitset: the characters live in an
// array stored directly inside the object, so nothing has to be allocated.
// Exposes the same _Buf / _Allocate() members as the primary template so that
// code using the buffer does not need to know which flavor it got.
template <class _Elem, size_t _Bits>
struct _Buffer_for_bitset<_Elem, _Bits, false> {
    // At least one element, so the array bound is never zero when _Bits == 0.
    _Elem _Buf[(_Bits == 0) ? 1 : _Bits];

    // User-provided and empty on purpose: _Buf is intentionally left uninitialized.
    _Buffer_for_bitset() noexcept {}

    _Buffer_for_bitset(const _Buffer_for_bitset&)            = delete;
    _Buffer_for_bitset& operator=(const _Buffer_for_bitset&) = delete;

    // Nothing to allocate for the in-object array.
    void _Allocate() noexcept {}
};

_EXPORT_STD template <size_t _Bits>
class bitset { // store fixed-length sequence of Boolean elements
private:
Expand Down Expand Up @@ -590,14 +618,11 @@ basic_ostream<_Elem, _Tr>& operator<<(basic_ostream<_Elem, _Tr>& _Ostr, const bi
const _Elem _Elem0 = _Ctype_fac.widen('0');
const _Elem _Elem1 = _Ctype_fac.widen('1');

if constexpr (_Bits * sizeof(_Elem) <= _STD_BITSET_TO_STREAM_STACK_RESERVATION) {
_Elem _Buf[_Bits + 1];
_Right._To_string(_Buf, _Bits, _Elem0, _Elem1);
_Buf[_Bits] = _Elem{'\0'};
return _Ostr << _Buf;
} else {
return _Ostr << _Right.template to_string<_Elem, _Tr, allocator<_Elem>>(_Elem0, _Elem1);
}
_Buffer_for_bitset<_Elem, _Bits> _Buf;
_Buf._Allocate();
_Right._To_string(_Buf._Buf, _Bits, _Elem0, _Elem1);
_Ostr.write(_Buf._Buf, _Bits);
return _Ostr;
}

_EXPORT_STD template <class _Elem, class _Tr, size_t _Bits>
Expand All @@ -608,14 +633,15 @@ basic_istream<_Elem, _Tr>& operator>>(basic_istream<_Elem, _Tr>& _Istr, bitset<_
const _Elem _Elem0 = _Ctype_fac.widen('0');
const _Elem _Elem1 = _Ctype_fac.widen('1');
typename _Istr_t::iostate _State = _Istr_t::goodbit;
bool _Changed = false;
string _Str;
size_t _Count = 0;
_Buffer_for_bitset<_Elem, _Bits> _Buf;
const typename _Istr_t::sentry _Ok(_Istr);

if (_Ok) { // valid stream, extract elements
_TRY_IO_BEGIN
_Buf._Allocate();
typename _Tr::int_type _Meta = _Istr.rdbuf()->sgetc();
for (size_t _Count = _Right.size(); 0 < _Count; _Meta = _Istr.rdbuf()->snextc(), (void) --_Count) {
for (; _Count < _Bits; ++_Count, _Meta = _Istr.rdbuf()->snextc()) {
// test _Meta
if (_Tr::eq_int_type(_Tr::eof(), _Meta)) { // end of file, quit
_State |= _Istr_t::eofbit;
Expand All @@ -628,28 +654,23 @@ basic_istream<_Elem, _Tr>& operator>>(basic_istream<_Elem, _Tr>& _Istr, bitset<_
break; // invalid element
}

if (_Str.max_size() <= _Str.size()) { // no room in string, give up (unlikely)
_State |= _Istr_t::failbit;
break;
}

// valid, append '0' or '1'
_Str.push_back('0' + _Tr::eq(_Char, _Elem1));
_Changed = true;
_Buf._Buf[_Count] = '0' + _Tr::eq(_Char, _Elem1);
}

_CATCH_IO_(_Istr_t, _Istr)
}

constexpr bool _Has_bits = _Bits > 0;

if constexpr (_Has_bits) {
if (!_Changed) {
if (_Count == 0) {
_State |= _Istr_t::failbit;
}
}

_Istr.setstate(_State);
_Right = bitset<_Bits>(_Str); // convert string and store
_Right = bitset<_Bits>(_Buf._Buf, _Count); // convert string and store
return _Istr;
}

Expand Down

0 comments on commit aaca194

Please sign in to comment.