Skip to content

Commit

Permalink
Move ProfilerManager Start/Stop routines closer to actual benchmark g…
Browse files Browse the repository at this point in the history
…oogle#1807

Previously, the Start/Stop routines were called before the benchmark function
was called and after it returned. However, what we really want is for them
to be called within the core of the benchmark:

  for (auto _ : state) {
    // This is what we want traced, not the entire BM_foo function.
  }
  • Loading branch information
xdje42 committed Jul 25, 2024
1 parent 64b5d8c commit db498ec
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 20 deletions.
4 changes: 3 additions & 1 deletion include/benchmark/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,8 @@ class BENCHMARK_EXPORT State {
State(std::string name, IterationCount max_iters,
const std::vector<int64_t>& ranges, int thread_i, int n_threads,
internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement);
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager);

void StartKeepRunning();
// Implementation of KeepRunning() and KeepRunningBatch().
Expand All @@ -1019,6 +1020,7 @@ class BENCHMARK_EXPORT State {
internal::ThreadTimer* const timer_;
internal::ThreadManager* const manager_;
internal::PerfCountersMeasurement* const perf_counters_measurement_;
ProfilerManager* const profiler_manager_;

friend class internal::BenchmarkInstance;
};
Expand Down
10 changes: 8 additions & 2 deletions src/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void UseCharPointer(char const volatile* const v) {
State::State(std::string name, IterationCount max_iters,
const std::vector<int64_t>& ranges, int thread_i, int n_threads,
internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement)
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
Expand All @@ -182,7 +183,8 @@ State::State(std::string name, IterationCount max_iters,
threads_(n_threads),
timer_(timer),
manager_(manager),
perf_counters_measurement_(perf_counters_measurement) {
perf_counters_measurement_(perf_counters_measurement),
profiler_manager_(profiler_manager) {
BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
BM_CHECK_LT(thread_index_, threads_)
<< "thread_index must be less than threads";
Expand Down Expand Up @@ -302,6 +304,8 @@ void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
total_iterations_ = skipped() ? 0 : max_iterations;
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false))
profiler_manager_->AfterSetupStart();
manager_->StartStopBarrier();
if (!skipped()) ResumeTiming();
}
Expand All @@ -315,6 +319,8 @@ void State::FinishKeepRunning() {
total_iterations_ = 0;
finished_ = true;
manager_->StartStopBarrier();
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false))
profiler_manager_->BeforeTeardownStop();
}

namespace internal {
Expand Down
9 changes: 5 additions & 4 deletions src/benchmark_api_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,26 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
State BenchmarkInstance::Run(
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const {
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const {
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
manager, perf_counters_measurement);
manager, perf_counters_measurement, profiler_manager);
benchmark_.Run(st);
return st;
}

void BenchmarkInstance::Setup() const {
if (setup_) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr);
nullptr, nullptr, nullptr, nullptr);
setup_(st);
}
}

void BenchmarkInstance::Teardown() const {
if (teardown_) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr);
nullptr, nullptr, nullptr, nullptr);
teardown_(st);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/benchmark_api_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ class BenchmarkInstance {

State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const;
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const;

private:
BenchmarkName name_;
Expand Down
21 changes: 12 additions & 9 deletions src/benchmark_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,15 @@ BenchmarkReporter::Run CreateRunReport(
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement) {
PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) {
internal::ThreadTimer timer(
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());

State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
State st = b->Run(iters, thread_id, &timer, manager,
perf_counters_measurement, profiler_manager);
BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
Expand Down Expand Up @@ -268,12 +269,14 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get(), perf_counters_measurement_ptr);
manager.get(), perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);

// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
Expand Down Expand Up @@ -415,7 +418,8 @@ MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
Expand All @@ -429,11 +433,10 @@ void BenchmarkRunner::RunProfilerManager() {
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
profiler_manager->AfterSetupStart();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr);
/*perf_counters_measurement_ptr=*/nullptr,
/*profiler_manager=*/profiler_manager);
manager->WaitForAllThreads();
profiler_manager->BeforeTeardownStop();
manager.reset();
b.Teardown();
}
Expand Down
13 changes: 10 additions & 3 deletions test/profiler_manager_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
#include "output_test.h"

class TestProfilerManager : public benchmark::ProfilerManager {
void AfterSetupStart() override {}
void BeforeTeardownStop() override {}
public:
void AfterSetupStart() override { ++start_called; }
void BeforeTeardownStop() override { ++stop_called; }

int start_called = 0;
int stop_called = 0;
};

void BM_empty(benchmark::State& state) {
Expand Down Expand Up @@ -35,9 +39,12 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});

int main(int argc, char* argv[]) {
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
std::unique_ptr<TestProfilerManager> pm(new TestProfilerManager());

benchmark::RegisterProfilerManager(pm.get());
RunOutputTests(argc, argv);
benchmark::RegisterProfilerManager(nullptr);

assert(pm->start_called == 1);
assert(pm->stop_called == 1);
}

0 comments on commit db498ec

Please sign in to comment.