Skip to content

Commit

Permalink
Dynamically detect PMU capabilities through libpfm
Browse files Browse the repository at this point in the history
- Instead of allowing for up to 3 counters, libpfm's internal
  capabilities of reporting PMU info are used to manage a per-PMU
  "registry" and dynamically allocate "slots" according to the specific
  counters requested.
- per-PMU information is obtained, where each PMU reports its own
  capabilities in the form of fixed/non-fixed counter limits.
- In this PR/commit, it is *still* impossible to get more detailed
  (x86-only) counter information in terms of fixed/non-fixed counter
  association, due to what seems to be a lack of API surface on libpfm
  itself: https://sourceforge.net/p/perfmon2/mailman/message/37631173/
- The maximal number of counters is bumped from 3 to 63, which together
  with the current padding "scheme" means we pre-allocate/inline up to
  64 counter slots (64-bits each) per measurement instance
- Closes #1377
  • Loading branch information
damageboy committed Apr 5, 2022
1 parent 60b16f1 commit cf45b73
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 26 deletions.
147 changes: 127 additions & 20 deletions src/perf_counters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,130 @@
#include <vector>

#if defined HAVE_LIBPFM
#include <unordered_map>
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif

namespace benchmark {
namespace internal {

constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM

// Tracks the events requested from a single libpfm PMU and enforces the
// limit on the number of counters that libpfm reports for that PMU.
//
// The type is move-only. If the PMU cannot be queried, the registry is left
// with zero available counters, so every AddCounter() call is rejected.
class SinglePMURegistry {
 public:
  ~SinglePMURegistry() = default;
  SinglePMURegistry(SinglePMURegistry&&) = default;
  SinglePMURegistry(const SinglePMURegistry&) = delete;
  // Defaulted (it used to be declared without a definition, which would only
  // surface as a link error at the first use).
  SinglePMURegistry& operator=(SinglePMURegistry&&) = default;
  SinglePMURegistry& operator=(const SinglePMURegistry&) = delete;

  // Queries libpfm for the capabilities of `pmu_id` and records its name,
  // description and counter limits. On failure an error is logged and the
  // registry keeps empty strings and zero counter limits.
  SinglePMURegistry(pfm_pmu_t pmu_id)
      : pmu_id_(pmu_id),
        name_(""),  // never leave the C strings indeterminate: they are
        desc_(""),  // streamed into logs even when the PMU lookup fails
        available_counters_(0),
        available_fixed_counters_(0) {
    pfm_pmu_info_t pmu_info{};
    const auto pfm_pmu = pfm_get_pmu_info(pmu_id, &pmu_info);

    if (pfm_pmu != PFM_SUCCESS) {
      GetErrorLogInstance() << "Unknown pmu: " << pmu_id << "\n";
      return;
    }

    name_ = pmu_info.name;
    desc_ = pmu_info.desc;
    available_counters_ = pmu_info.num_cntrs;
    available_fixed_counters_ = pmu_info.num_fixed_cntrs;

    BM_VLOG(1) << "PMU: " << pmu_id << " " << name_ << " " << desc_ << "\n";
    BM_VLOG(1) << "     counters: " << available_counters_
               << " fixed: " << available_fixed_counters_ << "\n";
  }

  // Name of the PMU as reported by libpfm, or "" if the lookup failed.
  const char* name() const { return name_; }

  // Registers `event_id` with this PMU.
  //
  // Returns true if the event is registered (including when it already was),
  // and false if libpfm does not know the event or the PMU's counter limit
  // has been reached. Failures are reported through the error log.
  bool AddCounter(int event_id) {
    pfm_event_info_t info{};
    const auto pfm_event_info =
        pfm_get_event_info(event_id, PFM_OS_PERF_EVENT, &info);

    if (pfm_event_info != PFM_SUCCESS) {
      GetErrorLogInstance() << "Unknown event id: " << event_id << "\n";
      return false;
    }

    assert(info.pmu == pmu_id_);

    // Requesting the same event twice does not consume another slot.
    if (counter_ids_.find(event_id) != counter_ids_.end()) return true;

    // Compare in the unsigned domain so that no narrowing cast of size() is
    // needed; a non-positive limit means that nothing can be registered.
    const size_t counter_limit =
        available_counters_ > 0 ? static_cast<size_t>(available_counters_) : 0;
    if (counter_ids_.size() >= counter_limit) {
      GetErrorLogInstance() << "Maximal number of counters for PMU " << name_
                            << " (" << available_counters_ << ") reached.\n";
      return false;
    }

    counter_ids_.emplace(event_id, info.code);

    BM_VLOG(2) << "Registered counter: " << event_id << " (" << info.name
               << " - " << info.desc << ") in pmu " << name_ << " ("
               << counter_ids_.size() << "/" << available_counters_ << ")\n";

    return true;
  }

 private:
  pfm_pmu_t pmu_id_;
  // Strings returned by libpfm; not owned by this object.
  const char* name_;
  const char* desc_;
  // Registered events: libpfm event id -> event code.
  std::unordered_map<int, uint64_t> counter_ids_;
  // Not consulted by AddCounter(); fixed counters are not tracked separately.
  std::unordered_map<int, uint64_t> fixed_counter_ids_;
  int available_counters_;
  int available_fixed_counters_;
};

// Maps each PMU to its own SinglePMURegistry, creating the per-PMU registries
// on demand as counters are enlisted.
//
// The type is move-only.
class PMURegistry {
 public:
  PMURegistry() = default;
  ~PMURegistry() = default;
  PMURegistry(PMURegistry&&) = default;
  PMURegistry(const PMURegistry&) = delete;
  // Defaulted (it used to be declared without a definition, which would only
  // surface as a link error at the first use).
  PMURegistry& operator=(PMURegistry&&) = default;
  PMURegistry& operator=(const PMURegistry&) = delete;

  // Resolves the counter `name` through libpfm, fills `attr_base` with the
  // perf_event encoding of the event and registers the event with the
  // registry of the PMU it belongs to.
  //
  // Returns false (after logging the reason) if the name or the resulting
  // event index is unknown to libpfm, or if the owning PMU refuses the
  // counter. `attr_base.size` is overwritten in all cases.
  bool EnlistCounter(const std::string& name,
                     struct perf_event_attr& attr_base) {
    attr_base.size = sizeof(attr_base);
    pfm_perf_encode_arg_t encoding{};
    encoding.attr = &attr_base;

    const auto pfm_get = pfm_get_os_event_encoding(
        name.c_str(), PFM_PLM3, PFM_OS_PERF_EVENT, &encoding);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
      return false;
    }

    pfm_event_info_t info{};
    const auto pfm_info =
        pfm_get_event_info(encoding.idx, PFM_OS_PERF_EVENT, &info);
    if (pfm_info != PFM_SUCCESS) {
      GetErrorLogInstance()
          << "Unknown counter idx: " << encoding.idx << "(" << name << ")\n";
      return false;
    }

    // Spin-up a new per-PMU sub-registry if needed, keeping the iterator so
    // the map is searched only once.
    auto pmu_it = pmu_registry_.find(info.pmu);
    if (pmu_it == pmu_registry_.end()) {
      pmu_it =
          pmu_registry_.emplace(info.pmu, SinglePMURegistry(info.pmu)).first;
    }

    return pmu_it->second.AddCounter(info.idx);
  }

 private:
  // One sub-registry per PMU that had at least one counter enlisted.
  std::unordered_map<pfm_pmu_t, SinglePMURegistry> pmu_registry_;
};

// Perf counters are reported as supported here: this definition sits in the
// HAVE_LIBPFM branch of the file.
const bool PerfCounters::kSupported = true;

// Initializes libpfm; returns true only if pfm_initialize() reports
// PFM_SUCCESS.
bool PerfCounters::Initialize() {
  const auto init_status = pfm_initialize();
  return init_status == PFM_SUCCESS;
}
Expand All @@ -38,35 +152,28 @@ PerfCounters PerfCounters::Create(
if (counter_names.empty()) {
return NoCounters();
}
if (counter_names.size() > PerfCounterValues::kMaxCounters) {
GetErrorLogInstance()
<< counter_names.size()
<< " counters were requested. The minimum is 1, the maximum is "
<< PerfCounterValues::kMaxCounters << "\n";
return NoCounters();
}

std::vector<int> counter_ids(counter_names.size());
PMURegistry registry{};

const int mode = PFM_PLM3; // user mode only
for (size_t i = 0; i < counter_names.size(); ++i) {
const bool is_first = i == 0;
struct perf_event_attr attr {};
attr.size = sizeof(attr);
const int group_id = !is_first ? counter_ids[0] : -1;
const auto& name = counter_names[i];
if (name.empty()) {
GetErrorLogInstance() << "A counter name was the empty string\n";
return NoCounters();
}
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;

const int pfm_get =
pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
struct perf_event_attr attr {};
auto ok = registry.EnlistCounter(name, attr);

if (!ok) {
GetErrorLogInstance() << "Failed to register counter: " << name << "\n";
return NoCounters();
}

const bool is_first = i == 0;
const int group_id = !is_first ? counter_ids[0] : -1;

attr.disabled = is_first;
// Note: the man page for perf_event_open suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
Expand Down
13 changes: 7 additions & 6 deletions src/perf_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,14 @@ namespace internal {
// operator[]) of this object.
class PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
explicit PerfCounterValues(int nr_counters)
: nr_counters_(nr_counters)
{
BM_CHECK_LE(nr_counters_, kMaxPreAllocatedCounters);
}

uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }

static constexpr size_t kMaxCounters = 3;
static constexpr size_t kMaxPreAllocatedCounters = 63;

private:
friend class PerfCounters;
Expand All @@ -67,8 +68,8 @@ class PerfCounterValues {
}

static constexpr size_t kPadding = 1;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
std::array<uint64_t, kPadding + kMaxPreAllocatedCounters> values_;
const int nr_counters_;
};

// Collect PMU counters. The object, once constructed, is ready to be used by
Expand Down

0 comments on commit cf45b73

Please sign in to comment.