Unverified Commit e539e807 by Mircea Trofin Committed by GitHub

[PFM] Extend perf counter support to multi-threaded cases. (#1153)

* Extend perf counter support to multi-threaded cases. * Docs update * const-ed Snapshot
parent 7d0d9061
......@@ -13,8 +13,7 @@ This feature is available if:
Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build
time, and
* Currently, there is a limitation that the benchmark be run on one thread.
time
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
......
......@@ -163,8 +163,6 @@ class BenchmarkRunner {
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid())
<< "Perf counters are not supported in multi-threaded cases.\n";
CHECK(FLAGS_benchmark_perf_counters.empty() ||
perf_counters_measurement.IsValid())
<< "Perf counters were requested but could not be set up.";
......
......@@ -67,6 +67,10 @@ PerfCounters PerfCounters::Create(
return NoCounters();
}
attr.disabled = is_first;
// Note: the man page for perf_event_create suggests inerit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
......
......@@ -92,7 +92,7 @@ class PerfCounters final {
// Take a snapshot of the current value of the counters into the provided
// valid PerfCounterValues storage. The values are populated such that:
// names()[i]'s value is (*values)[i]
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) {
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
assert(IsValid());
......
#include <thread>
#include "../src/perf_counters.h"
#include "gtest/gtest.h"
......@@ -92,4 +94,52 @@ TEST(PerfCountersTest, Read2Counters) {
EXPECT_GT(values2[0], 0);
EXPECT_GT(values2[1], 0);
}
size_t do_work() {
size_t res = 0;
for (size_t i = 0; i < 100000000; ++i) res += i * i;
return res;
}
void measure(size_t threadcount, PerfCounterValues* values1,
PerfCounterValues* values2) {
CHECK_NE(values1, nullptr);
CHECK_NE(values2, nullptr);
std::vector<std::thread> threads(threadcount);
auto work = [&]() { CHECK(do_work() > 1000); };
// We need to first set up the counters, then start the threads, so the
// threads would inherit the counters. But later, we need to first destroy the
// thread pool (so all the work finishes), then measure the counters. So the
// scopes overlap, and we need to explicitly control the scope of the
// threadpool.
auto counters =
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
for (auto& t : threads) t = std::thread(work);
counters.Snapshot(values1);
for (auto& t : threads) t.join();
counters.Snapshot(values2);
}
TEST(PerfCountersTest, MultiThreaded) {
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.";
}
EXPECT_TRUE(PerfCounters::Initialize());
PerfCounterValues values1(2);
PerfCounterValues values2(2);
measure(2, &values1, &values2);
std::vector<double> D1{static_cast<double>(values2[0] - values1[0]),
static_cast<double>(values2[1] - values1[1])};
measure(4, &values1, &values2);
std::vector<double> D2{static_cast<double>(values2[0] - values1[0]),
static_cast<double>(values2[1] - values1[1])};
// Some extra work will happen on the main thread - like joining the threads
// - so the ratio won't be quite 2.0, but very close.
EXPECT_GE(D2[0], 1.9 * D1[0]);
EXPECT_GE(D2[1], 1.9 * D1[1]);
}
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment