[NFCI] Make BenchmarkRunner non-internal to its .cpp file

Currently the lifetime of a single BenchmarkRunner is constrained to a single RunBenchmark() call, but that will have to change for interleaved benchmark execution: we'll need to keep each runner around so we don't forget how many repetitions of an instance we've already done.
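To make the intent concrete, here is a minimal self-contained sketch of the interleaving this prepares for (ToyRunner and everything in it are illustrative stand-ins, not library API):

#include <cstdio>
#include <vector>

// Toy stand-in for a long-lived runner: the object must outlive a single
// "run" call so it can remember how many repetitions it has completed.
struct ToyRunner {
  const char* name;
  int reps_done;
  void DoOneRepetition() { std::printf("%s: rep %d\n", name, reps_done++); }
};

int main() {
  // One runner per benchmark instance, all alive at once...
  std::vector<ToyRunner> runners = {{"BM_Foo", 0}, {"BM_Bar", 0}};
  // ...so repetition N of every benchmark can run before repetition N+1
  // of any of them, instead of running each benchmark to completion first.
  for (int rep = 0; rep < 3; ++rep)
    for (ToyRunner& r : runners) r.DoOneRepetition();
}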
parent 520573fe
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "benchmark_runner.h"
+
 #include "benchmark/benchmark.h"
 #include "benchmark_api_internal.h"
 #include "internal_macros.h"
@@ -106,7 +107,8 @@ BenchmarkReporter::Run CreateRunReport(
       report.max_bytes_used = memory_result.max_bytes_used;
     }
 
-    internal::Finish(&report.counters, results.iterations, seconds, b.threads());
+    internal::Finish(&report.counters, results.iterations, seconds,
+                     b.threads());
   }
   return report;
 }
@@ -137,238 +139,211 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
   manager->NotifyThreadComplete();
 }
 
-class BenchmarkRunner {
- public:
-  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
-      : b(b_),
-        complexity_reports(complexity_reports_),
-        min_time(!IsZero(b.min_time()) ? b.min_time()
-                                       : FLAGS_benchmark_min_time),
-        repeats(b.repetitions() != 0 ? b.repetitions()
-                                     : FLAGS_benchmark_repetitions),
-        has_explicit_iteration_count(b.iterations() != 0),
-        pool(b.threads() - 1),
-        iters(has_explicit_iteration_count ? b.iterations() : 1),
-        perf_counters_measurement(
-            PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
-        perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
-                                          ? &perf_counters_measurement
-                                          : nullptr) {
-    run_results.display_report_aggregates_only =
-        (FLAGS_benchmark_report_aggregates_only ||
-         FLAGS_benchmark_display_aggregates_only);
-    run_results.file_report_aggregates_only =
-        FLAGS_benchmark_report_aggregates_only;
-    if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
-      run_results.display_report_aggregates_only =
-          (b.aggregation_report_mode() &
-           internal::ARM_DisplayReportAggregatesOnly);
-      run_results.file_report_aggregates_only =
-          (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
-      CHECK(FLAGS_benchmark_perf_counters.empty() ||
-            perf_counters_measurement.IsValid())
-          << "Perf counters were requested but could not be set up.";
-    }
-
-    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-      DoOneRepetition(repetition_num);
-    }
-
-    // Calculate additional statistics
-    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-
-    // Maybe calculate complexity report
-    if (complexity_reports && b.last_benchmark_instance) {
-      auto additional_run_stats = ComputeBigO(*complexity_reports);
-      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                         additional_run_stats.begin(),
-                                         additional_run_stats.end());
-      complexity_reports->clear();
-    }
-  }
-
-  RunResults&& get_results() { return std::move(run_results); }
-
- private:
-  RunResults run_results;
-
-  const benchmark::internal::BenchmarkInstance& b;
-  std::vector<BenchmarkReporter::Run>* complexity_reports;
-
-  const double min_time;
-  const int repeats;
-  const bool has_explicit_iteration_count;
-
-  std::vector<std::thread> pool;
-
-  IterationCount iters;  // preserved between repetitions!
-  // So only the first repetition has to find/calculate it,
-  // the other repetitions will just use that precomputed iteration count.
-
-  PerfCountersMeasurement perf_counters_measurement;
-  PerfCountersMeasurement* const perf_counters_measurement_ptr;
-
-  struct IterationResults {
-    internal::ThreadManager::Result results;
-    IterationCount iters;
-    double seconds;
-  };
-  IterationResults DoNIterations() {
-    VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
-
-    std::unique_ptr<internal::ThreadManager> manager;
-    manager.reset(new internal::ThreadManager(b.threads()));
-
-    // Run all but one thread in separate threads
-    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
-                             manager.get(), perf_counters_measurement_ptr);
-    }
-    // And run one thread here directly.
-    // (If we were asked to run just one thread, we don't create new threads.)
-    // Yes, we need to do this here *after* we start the separate threads.
-    RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
-
-    // The main thread has finished. Now let's wait for the other threads.
-    manager->WaitForAllThreads();
-    for (std::thread& thread : pool) thread.join();
-
-    IterationResults i;
-    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
-    {
-      MutexLock l(manager->GetBenchmarkMutex());
-      i.results = manager->results;
-    }
-
-    // And get rid of the manager.
-    manager.reset();
-
-    // Adjust real/manual time stats since they were reported per thread.
-    i.results.real_time_used /= b.threads();
-    i.results.manual_time_used /= b.threads();
-    // If we were measuring whole-process CPU usage, adjust the CPU time too.
-    if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
-
-    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
-            << i.results.real_time_used << "\n";
-
-    // By using KeepRunningBatch a benchmark can iterate more times than
-    // requested, so take the iteration count from i.results.
-    i.iters = i.results.iterations / b.threads();
-
-    // Base decisions off of real time if requested by this benchmark.
-    i.seconds = i.results.cpu_time_used;
-    if (b.use_manual_time()) {
-      i.seconds = i.results.manual_time_used;
-    } else if (b.use_real_time()) {
-      i.seconds = i.results.real_time_used;
-    }
-
-    return i;
-  }
-
-  IterationCount PredictNumItersNeeded(const IterationResults& i) const {
-    // See how much iterations should be increased by.
-    // Note: Avoid division by zero with max(seconds, 1ns).
-    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
-    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
-    // use the multiplier directly.
-    // Otherwise we use at most 10 times expansion.
-    // NOTE: When the last run was at least 10% of the min time the max
-    // expansion should be 14x.
-    bool is_significant = (i.seconds / min_time) > 0.1;
-    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
-    if (multiplier <= 1.0) multiplier = 2.0;
-
-    // So what seems to be the sufficiently-large iteration count? Round up.
-    const IterationCount max_next_iters = static_cast<IterationCount>(
-        std::lround(std::max(multiplier * static_cast<double>(i.iters),
-                             static_cast<double>(i.iters) + 1.0)));
-    // But we do have *some* sanity limits though..
-    const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
-
-    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
-    return next_iters;  // round up before conversion to integer.
-  }
-
-  bool ShouldReportIterationResults(const IterationResults& i) const {
-    // Determine if this run should be reported;
-    // Either it has run for a sufficient amount of time
-    // or because an error was reported.
-    return i.results.has_error_ ||
-           i.iters >= kMaxIterations ||  // Too many iterations already.
-           i.seconds >= min_time ||      // The elapsed time is large enough.
-           // CPU time is specified but the elapsed real time greatly exceeds
-           // the minimum time.
-           // Note that user provided timers are except from this sanity check.
-           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
-  }
-
-  void DoOneRepetition(int64_t repetition_index) {
-    const bool is_the_first_repetition = repetition_index == 0;
-    IterationResults i;
-
-    // We *may* be gradually increasing the length (iteration count)
-    // of the benchmark until we decide the results are significant.
-    // And once we do, we report those last results and exit.
-    // Please do note that the if there are repetitions, the iteration count
-    // is *only* calculated for the *first* repetition, and other repetitions
-    // simply use that precomputed iteration count.
-    for (;;) {
-      i = DoNIterations();
-
-      // Do we consider the results to be significant?
-      // If we are doing repetitions, and the first repetition was already done,
-      // it has calculated the correct iteration time, so we have run that very
-      // iteration count just now. No need to calculate anything. Just report.
-      // Else, the normal rules apply.
-      const bool results_are_significant = !is_the_first_repetition ||
-                                           has_explicit_iteration_count ||
-                                           ShouldReportIterationResults(i);
-
-      if (results_are_significant) break;  // Good, let's report them!
-
-      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
-      // iteration count, and run the benchmark again...
-      iters = PredictNumItersNeeded(i);
-      assert(iters > i.iters &&
-             "if we did more iterations than we want to do the next time, "
-             "then we should have accepted the current iteration run.");
-    }
-
-    // Oh, one last thing, we need to also produce the 'memory measurements'..
-    MemoryManager::Result memory_result;
-    IterationCount memory_iterations = 0;
-    if (memory_manager != nullptr) {
-      // Only run a few iterations to reduce the impact of one-time
-      // allocations in benchmarks that are not properly managed.
-      memory_iterations = std::min<IterationCount>(16, iters);
-      memory_manager->Start();
-      std::unique_ptr<internal::ThreadManager> manager;
-      manager.reset(new internal::ThreadManager(1));
-      RunInThread(&b, memory_iterations, 0, manager.get(),
-                  perf_counters_measurement_ptr);
-      manager->WaitForAllThreads();
-      manager.reset();
-
-      memory_manager->Stop(&memory_result);
-    }
-
-    // Ok, now actualy report.
-    BenchmarkReporter::Run report =
-        CreateRunReport(b, i.results, memory_iterations, memory_result,
-                        i.seconds, repetition_index, repeats);
-
-    if (complexity_reports && !report.error_occurred)
-      complexity_reports->push_back(report);
-
-    run_results.non_aggregates.push_back(report);
-  }
-};
-
 }  // end namespace
 
+BenchmarkRunner::BenchmarkRunner(
+    const benchmark::internal::BenchmarkInstance& b_,
+    std::vector<BenchmarkReporter::Run>* complexity_reports_)
+    : b(b_),
+      complexity_reports(complexity_reports_),
+      min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+      repeats(b.repetitions() != 0 ? b.repetitions()
+                                   : FLAGS_benchmark_repetitions),
+      has_explicit_iteration_count(b.iterations() != 0),
+      pool(b.threads() - 1),
+      iters(has_explicit_iteration_count ? b.iterations() : 1),
+      perf_counters_measurement(
+          PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
+                                        ? &perf_counters_measurement
+                                        : nullptr) {
+  run_results.display_report_aggregates_only =
+      (FLAGS_benchmark_report_aggregates_only ||
+       FLAGS_benchmark_display_aggregates_only);
+  run_results.file_report_aggregates_only =
+      FLAGS_benchmark_report_aggregates_only;
+  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
+    run_results.display_report_aggregates_only =
+        (b.aggregation_report_mode() &
+         internal::ARM_DisplayReportAggregatesOnly);
+    run_results.file_report_aggregates_only =
+        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
+    CHECK(FLAGS_benchmark_perf_counters.empty() ||
+          perf_counters_measurement.IsValid())
+        << "Perf counters were requested but could not be set up.";
+  }
+
+  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
+    DoOneRepetition(repetition_num);
+  }
+
+  // Calculate additional statistics
+  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+
+  // Maybe calculate complexity report
+  if (complexity_reports && b.last_benchmark_instance) {
+    auto additional_run_stats = ComputeBigO(*complexity_reports);
+    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                       additional_run_stats.begin(),
+                                       additional_run_stats.end());
+    complexity_reports->clear();
+  }
+}
+
+BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
+  VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
+
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(b.threads()));
+
+  // Run all but one thread in separate threads
+  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
+                           manager.get(), perf_counters_measurement_ptr);
+  }
+  // And run one thread here directly.
+  // (If we were asked to run just one thread, we don't create new threads.)
+  // Yes, we need to do this here *after* we start the separate threads.
+  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
+
+  // The main thread has finished. Now let's wait for the other threads.
+  manager->WaitForAllThreads();
+  for (std::thread& thread : pool) thread.join();
+
+  IterationResults i;
+  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
+  {
+    MutexLock l(manager->GetBenchmarkMutex());
+    i.results = manager->results;
+  }
+
+  // And get rid of the manager.
+  manager.reset();
+
+  // Adjust real/manual time stats since they were reported per thread.
+  i.results.real_time_used /= b.threads();
+  i.results.manual_time_used /= b.threads();
+  // If we were measuring whole-process CPU usage, adjust the CPU time too.
+  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
+
+  VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
+          << i.results.real_time_used << "\n";
+
+  // By using KeepRunningBatch a benchmark can iterate more times than
+  // requested, so take the iteration count from i.results.
+  i.iters = i.results.iterations / b.threads();
+
+  // Base decisions off of real time if requested by this benchmark.
+  i.seconds = i.results.cpu_time_used;
+  if (b.use_manual_time()) {
+    i.seconds = i.results.manual_time_used;
+  } else if (b.use_real_time()) {
+    i.seconds = i.results.real_time_used;
+  }
+
+  return i;
+}
+
+IterationCount BenchmarkRunner::PredictNumItersNeeded(
+    const IterationResults& i) const {
+  // See how much iterations should be increased by.
+  // Note: Avoid division by zero with max(seconds, 1ns).
+  double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
+  // If our last run was at least 10% of FLAGS_benchmark_min_time then we
+  // use the multiplier directly.
+  // Otherwise we use at most 10 times expansion.
+  // NOTE: When the last run was at least 10% of the min time the max
+  // expansion should be 14x.
+  bool is_significant = (i.seconds / min_time) > 0.1;
+  multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
+  if (multiplier <= 1.0) multiplier = 2.0;
+
+  // So what seems to be the sufficiently-large iteration count? Round up.
+  const IterationCount max_next_iters = static_cast<IterationCount>(
+      std::lround(std::max(multiplier * static_cast<double>(i.iters),
+                           static_cast<double>(i.iters) + 1.0)));
+  // But we do have *some* sanity limits though..
+  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
+
+  VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+  return next_iters;  // round up before conversion to integer.
+}
+
+bool BenchmarkRunner::ShouldReportIterationResults(
+    const IterationResults& i) const {
+  // Determine if this run should be reported;
+  // Either it has run for a sufficient amount of time
+  // or because an error was reported.
+  return i.results.has_error_ ||
+         i.iters >= kMaxIterations ||  // Too many iterations already.
+         i.seconds >= min_time ||      // The elapsed time is large enough.
+         // CPU time is specified but the elapsed real time greatly exceeds
+         // the minimum time.
+         // Note that user provided timers are except from this sanity check.
+         ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
+}
+
+void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
+  const bool is_the_first_repetition = repetition_index == 0;
+  IterationResults i;
+
+  // We *may* be gradually increasing the length (iteration count)
+  // of the benchmark until we decide the results are significant.
+  // And once we do, we report those last results and exit.
+  // Please do note that the if there are repetitions, the iteration count
+  // is *only* calculated for the *first* repetition, and other repetitions
+  // simply use that precomputed iteration count.
+  for (;;) {
+    i = DoNIterations();
+
+    // Do we consider the results to be significant?
+    // If we are doing repetitions, and the first repetition was already done,
+    // it has calculated the correct iteration time, so we have run that very
+    // iteration count just now. No need to calculate anything. Just report.
+    // Else, the normal rules apply.
+    const bool results_are_significant = !is_the_first_repetition ||
+                                         has_explicit_iteration_count ||
+                                         ShouldReportIterationResults(i);
+
+    if (results_are_significant) break;  // Good, let's report them!
+
+    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
+    // iteration count, and run the benchmark again...
+    iters = PredictNumItersNeeded(i);
+    assert(iters > i.iters &&
+           "if we did more iterations than we want to do the next time, "
+           "then we should have accepted the current iteration run.");
+  }
+
+  // Oh, one last thing, we need to also produce the 'memory measurements'..
+  MemoryManager::Result memory_result;
+  IterationCount memory_iterations = 0;
+  if (memory_manager != nullptr) {
+    // Only run a few iterations to reduce the impact of one-time
+    // allocations in benchmarks that are not properly managed.
+    memory_iterations = std::min<IterationCount>(16, iters);
+    memory_manager->Start();
+    std::unique_ptr<internal::ThreadManager> manager;
+    manager.reset(new internal::ThreadManager(1));
+    RunInThread(&b, memory_iterations, 0, manager.get(),
+                perf_counters_measurement_ptr);
+    manager->WaitForAllThreads();
+    manager.reset();
+
+    memory_manager->Stop(&memory_result);
+  }
+
+  // Ok, now actualy report.
+  BenchmarkReporter::Run report =
+      CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
+                      repetition_index, repeats);
+
+  if (complexity_reports && !report.error_occurred)
+    complexity_reports->push_back(report);
+
+  run_results.non_aggregates.push_back(report);
+}
+
 RunResults RunBenchmark(
     const benchmark::internal::BenchmarkInstance& b,
...
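(The body of RunBenchmark() is collapsed above. Since the constructor now performs all repetitions, the wrapper presumably reduces to the following; this is an assumption about the elided code, not part of the diff:)

// Assumed shape of the elided RunBenchmark() body (not shown in this
// diff): construct the runner, which performs every repetition in its
// constructor, then hand back the collected results.
RunResults RunBenchmark(
    const benchmark::internal::BenchmarkInstance& b,
    std::vector<BenchmarkReporter::Run>* complexity_reports) {
  BenchmarkRunner r(b, complexity_reports);
  return r.get_results();
}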
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -15,8 +15,13 @@
 #ifndef BENCHMARK_RUNNER_H_
 #define BENCHMARK_RUNNER_H_
 
+#include <thread>
+#include <vector>
+
 #include "benchmark_api_internal.h"
 #include "internal_macros.h"
+#include "perf_counters.h"
+#include "thread_manager.h"
 
 DECLARE_double(benchmark_min_time);
@@ -42,6 +47,46 @@ struct RunResults {
   bool file_report_aggregates_only = false;
 };
 
+class BenchmarkRunner {
+ public:
+  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+                  std::vector<BenchmarkReporter::Run>* complexity_reports_);
+
+  RunResults&& get_results() { return std::move(run_results); }
+
+ private:
+  RunResults run_results;
+
+  const benchmark::internal::BenchmarkInstance& b;
+  std::vector<BenchmarkReporter::Run>* complexity_reports;
+
+  const double min_time;
+  const int repeats;
+  const bool has_explicit_iteration_count;
+
+  std::vector<std::thread> pool;
+
+  IterationCount iters;  // preserved between repetitions!
+  // So only the first repetition has to find/calculate it,
+  // the other repetitions will just use that precomputed iteration count.
+
+  PerfCountersMeasurement perf_counters_measurement;
+  PerfCountersMeasurement* const perf_counters_measurement_ptr;
+
+  struct IterationResults {
+    internal::ThreadManager::Result results;
+    IterationCount iters;
+    double seconds;
+  };
+  IterationResults DoNIterations();
+
+  IterationCount PredictNumItersNeeded(const IterationResults& i) const;
+
+  bool ShouldReportIterationResults(const IterationResults& i) const;
+
+  void DoOneRepetition(int64_t repetition_index);
+};
+
 RunResults RunBenchmark(
     const benchmark::internal::BenchmarkInstance& b,
     std::vector<BenchmarkReporter::Run>* complexity_reports);
...