Unverified commit fbc31405 by Roman Lebedev, committed via GitHub

Random interleaving of benchmark repetitions - the sequel (fixes #1051) (#1163)

Inspired by the original implementation by Hai Huang (@haih-g) in
https://github.com/google/benchmark/pull/1105.

The original implementation had design deficiencies that weren't really
addressable without a redesign, so it was reverted. In essence, it consisted
of two separable parts:
* reducing the amount of time each repetition runs for, while symmetrically
  increasing the repetition count
* running the repetitions in random order

While that worked fine for the usual case, it broke down when the user
specified repetitions (the request was completely ignored), or specified a
per-repetition min time (the repetition count was still adjusted, but the
per-repetition time was not, leading to much longer total run times).

Here, as I originally suggested in the review of that PR, the features are
separated, and only a single one is implemented: running the repetitions in
random order. Now that the runs/repetitions are no longer in order, the
tooling may wish to sort the output, and indeed `compare.py` has been updated
to do that: #1168.
parent d17ea665
@@ -299,6 +299,8 @@ too (`-lkstat`).
 [Setting the Time Unit](#setting-the-time-unit)
+[Random Interleaving](docs/random_interleaving.md)
 [User-Requested Performance Counters](docs/perf_counters.md)
 [Preventing Optimization](#preventing-optimization)
...
<a name="interleaving" />

# Random Interleaving

[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It randomly interleaves repetitions of a
microbenchmark with repetitions from other microbenchmarks in the same benchmark
test. Data shows it is able to lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.

To use it, you mainly need to set `--benchmark_enable_random_interleaving=true`.
Optionally, specify a non-zero repetition count (e.g. `--benchmark_repetitions=9`)
and decrease the per-repetition time (e.g. `--benchmark_min_time=0.1`).
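For illustration, a minimal benchmark binary that these flags can be applied to
at run time (the benchmark name and invocation below are hypothetical, not part
of this change):

```c++
#include "benchmark/benchmark.h"

// A trivial benchmark; any real workload would go in the loop body.
static void BM_Example(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Example);

BENCHMARK_MAIN();

// Hypothetical invocation enabling random interleaving:
//   ./example_benchmark --benchmark_enable_random_interleaving=true \
//       --benchmark_repetitions=9 --benchmark_min_time=0.1
```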
@@ -1472,6 +1472,19 @@ class BenchmarkReporter {
     int64_t max_bytes_used;
   };

+  struct PerFamilyRunReports {
+    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
+
+    // How many runs will all instances of this benchmark perform?
+    int num_runs_total;
+
+    // How many runs have happened already?
+    int num_runs_done;
+
+    // The reports about (non-erroneous!) runs of this family.
+    std::vector<BenchmarkReporter::Run> Runs;
+  };
+
   // Construct a BenchmarkReporter with the output stream set to 'std::cout'
   // and the error stream set to 'std::cerr'
   BenchmarkReporter();
...
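As a sketch of how the two counters are meant to interact (a self-contained toy
with simplified types, not the library's code): `num_runs_total` is the sum of
repetitions over all instances of a family, repetitions may finish in any
shuffled order, and the family is complete only when the counters meet.

```c++
#include <cassert>
#include <vector>

// Toy mirror of the PerFamilyRunReports bookkeeping idea.
struct ToyFamilyReports {
  int num_runs_total = 0;  // sum of repetitions over all instances
  int num_runs_done = 0;   // incremented after each finished repetition
  std::vector<int> runs;   // stand-in for BenchmarkReporter::Run reports
};

int main() {
  ToyFamilyReports family;
  const int instance_repeats[] = {3, 3};  // e.g. two instances, 3 repeats each
  for (int r : instance_repeats) family.num_runs_total += r;

  // Repetitions may arrive in any (shuffled) order; only the count matters.
  for (int i = 0; i < family.num_runs_total; ++i) {
    family.runs.push_back(i);
    ++family.num_runs_done;
  }
  // Complexity (Big-O) aggregation only happens once the family is done.
  assert(family.num_runs_done == family.num_runs_total);
}
```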
@@ -33,8 +33,10 @@
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <map>
 #include <memory>
+#include <random>
 #include <string>
 #include <thread>
 #include <utility>
@@ -73,6 +75,10 @@ DEFINE_double(benchmark_min_time, 0.5);
 // standard deviation of the runs will be reported.
 DEFINE_int32(benchmark_repetitions, 1);

+// If set, enable random interleaving of repetitions of all benchmarks.
+// See http://github.com/google/benchmark/issues/1051 for details.
+DEFINE_bool(benchmark_enable_random_interleaving, false);
+
 // Report the result of each benchmark repetition. When 'true' is specified,
 // only the mean, standard deviation, and other statistics are reported for
 // repeated benchmarks. Affects all reporters.
@@ -297,23 +303,68 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   context.name_field_width = name_field_width;

   // Keep track of running times of all instances of each benchmark family.
-  std::map<int /*family_index*/, std::vector<BenchmarkReporter::Run>>
-      complexity_reports;
+  std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
+      per_family_reports;

   if (display_reporter->ReportContext(context) &&
       (!file_reporter || file_reporter->ReportContext(context))) {
     FlushStreams(display_reporter);
     FlushStreams(file_reporter);

+    size_t num_repetitions_total = 0;
+
+    std::vector<internal::BenchmarkRunner> runners;
+    runners.reserve(benchmarks.size());
     for (const BenchmarkInstance& benchmark : benchmarks) {
-      std::vector<BenchmarkReporter::Run>* complexity_reports_for_family =
-          nullptr;
+      BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
       if (benchmark.complexity() != oNone)
-        complexity_reports_for_family =
-            &complexity_reports[benchmark.family_index()];
+        reports_for_family = &per_family_reports[benchmark.family_index()];
+
+      runners.emplace_back(benchmark, reports_for_family);
+      int num_repeats_of_this_instance = runners.back().GetNumRepeats();
+      num_repetitions_total += num_repeats_of_this_instance;
+      if (reports_for_family)
+        reports_for_family->num_runs_total += num_repeats_of_this_instance;
+    }
+    assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+
+    std::vector<int> repetition_indices;
+    repetition_indices.reserve(num_repetitions_total);
+    for (size_t runner_index = 0, num_runners = runners.size();
+         runner_index != num_runners; ++runner_index) {
+      const internal::BenchmarkRunner& runner = runners[runner_index];
+      std::fill_n(std::back_inserter(repetition_indices),
+                  runner.GetNumRepeats(), runner_index);
+    }
+    assert(repetition_indices.size() == num_repetitions_total &&
+           "Unexpected number of repetition indexes.");
+
+    if (FLAGS_benchmark_enable_random_interleaving) {
+      std::random_device rd;
+      std::mt19937 g(rd());
+      std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
+    }
+
+    for (size_t repetition_index : repetition_indices) {
+      internal::BenchmarkRunner& runner = runners[repetition_index];
+      runner.DoOneRepetition();
+      if (runner.HasRepeatsRemaining()) continue;
+
+      // FIXME: report each repetition separately, not all of them in bulk.
+
+      RunResults run_results = runner.GetResults();
+
+      // Maybe calculate complexity report
+      if (const auto* reports_for_family = runner.GetReportsForFamily()) {
+        if (reports_for_family->num_runs_done ==
+            reports_for_family->num_runs_total) {
+          auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
+          run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                             additional_run_stats.begin(),
+                                             additional_run_stats.end());
+          per_family_reports.erase(
+              (int)reports_for_family->Runs.front().family_index);
+        }
+      }

-      RunResults run_results =
-          RunBenchmark(benchmark, complexity_reports_for_family);
       Report(display_reporter, file_reporter, run_results);
     }
   }
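To make the scheduling concrete, here is a self-contained sketch of the
index-expansion-plus-shuffle step (the repetition counts are made up for
illustration): with two runners needing 3 and 2 repetitions,
`repetition_indices` starts as `{0, 0, 0, 1, 1}`, and shuffling it yields the
interleaved execution order.

```c++
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>

int main() {
  // Hypothetical per-runner repetition counts (runner 0 and runner 1).
  const std::vector<int> repeats_per_runner = {3, 2};

  // Expand to one entry per repetition: {0, 0, 0, 1, 1}.
  std::vector<int> repetition_indices;
  for (std::size_t runner = 0; runner < repeats_per_runner.size(); ++runner)
    std::fill_n(std::back_inserter(repetition_indices),
                repeats_per_runner[runner], static_cast<int>(runner));

  // Shuffle, as done when --benchmark_enable_random_interleaving is set.
  std::mt19937 g(std::random_device{}());
  std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);

  // Each entry names the runner performing its next repetition, e.g. 1 0 0 1 0.
  for (int idx : repetition_indices) std::cout << idx << ' ';
  std::cout << '\n';
}
```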
@@ -471,6 +522,7 @@ void PrintUsageAndExit() {
           " [--benchmark_filter=<regex>]\n"
           " [--benchmark_min_time=<min_time>]\n"
           " [--benchmark_repetitions=<num_repetitions>]\n"
+          " [--benchmark_enable_random_interleaving={true|false}]\n"
           " [--benchmark_report_aggregates_only={true|false}]\n"
           " [--benchmark_display_aggregates_only={true|false}]\n"
           " [--benchmark_format=<console|json|csv>]\n"
@@ -495,6 +547,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
                         &FLAGS_benchmark_min_time) ||
         ParseInt32Flag(argv[i], "benchmark_repetitions",
                        &FLAGS_benchmark_repetitions) ||
+        ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
+                      &FLAGS_benchmark_enable_random_interleaving) ||
         ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                       &FLAGS_benchmark_report_aggregates_only) ||
         ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
...
@@ -39,8 +39,6 @@ class BenchmarkInstance {
   IterationCount iterations() const { return iterations_; }
   int threads() const { return threads_; }

-  bool last_benchmark_instance;
-
   State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
             internal::ThreadManager* manager,
             internal::PerfCountersMeasurement* perf_counters_measurement) const;
...
@@ -166,7 +166,6 @@ bool BenchmarkFamilies::FindBenchmarks(
         const auto full_name = instance.name().str();
         if ((re.Match(full_name) && !isNegativeFilter) ||
             (!re.Match(full_name) && isNegativeFilter)) {
-          instance.last_benchmark_instance = (&args == &family->args_.back());
           benchmarks->push_back(std::move(instance));

           ++per_family_instance_index;
...
@@ -143,9 +143,9 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,

 BenchmarkRunner::BenchmarkRunner(
     const benchmark::internal::BenchmarkInstance& b_,
-    std::vector<BenchmarkReporter::Run>* complexity_reports_)
+    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
     : b(b_),
-      complexity_reports(complexity_reports_),
+      reports_for_family(reports_for_family_),
       min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
       repeats(b.repetitions() != 0 ? b.repetitions()
                                    : FLAGS_benchmark_repetitions),
@@ -172,22 +172,6 @@ BenchmarkRunner::BenchmarkRunner(
         perf_counters_measurement.IsValid())
         << "Perf counters were requested but could not be set up.";
   }
-
-  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-    DoOneRepetition(repetition_num);
-  }
-
-  // Calculate additional statistics
-  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-
-  // Maybe calculate complexity report
-  if (complexity_reports && b.last_benchmark_instance) {
-    auto additional_run_stats = ComputeBigO(*complexity_reports);
-    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                       additional_run_stats.begin(),
-                                       additional_run_stats.end());
-    complexity_reports->clear();
-  }
 }

 BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
@@ -283,8 +267,10 @@ bool BenchmarkRunner::ShouldReportIterationResults(
          ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
 }

-void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
-  const bool is_the_first_repetition = repetition_index == 0;
+void BenchmarkRunner::DoOneRepetition() {
+  assert(HasRepeatsRemaining() && "Already done all repetitions?");
+
+  const bool is_the_first_repetition = num_repetitions_done == 0;
   IterationResults i;

   // We *may* be gradually increasing the length (iteration count)
@@ -337,19 +323,25 @@ void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
   // Ok, now actually report.
   BenchmarkReporter::Run report =
       CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
-                      repetition_index, repeats);
+                      num_repetitions_done, repeats);

-  if (complexity_reports && !report.error_occurred)
-    complexity_reports->push_back(report);
+  if (reports_for_family) {
+    ++reports_for_family->num_runs_done;
+    if (!report.error_occurred) reports_for_family->Runs.push_back(report);
+  }

   run_results.non_aggregates.push_back(report);
+
+  ++num_repetitions_done;
 }

-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports) {
-  internal::BenchmarkRunner r(b, complexity_reports);
-  return r.get_results();
+RunResults&& BenchmarkRunner::GetResults() {
+  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
+
+  // Calculate additional statistics over the repetitions of this instance.
+  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+
+  return std::move(run_results);
 }

 }  // end namespace internal
...
@@ -50,20 +50,34 @@ struct RunResults {

 class BenchmarkRunner {
  public:
   BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_);
+                  BenchmarkReporter::PerFamilyRunReports* reports_for_family);

-  RunResults&& get_results() { return std::move(run_results); }
+  int GetNumRepeats() const { return repeats; }
+
+  bool HasRepeatsRemaining() const {
+    return GetNumRepeats() != num_repetitions_done;
+  }
+
+  void DoOneRepetition();
+  RunResults&& GetResults();
+
+  BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
+    return reports_for_family;
+  }

  private:
   RunResults run_results;

   const benchmark::internal::BenchmarkInstance& b;
-  std::vector<BenchmarkReporter::Run>* complexity_reports;
+  BenchmarkReporter::PerFamilyRunReports* reports_for_family;

   const double min_time;
   const int repeats;
   const bool has_explicit_iteration_count;

+  int num_repetitions_done = 0;
+
   std::vector<std::thread> pool;

   IterationCount iters;  // preserved between repetitions!
@@ -83,14 +97,8 @@ class BenchmarkRunner {
   IterationCount PredictNumItersNeeded(const IterationResults& i) const;

   bool ShouldReportIterationResults(const IterationResults& i) const;
-
-  void DoOneRepetition(int64_t repetition_index);
 };

-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports);
-
 }  // namespace internal
 }  // end namespace benchmark
...
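With this header change the runner behaves like a small state machine:
construct it, call `DoOneRepetition()` exactly `GetNumRepeats()` times
(possibly interleaved with repetitions of other runners), then harvest the
results once via a move. A self-contained toy analog of that contract (not the
library's actual API; the measurement is faked with an integer):

```c++
#include <cassert>
#include <utility>
#include <vector>

// Toy analog of the BenchmarkRunner usage contract; names mirror the real
// API for readability only.
class ToyRunner {
 public:
  explicit ToyRunner(int repeats) : repeats_(repeats) {}

  int GetNumRepeats() const { return repeats_; }
  bool HasRepeatsRemaining() const { return done_ != repeats_; }

  void DoOneRepetition() {
    assert(HasRepeatsRemaining() && "Already done all repetitions?");
    results_.push_back(done_);  // stand-in for one measured repetition
    ++done_;
  }

  std::vector<int>&& GetResults() {
    assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
    return std::move(results_);  // caller consumes the results exactly once
  }

 private:
  const int repeats_;
  int done_ = 0;
  std::vector<int> results_;
};

int main() {
  ToyRunner runner(3);
  while (runner.HasRepeatsRemaining()) runner.DoOneRepetition();
  std::vector<int> results = runner.GetResults();
  assert(results.size() == 3);
}
```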
@@ -199,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
   add_gtest(benchmark_gtest)
   add_gtest(benchmark_name_gtest)
+  add_gtest(benchmark_random_interleaving_gtest)
   add_gtest(commandlineflags_gtest)
   add_gtest(statistics_gtest)
   add_gtest(string_util_gtest)
...
#include <map>
#include <queue>
#include <string>
#include <vector>

#include "../src/commandlineflags.h"
#include "../src/string_util.h"
#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

DECLARE_bool(benchmark_enable_random_interleaving);
DECLARE_string(benchmark_filter);
DECLARE_int32(benchmark_repetitions);

namespace benchmark {
namespace internal {
namespace {

class EventQueue : public std::queue<std::string> {
 public:
  void Put(const std::string& event) { push(event); }

  void Clear() {
    while (!empty()) {
      pop();
    }
  }

  std::string Get() {
    std::string event = front();
    pop();
    return event;
  }
};

static EventQueue* queue = new EventQueue;

class NullReporter : public BenchmarkReporter {
 public:
  bool ReportContext(const Context& /*context*/) override { return true; }
  void ReportRuns(const std::vector<Run>& /* report */) override {}
};

class BenchmarkTest : public testing::Test {
 public:
  static void SetupHook(int /* num_threads */) { queue->push("Setup"); }

  static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); }

  void Execute(const std::string& pattern) {
    queue->Clear();

    BenchmarkReporter* reporter = new NullReporter;
    FLAGS_benchmark_filter = pattern;
    RunSpecifiedBenchmarks(reporter);
    delete reporter;

    queue->Put("DONE");  // End marker
  }
};

static void BM_Match1(benchmark::State& state) {
  const int64_t arg = state.range(0);

  for (auto _ : state) {
  }
  queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg)));
}
BENCHMARK(BM_Match1)
    ->Iterations(100)
    ->Arg(1)
    ->Arg(2)
    ->Arg(3)
    ->Range(10, 80)
    ->Args({90})
    ->Args({100});

TEST_F(BenchmarkTest, Match1) {
  Execute("BM_Match1");
  ASSERT_EQ("BM_Match1/1", queue->Get());
  ASSERT_EQ("BM_Match1/2", queue->Get());
  ASSERT_EQ("BM_Match1/3", queue->Get());
  ASSERT_EQ("BM_Match1/10", queue->Get());
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("BM_Match1/90", queue->Get());
  ASSERT_EQ("BM_Match1/100", queue->Get());
  ASSERT_EQ("DONE", queue->Get());
}

TEST_F(BenchmarkTest, Match1WithRepetition) {
  FLAGS_benchmark_repetitions = 2;

  Execute("BM_Match1/(64|80)");
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("DONE", queue->Get());
}

TEST_F(BenchmarkTest, Match1WithRandomInterleaving) {
  FLAGS_benchmark_enable_random_interleaving = true;
  FLAGS_benchmark_repetitions = 100;

  std::map<std::string, int> element_count;
  std::map<std::string, int> interleaving_count;
  Execute("BM_Match1/(64|80)");
  for (int i = 0; i < 100; ++i) {
    std::vector<std::string> interleaving;
    interleaving.push_back(queue->Get());
    interleaving.push_back(queue->Get());
    element_count[interleaving[0]]++;
    element_count[interleaving[1]]++;
    interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(),
                                 interleaving[1].c_str())]++;
  }
  EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions.";
  EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions.";
  EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized.";
  ASSERT_EQ("DONE", queue->Get());
}

}  // namespace
}  // namespace internal
}  // namespace benchmark