Commit c6f3f0eb by Eric Committed by GitHub

Cleanup RunBenchmark code. (#289)

* Cleanup the code for generating and running benchmarks * Rework calculation of real/manual time * Add back TSAN builder
parent d038472c
...@@ -65,6 +65,16 @@ matrix: ...@@ -65,6 +65,16 @@ matrix:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins - LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" - EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
# Clang w/ libc++ and TSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
before_script: before_script:
- if [ -n "${LIBCXX_BUILD}" ]; then - if [ -n "${LIBCXX_BUILD}" ]; then
......
...@@ -131,15 +131,18 @@ class ThreadManager { ...@@ -131,15 +131,18 @@ class ThreadManager {
} }
public: public:
GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; struct Result {
GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; double real_time_used = 0;
GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; double cpu_time_used = 0;
GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; double manual_time_used = 0;
GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; int64_t bytes_processed = 0;
GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; int64_t items_processed = 0;
GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; int complexity_n = 0;
GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; std::string report_label_;
GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; std::string error_message_;
bool has_error_ = false;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
private: private:
mutable Mutex benchmark_mutex_; mutable Mutex benchmark_mutex_;
...@@ -211,6 +214,47 @@ class ThreadTimer { ...@@ -211,6 +214,47 @@ class ThreadTimer {
namespace { namespace {
BenchmarkReporter::Run
CreateRunReport(const benchmark::internal::Benchmark::Instance& b,
const internal::ThreadManager::Result& results,
size_t iters, double seconds)
{
// Create report about this benchmark run.
BenchmarkReporter::Run report;
report.benchmark_name = b.name;
report.error_occurred = results.has_error_;
report.error_message = results.error_message_;
report.report_label = results.report_label_;
// Report the total iterations across all threads.
report.iterations = static_cast<int64_t>(iters) * b.threads;
report.time_unit = b.time_unit;
if (!report.error_occurred) {
double bytes_per_second = 0;
if (results.bytes_processed > 0 && seconds > 0.0) {
bytes_per_second = (results.bytes_processed / seconds);
}
double items_per_second = 0;
if (results.items_processed > 0 && seconds > 0.0) {
items_per_second = (results.items_processed / seconds);
}
if (b.use_manual_time) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.cpu_accumulated_time = results.cpu_time_used;
report.bytes_per_second = bytes_per_second;
report.items_per_second = items_per_second;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
}
return report;
}
// Execute one thread of benchmark b for the specified number of iterations. // Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total. // Adds the stats collected for the thread into *total.
void RunInThread(const benchmark::internal::Benchmark::Instance* b, void RunInThread(const benchmark::internal::Benchmark::Instance* b,
...@@ -223,12 +267,13 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, ...@@ -223,12 +267,13 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
"Benchmark returned before State::KeepRunning() returned false!"; "Benchmark returned before State::KeepRunning() returned false!";
{ {
MutexLock l(manager->GetBenchmarkMutex()); MutexLock l(manager->GetBenchmarkMutex());
manager->cpu_time_used += timer.cpu_time_used(); internal::ThreadManager::Result& results = manager->results;
manager->real_time_used += timer.real_time_used(); results.cpu_time_used += timer.cpu_time_used();
manager->manual_time_used += timer.manual_time_used(); results.real_time_used += timer.real_time_used();
manager->bytes_processed += st.bytes_processed(); results.manual_time_used += timer.manual_time_used();
manager->items_processed += st.items_processed(); results.bytes_processed += st.bytes_processed();
manager->complexity_n += st.complexity_length_n(); results.items_processed += st.items_processed();
results.complexity_n += st.complexity_length_n();
} }
manager->NotifyThreadComplete(); manager->NotifyThreadComplete();
} }
...@@ -239,10 +284,8 @@ std::vector<BenchmarkReporter::Run> RunBenchmark( ...@@ -239,10 +284,8 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
std::vector<BenchmarkReporter::Run> reports; // return value std::vector<BenchmarkReporter::Run> reports; // return value
size_t iters = 1; size_t iters = 1;
const int num_threads = b.multithreaded ? b.threads : 1; std::unique_ptr<internal::ThreadManager> manager;
std::vector<std::thread> pool; std::vector<std::thread> pool(b.threads - 1);
if (num_threads > 1) pool.resize(num_threads -1);
const int repeats = b.repetitions != 0 ? b.repetitions const int repeats = b.repetitions != 0 ? b.repetitions
: FLAGS_benchmark_repetitions; : FLAGS_benchmark_repetitions;
const bool report_aggregates_only = repeats != 1 && const bool report_aggregates_only = repeats != 1 &&
...@@ -250,85 +293,49 @@ std::vector<BenchmarkReporter::Run> RunBenchmark( ...@@ -250,85 +293,49 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
? FLAGS_benchmark_report_aggregates_only ? FLAGS_benchmark_report_aggregates_only
: b.report_mode == internal::RM_ReportAggregatesOnly); : b.report_mode == internal::RM_ReportAggregatesOnly);
for (int i = 0; i < repeats; i++) { for (int i = 0; i < repeats; i++) {
std::string mem;
for (;;) { for (;;) {
// Try benchmark // Try benchmark
VLOG(2) << "Running " << b.name << " for " << iters << "\n"; VLOG(2) << "Running " << b.name << " for " << iters << "\n";
internal::ThreadManager manager(num_threads); manager.reset(new internal::ThreadManager(b.threads));
if (b.multithreaded) { for (std::size_t ti = 0; ti < pool.size(); ++ti) {
// If this is our first iteration of the while(true) loop then the pool[ti] = std::thread(&RunInThread, &b, iters,
// threads haven't been started and can't be joined. Otherwise we need static_cast<int>(ti + 1), manager.get());
// to join the thread before replacing them.
for (std::thread& thread : pool) {
if (thread.joinable())
thread.join();
}
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters,
static_cast<int>(ti + 1), &manager);
}
} }
RunInThread(&b, iters, 0, &manager); RunInThread(&b, iters, 0, manager.get());
manager.WaitForAllThreads(); manager->WaitForAllThreads();
MutexLock l(manager.GetBenchmarkMutex()); for (std::thread& thread : pool)
thread.join();
const double cpu_accumulated_time = manager.cpu_time_used; internal::ThreadManager::Result results;
const double real_accumulated_time = manager.real_time_used / num_threads; {
const double manual_accumulated_time = manager.manual_time_used / num_threads; MutexLock l(manager->GetBenchmarkMutex());
results = manager->results;
}
manager.reset();
// Adjust real/manual time stats since they were reported per thread.
results.real_time_used /= b.threads;
results.manual_time_used /= b.threads;
VLOG(2) << "Ran in " << cpu_accumulated_time << "/" VLOG(2) << "Ran in " << results.cpu_time_used << "/"
<< real_accumulated_time << "\n"; << results.real_time_used << "\n";
// Base decisions off of real time if requested by this benchmark. // Base decisions off of real time if requested by this benchmark.
double seconds = cpu_accumulated_time; double seconds = results.cpu_time_used;
if (b.use_manual_time) { if (b.use_manual_time) {
seconds = manual_accumulated_time; seconds = results.manual_time_used;
} else if (b.use_real_time) { } else if (b.use_real_time) {
seconds = real_accumulated_time; seconds = results.real_time_used;
} }
const double min_time = !IsZero(b.min_time) ? b.min_time const double min_time = !IsZero(b.min_time) ? b.min_time
: FLAGS_benchmark_min_time; : FLAGS_benchmark_min_time;
// If this was the first run, was elapsed time or cpu time large enough? // If this was the first run, was elapsed time or cpu time large enough?
// If this is not the first run, go with the current value of iter. // If this is not the first run, go with the current value of iter.
if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) || if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) ||
(seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) { (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) {
// Create report about this benchmark run. BenchmarkReporter::Run report = CreateRunReport(b, results, iters, seconds);
BenchmarkReporter::Run report; if (!report.error_occurred && b.complexity != oNone)
report.benchmark_name = b.name; complexity_reports->push_back(report);
report.error_occurred = manager.has_error_;
report.error_message = manager.error_message_;
report.report_label = manager.report_label_;
// Report the total iterations across all threads.
report.iterations = static_cast<int64_t>(iters) * b.threads;
report.time_unit = b.time_unit;
if (!report.error_occurred) {
double bytes_per_second = 0;
if (manager.bytes_processed > 0 && seconds > 0.0) {
bytes_per_second = (manager.bytes_processed / seconds);
}
double items_per_second = 0;
if (manager.items_processed > 0 && seconds > 0.0) {
items_per_second = (manager.items_processed / seconds);
}
if (b.use_manual_time) {
report.real_accumulated_time = manual_accumulated_time;
} else {
report.real_accumulated_time = real_accumulated_time;
}
report.cpu_accumulated_time = cpu_accumulated_time;
report.bytes_per_second = bytes_per_second;
report.items_per_second = items_per_second;
report.complexity_n = manager.complexity_n;
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
if(report.complexity != oNone)
complexity_reports->push_back(report);
}
reports.push_back(report); reports.push_back(report);
break; break;
} }
...@@ -352,10 +359,6 @@ std::vector<BenchmarkReporter::Run> RunBenchmark( ...@@ -352,10 +359,6 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
iters = static_cast<int>(next_iters + 0.5); iters = static_cast<int>(next_iters + 0.5);
} }
} }
if (b.multithreaded) {
for (std::thread& thread : pool)
thread.join();
}
// Calculate additional statistics // Calculate additional statistics
auto stat_reports = ComputeStats(reports); auto stat_reports = ComputeStats(reports);
if((b.complexity != oNone) && b.last_benchmark_instance) { if((b.complexity != oNone) && b.last_benchmark_instance) {
...@@ -409,9 +412,9 @@ void State::SkipWithError(const char* msg) { ...@@ -409,9 +412,9 @@ void State::SkipWithError(const char* msg) {
error_occurred_ = true; error_occurred_ = true;
{ {
MutexLock l(manager_->GetBenchmarkMutex()); MutexLock l(manager_->GetBenchmarkMutex());
if (manager_->has_error_ == false) { if (manager_->results.has_error_ == false) {
manager_->error_message_ = msg; manager_->results.error_message_ = msg;
manager_->has_error_ = true; manager_->results.has_error_ = true;
} }
} }
total_iterations_ = max_iterations; total_iterations_ = max_iterations;
...@@ -425,7 +428,7 @@ void State::SetIterationTime(double seconds) ...@@ -425,7 +428,7 @@ void State::SetIterationTime(double seconds)
void State::SetLabel(const char* label) { void State::SetLabel(const char* label) {
MutexLock l(manager_->GetBenchmarkMutex()); MutexLock l(manager_->GetBenchmarkMutex());
manager_->report_label_ = label; manager_->results.report_label_ = label;
} }
void State::StartKeepRunning() { void State::StartKeepRunning() {
......
...@@ -27,8 +27,7 @@ struct Benchmark::Instance { ...@@ -27,8 +27,7 @@ struct Benchmark::Instance {
bool last_benchmark_instance; bool last_benchmark_instance;
int repetitions; int repetitions;
double min_time; double min_time;
int threads; // Number of concurrent threads to use int threads; // Number of concurrent threads to use
bool multithreaded; // Is benchmark multi-threaded?
}; };
bool FindBenchmarksInternal(const std::string& re, bool FindBenchmarksInternal(const std::string& re,
......
...@@ -151,7 +151,6 @@ bool BenchmarkFamilies::FindBenchmarks( ...@@ -151,7 +151,6 @@ bool BenchmarkFamilies::FindBenchmarks(
instance.complexity = family->complexity_; instance.complexity = family->complexity_;
instance.complexity_lambda = family->complexity_lambda_; instance.complexity_lambda = family->complexity_lambda_;
instance.threads = num_threads; instance.threads = num_threads;
instance.multithreaded = !(family->thread_counts_.empty());
// Add arguments to instance name // Add arguments to instance name
for (auto const& arg : args) { for (auto const& arg : args) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment