Cleanup RunBenchmark code. (#289)

* Cleanup the code for generating and running benchmarks * Rework calculation of real/manual time * Add back TSAN builder

Cleanup RunBenchmark code. (#289)
c6f3f0eb · Eric · GitHub · d038472c · c6f3f0eb · c6f3f0eb
Commit c6f3f0eb authored Sep 06, 2016 by Eric Committed by GitHub Sep 06, 2016
Showing with 104 additions and 93 deletions

.travis.yml .travis.yml +10 -0

benchmark.cc src/benchmark.cc +93 -90

benchmark_api_internal.h src/benchmark_api_internal.h +1 -2

benchmark_register.cc src/benchmark_register.cc +0 -1

No files found.
--- a/.travis.yml
+++ b/.travis.yml
@@ -65,6 +65,16 @@ matrix:
            - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
            - LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
            - EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
+        # Clang w/ libc++ and MSAN
+        - compiler: clang
+          addons:
+            apt:
+              packages:
+                clang-3.8
+          env:
+            - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo
+            - LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread
+            - EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
 before_script:
    - if [ -n "${LIBCXX_BUILD}" ]; then

--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -131,15 +131,18 @@ class ThreadManager {
  }
 public:
-  GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0;
+  struct Result {
-  GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0;
+    double real_time_used = 0;
-  GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0;
+    double cpu_time_used = 0;
-  GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0;
+    double manual_time_used = 0;
-  GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0;
+    int64_t bytes_processed = 0;
-  GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0;
+    int64_t items_processed = 0;
-  GUARDED_BY(GetBenchmarkMutex()) std::string report_label_;
+    int complexity_n = 0;
-  GUARDED_BY(GetBenchmarkMutex()) std::string error_message_;
+    std::string report_label_;
-  GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false;
+    std::string error_message_;
+    bool has_error_ = false;
+  };
+  GUARDED_BY(GetBenchmarkMutex()) Result results;
 private:
  mutable Mutex benchmark_mutex_;
@@ -211,6 +214,47 @@ class ThreadTimer {
 namespace {
+BenchmarkReporter::Run
+CreateRunReport(const benchmark::internal::Benchmark::Instance& b,
+                const internal::ThreadManager::Result& results,
+                size_t iters, double seconds)
+{
+  // Create report about this benchmark run.
+  BenchmarkReporter::Run report;
+  report.benchmark_name = b.name;
+  report.error_occurred = results.has_error_;
+  report.error_message = results.error_message_;
+  report.report_label = results.report_label_;
+  // Report the total iterations across all threads.
+  report.iterations = static_cast<int64_t>(iters) * b.threads;
+  report.time_unit = b.time_unit;
+  if (!report.error_occurred) {
+    double bytes_per_second = 0;
+    if (results.bytes_processed > 0 && seconds > 0.0) {
+      bytes_per_second = (results.bytes_processed / seconds);
+    }
+    double items_per_second = 0;
+    if (results.items_processed > 0 && seconds > 0.0) {
+      items_per_second = (results.items_processed / seconds);
+    }
+    if (b.use_manual_time) {
+      report.real_accumulated_time = results.manual_time_used;
+    } else {
+      report.real_accumulated_time = results.real_time_used;
+    }
+    report.cpu_accumulated_time = results.cpu_time_used;
+    report.bytes_per_second = bytes_per_second;
+    report.items_per_second = items_per_second;
+    report.complexity_n = results.complexity_n;
+    report.complexity = b.complexity;
+    report.complexity_lambda = b.complexity_lambda;
+  }
+  return report;
+}
 // Execute one thread of benchmark b for the specified number of iterations.
 // Adds the stats collected for the thread into *total.
 void RunInThread(const benchmark::internal::Benchmark::Instance* b,
@@ -223,12 +267,13 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
    "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
-    manager->cpu_time_used += timer.cpu_time_used();
+    internal::ThreadManager::Result& results = manager->results;
-    manager->real_time_used += timer.real_time_used();
+    results.cpu_time_used += timer.cpu_time_used();
-    manager->manual_time_used += timer.manual_time_used();
+    results.real_time_used += timer.real_time_used();
-    manager->bytes_processed += st.bytes_processed();
+    results.manual_time_used += timer.manual_time_used();
-    manager->items_processed += st.items_processed();
+    results.bytes_processed += st.bytes_processed();
-    manager->complexity_n += st.complexity_length_n();
+    results.items_processed += st.items_processed();
+    results.complexity_n += st.complexity_length_n();
  }
  manager->NotifyThreadComplete();
 }
@@ -239,10 +284,8 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
  std::vector<BenchmarkReporter::Run> reports; // return value
  size_t iters = 1;
-  const int num_threads = b.multithreaded ? b.threads : 1;
+  std::unique_ptr<internal::ThreadManager> manager;
-  std::vector<std::thread> pool;
+  std::vector<std::thread> pool(b.threads - 1);
-  if (num_threads > 1) pool.resize(num_threads -1);
  const int repeats = b.repetitions != 0 ? b.repetitions
                                         : FLAGS_benchmark_repetitions;
  const bool report_aggregates_only = repeats != 1 &&
@@ -250,85 +293,49 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
        ? FLAGS_benchmark_report_aggregates_only
        : b.report_mode == internal::RM_ReportAggregatesOnly);
  for (int i = 0; i < repeats; i++) {
-    std::string mem;
    for (;;) {
      // Try benchmark
      VLOG(2) << "Running " << b.name << " for " << iters << "\n";
-      internal::ThreadManager manager(num_threads);
+      manager.reset(new internal::ThreadManager(b.threads));
-      if (b.multithreaded) {
+      for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-        // If this is out first iteration of the while(true) loop then the
+        pool[ti] = std::thread(&RunInThread, &b, iters,
-        // threads haven't been started and can't be joined. Otherwise we need
+                               static_cast<int>(ti + 1), manager.get());
-        // to join the thread before replacing them.
-        for (std::thread& thread : pool) {
-          if (thread.joinable())
-            thread.join();
-        }
-        for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-          pool[ti] = std::thread(&RunInThread, &b, iters,
-                                 static_cast<int>(ti + 1), &manager);
-        }
      }
-      RunInThread(&b, iters, 0, &manager);
+      RunInThread(&b, iters, 0, manager.get());
-      manager.WaitForAllThreads();
+      manager->WaitForAllThreads();
-      MutexLock l(manager.GetBenchmarkMutex());
+      for (std::thread& thread : pool)
+        thread.join();
-      const double cpu_accumulated_time = manager.cpu_time_used;
+      internal::ThreadManager::Result results;
-      const double real_accumulated_time = manager.real_time_used / num_threads;
+      {
-      const double manual_accumulated_time = manager.manual_time_used / num_threads;
+        MutexLock l(manager->GetBenchmarkMutex());
+        results = manager->results;
+      }
+      manager.reset();
+      // Adjust real/manual time stats since they were reported per thread.
+      results.real_time_used /= b.threads;
+      results.manual_time_used /= b.threads;
-      VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
+      VLOG(2) << "Ran in " << results.cpu_time_used << "/"
-              << real_accumulated_time << "\n";
+                           << results.real_time_used << "\n";
      // Base decisions off of real time if requested by this benchmark.
-      double seconds = cpu_accumulated_time;
+      double seconds = results.cpu_time_used;
      if (b.use_manual_time) {
-          seconds = manual_accumulated_time;
+          seconds = results.manual_time_used;
      } else if (b.use_real_time) {
-          seconds = real_accumulated_time;
+          seconds = results.real_time_used;
      }
      const double min_time = !IsZero(b.min_time) ? b.min_time
                                                  : FLAGS_benchmark_min_time;
      // If this was the first run, was elapsed time or cpu time large enough?
      // If this is not the first run, go with the current value of iter.
-      if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) ||
+      if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) ||
-          (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) {
+          (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) {
-        // Create report about this benchmark run.
+        BenchmarkReporter::Run report = CreateRunReport(b, results, iters, seconds);
-        BenchmarkReporter::Run report;
+        if (!report.error_occurred && b.complexity != oNone)
-        report.benchmark_name = b.name;
+          complexity_reports->push_back(report);
-        report.error_occurred = manager.has_error_;
-        report.error_message = manager.error_message_;
-        report.report_label = manager.report_label_;
-        // Report the total iterations across all threads.
-        report.iterations = static_cast<int64_t>(iters) * b.threads;
-        report.time_unit = b.time_unit;
-        if (!report.error_occurred) {
-          double bytes_per_second = 0;
-          if (manager.bytes_processed > 0 && seconds > 0.0) {
-            bytes_per_second = (manager.bytes_processed / seconds);
-          }
-          double items_per_second = 0;
-          if (manager.items_processed > 0 && seconds > 0.0) {
-            items_per_second = (manager.items_processed / seconds);
-          }
-          if (b.use_manual_time) {
-            report.real_accumulated_time = manual_accumulated_time;
-          } else {
-            report.real_accumulated_time = real_accumulated_time;
-          }
-          report.cpu_accumulated_time = cpu_accumulated_time;
-          report.bytes_per_second = bytes_per_second;
-          report.items_per_second = items_per_second;
-          report.complexity_n = manager.complexity_n;
-          report.complexity = b.complexity;
-          report.complexity_lambda = b.complexity_lambda;
-          if(report.complexity != oNone)
-            complexity_reports->push_back(report);
-        }
        reports.push_back(report);
        break;
      }
@@ -352,10 +359,6 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
      iters = static_cast<int>(next_iters + 0.5);
    }
  }
-  if (b.multithreaded) {
-    for (std::thread& thread : pool)
-      thread.join();
-  }
  // Calculate additional statistics
  auto stat_reports = ComputeStats(reports);
  if((b.complexity != oNone) && b.last_benchmark_instance) {
@@ -409,9 +412,9 @@ void State::SkipWithError(const char* msg) {
  error_occurred_ = true;
  {
    MutexLock l(manager_->GetBenchmarkMutex());
-    if (manager_->has_error_ == false) {
+    if (manager_->results.has_error_ == false) {
-      manager_->error_message_ = msg;
+      manager_->results.error_message_ = msg;
-      manager_->has_error_ = true;
+      manager_->results.has_error_ = true;
    }
  }
  total_iterations_ = max_iterations;
@@ -425,7 +428,7 @@ void State::SetIterationTime(double seconds)
 void State::SetLabel(const char* label) {
  MutexLock l(manager_->GetBenchmarkMutex());
-  manager_->report_label_ = label;
+  manager_->results.report_label_ = label;
 }
 void State::StartKeepRunning() {

--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -27,8 +27,7 @@ struct Benchmark::Instance {
  bool             last_benchmark_instance;
  int              repetitions;
  double           min_time;
-  int              threads;    // Number of concurrent threads to use
+  int              threads;    // Number of concurrent threads to us
-  bool             multithreaded;  // Is benchmark multi-threaded?
 };
 bool FindBenchmarksInternal(const std::string& re,

--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -151,7 +151,6 @@ bool BenchmarkFamilies::FindBenchmarks(
        instance.complexity = family->complexity_;
        instance.complexity_lambda = family->complexity_lambda_;
        instance.threads = num_threads;
-        instance.multithreaded = !(family->thread_counts_.empty());
        // Add arguments to instance name
        for (auto const& arg : args) {