Commit cba945e3 by Eric Committed by GitHub

Make `PauseTiming()` and `ResumeTiming()` per thread. (#286)

* Change to using per-thread timers * fix bad assertions * fix copy paste error on windows * Fix thread safety annotations * Make null-log thread safe * remove remaining globals * use chrono for walltime since it is thread safe * consolidate timer functions * Add missing ctime include * Rename to be consistent with Google style * Format patch using clang-format * cleanup -Wthread-safety configuration * Don't trust _POSIX_FEATURE macros because OS X lies. * Fix OS X thread timings * attempt to fix mingw build * Attempt to make mingw work again * Revert old mingw workaround * improve diagnostics * Drastically improve OS X measurements * Use average real time instead of max
parent 94c2a30a
......@@ -87,8 +87,7 @@ else()
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_WTHREAD_SAFETY)
add_definitions(-DHAVE_WTHREAD_SAFETY)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
endif()
......@@ -152,7 +151,6 @@ cxx_feature_check(STD_REGEX)
cxx_feature_check(GNU_POSIX_REGEX)
cxx_feature_check(POSIX_REGEX)
cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
find_package(Threads REQUIRED)
......
......@@ -270,31 +270,25 @@ enum BigO {
// computational complexity for the benchmark.
typedef double(BigOFunc)(int);
namespace internal {
class ThreadTimer;
class ThreadManager;
}
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
public:
State(size_t max_iters, const std::vector<int>& ranges,
int thread_i, int n_threads);
// Returns true if the benchmark should continue through another iteration.
// NOTE: A benchmark may not return from the test until KeepRunning() has
// returned false.
bool KeepRunning() {
if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
assert(!finished_);
started_ = true;
ResumeTiming();
StartKeepRunning();
}
bool const res = total_iterations_++ < max_iterations;
if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
assert(started_ && (!finished_ || error_occurred_));
if (!error_occurred_) {
PauseTiming();
}
// Total iterations now is one greater than max iterations. Fix this.
total_iterations_ = max_iterations;
finished_ = true;
FinishKeepRunning();
}
return res;
}
......@@ -304,10 +298,11 @@ public:
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after KeepRunning() returns false for the first time.
//
// For threaded benchmarks the PauseTiming() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all active threads have made their ith call.
// The timer will stop when the last thread has called this function.
// For threaded benchmarks the PauseTiming() function only pauses the timing
// for the current thread.
//
// NOTE: The "real time" measurement is per-thread. If different threads
// report different measurements the largest one is reported.
//
// NOTE: PauseTiming()/ResumeTiming() are relatively
// heavyweight, and so their use should generally be avoided
......@@ -319,11 +314,6 @@ public:
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after the first call to KeepRunning()
//
// For threaded benchmarks the ResumeTiming() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all active threads have made their ith call.
// The timer will start when the last thread has called this function.
//
// NOTE: PauseTiming()/ResumeTiming() are relatively
// heavyweight, and so their use should generally be avoided
// within each benchmark iteration, if possible.
......@@ -335,10 +325,10 @@ public:
// thread and report an error with the specified 'msg'. After this call
// the user may explicitly 'return' from the benchmark.
//
// For threaded benchmarks only the current thread stops executing. If
// multiple threads report an error only the first error message is used.
// The current thread is no longer considered 'active' by
// 'PauseTiming()' and 'ResumingTiming()'.
// For threaded benchmarks only the current thread stops executing and future
// calls to `KeepRunning()` will block until all threads have completed
// the `KeepRunning()` loop. If multiple threads report an error only the
// first error message is used.
//
// NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
// the current scope immediately. If the function is called from within
......@@ -351,10 +341,8 @@ public:
// is used instead of automatically measured time if UseManualTime() was
// specified.
//
// For threaded benchmarks the SetIterationTime() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all threads have made their ith call.
// The time will be set by the last thread to call this function.
// For threaded benchmarks the final value will be set to the largest
// reported values.
void SetIterationTime(double seconds);
// Set the number of bytes processed by the current benchmark
......@@ -465,7 +453,16 @@ public:
const int threads;
const size_t max_iterations;
private:
// TODO make me private
State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
int n_threads, internal::ThreadTimer* timer,
internal::ThreadManager* manager);
private:
void StartKeepRunning();
void FinishKeepRunning();
internal::ThreadTimer* timer_;
internal::ThreadManager* manager_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
};
......
......@@ -8,9 +8,9 @@ endif()
# Define the source files
set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
"console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
"log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
"sysinfo.cc" "walltime.cc" "complexity.cc")
"console_reporter.cc" "csv_reporter.cc"
"json_reporter.cc" "reporter.cc" "sleep.cc"
"string_util.cc" "sysinfo.cc" "complexity.cc" "timers.cc")
# Add headers to the list of source files. cmake does not require this,
# but IDEs such as Visual Studio need this to add the headers
# to the generated project.
......@@ -19,8 +19,7 @@ list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h"
"${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h"
"colorprint.h" "commandlineflags.h" "complexity.h"
"cycleclock.h" "internal_macros.h" "log.h" "mutex.h"
"re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h"
"walltime.h")
"re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "timers.h")
unset(_d)
# Determine the correct regular expression engine to use
......
......@@ -33,9 +33,7 @@ public:
<< check << "' failed. ";
}
std::ostream& GetLog() {
return log_;
}
LogType& GetLog() { return log_; }
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
......@@ -46,7 +44,7 @@ public:
CheckHandler(const CheckHandler&) = delete;
CheckHandler() = delete;
private:
std::ostream& log_;
LogType& log_;
};
} // end namespace internal
......
......@@ -28,7 +28,7 @@
#include "commandlineflags.h"
#include "internal_macros.h"
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
namespace benchmark {
......
......@@ -23,7 +23,7 @@
#include <vector>
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
// File format reference: http://edoceo.com/utilitas/csv-file-format.
......
......@@ -23,7 +23,7 @@
#include <vector>
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
namespace benchmark {
......
#include "log.h"
#include <iostream>
namespace benchmark {
namespace internal {
int& LoggingLevelImp() {
static int level = 0;
return level;
}
void SetLogLevel(int value) {
LoggingLevelImp() = value;
}
int GetLogLevel() {
return LoggingLevelImp();
}
class NullLogBuffer : public std::streambuf
{
public:
int overflow(int c) {
return c;
}
};
std::ostream& GetNullLogInstance() {
static NullLogBuffer log_buff;
static std::ostream null_log(&log_buff);
return null_log;
}
std::ostream& GetErrorLogInstance() {
return std::clog;
}
} // end namespace internal
} // end namespace benchmark
\ No newline at end of file
#ifndef BENCHMARK_LOG_H_
#define BENCHMARK_LOG_H_
#include <iostream>
#include <ostream>
#include "benchmark/macros.h"
namespace benchmark {
namespace internal {
int GetLogLevel();
void SetLogLevel(int level);
typedef std::basic_ostream<char>&(EndLType)(std::basic_ostream<char>&);
class LogType {
friend LogType& GetNullLogInstance();
friend LogType& GetErrorLogInstance();
// FIXME: Add locking to output.
template <class Tp>
friend LogType& operator<<(LogType&, Tp const&);
friend LogType& operator<<(LogType&, EndLType*);
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
};
std::ostream& GetNullLogInstance();
std::ostream& GetErrorLogInstance();
template <class Tp>
LogType& operator<<(LogType& log, Tp const& value) {
if (log.out_) {
*log.out_ << value;
}
return log;
}
inline LogType& operator<<(LogType& log, EndLType* m) {
if (log.out_) {
*log.out_ << m;
}
return log;
}
inline int& LogLevel() {
static int log_level = 0;
return log_level;
}
inline LogType& GetNullLogInstance() {
static LogType log(nullptr);
return log;
}
inline LogType& GetErrorLogInstance() {
static LogType log(&std::clog);
return log;
}
inline std::ostream& GetLogInstanceForLevel(int level) {
if (level <= GetLogLevel()) {
inline LogType& GetLogInstanceForLevel(int level) {
if (level <= LogLevel()) {
return GetErrorLogInstance();
}
return GetNullLogInstance();
......
......@@ -4,6 +4,8 @@
#include <mutex>
#include <condition_variable>
#include "check.h"
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
......@@ -105,36 +107,58 @@ private:
MutexLockImp ml_;
};
class Barrier {
public:
Barrier(int num_threads) : running_threads_(num_threads) {}
class Notification
{
public:
Notification() : notified_yet_(false) { }
void WaitForNotification() const EXCLUDES(mutex_) {
MutexLock m_lock(mutex_);
auto notified_fn = [this]() REQUIRES(mutex_) {
return this->HasBeenNotified();
};
cv_.wait(m_lock.native_handle(), notified_fn);
}
void Notify() EXCLUDES(mutex_) {
// Called by each thread
bool wait() EXCLUDES(lock_) {
bool last_thread = false;
{
MutexLock lock(mutex_);
notified_yet_ = 1;
MutexLock ml(lock_);
last_thread = createBarrier(ml);
}
cv_.notify_all();
if (last_thread) phase_condition_.notify_all();
return last_thread;
}
private:
bool HasBeenNotified() const REQUIRES(mutex_) {
return notified_yet_;
void removeThread() EXCLUDES(lock_) {
MutexLock ml(lock_);
--running_threads_;
if (entered_ != 0) phase_condition_.notify_all();
}
mutable Mutex mutex_;
mutable std::condition_variable cv_;
bool notified_yet_ GUARDED_BY(mutex_);
private:
Mutex lock_;
Condition phase_condition_;
int running_threads_;
// State for barrier management
int phase_number_ = 0;
int entered_ = 0; // Number of threads that have entered this barrier
// Enter the barrier and wait until all other threads have also
// entered the barrier. Returns iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter
int phase_number_cp = phase_number_;
auto cb = [this, phase_number_cp]() {
return this->phase_number_ > phase_number_cp ||
entered_ == running_threads_; // A thread has aborted in error
};
phase_condition_.wait(ml.native_handle(), cb);
if (phase_number_ > phase_number_cp) return false;
// else (running_threads_ == entered_) and we are the last thread.
}
// Last thread has reached the barrier
phase_number_++;
entered_ = 0;
return true;
}
};
} // end namespace benchmark
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "benchmark/reporter.h"
#include "walltime.h"
#include "timers.h"
#include <cstdlib>
......
......@@ -52,7 +52,6 @@ namespace {
std::once_flag cpuinfo_init;
double cpuinfo_cycles_per_second = 1.0;
int cpuinfo_num_cpus = 1; // Conservative guess
std::mutex cputimens_mutex;
#if !defined BENCHMARK_OS_MACOSX
const int64_t estimate_time_ms = 1000;
......@@ -288,101 +287,8 @@ void InitializeSystemInfo() {
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
#endif
}
} // end namespace
// getrusage() based implementation of MyCPUUsage
static double MyCPUUsageRUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) * 1e-7;
#endif // OS_WINDOWS
}
#ifndef BENCHMARK_OS_WINDOWS
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
static int cputime_fd = -1;
if (cputime_fd == -1) {
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
if (cputime_fd < 0) {
cputime_fd = -1;
return false;
}
}
char buff[64];
memset(buff, 0, sizeof(buff));
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
unsigned long long result = strtoull(buff, nullptr, 0);
if (result == (std::numeric_limits<unsigned long long>::max)()) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
*cputime = static_cast<double>(result) / 1e9;
return true;
}
#endif // OS_WINDOWS
double MyCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
{
std::lock_guard<std::mutex> l(cputimens_mutex);
static bool use_cputime_ns = true;
if (use_cputime_ns) {
double value;
if (MyCPUUsageCPUTimeNsLocked(&value)) {
return value;
}
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
use_cputime_ns = false;
}
}
#endif // OS_WINDOWS
return MyCPUUsageRUsage();
}
double ChildrenCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
// TODO: Not sure what this even means on Windows
return 0.0;
#endif // OS_WINDOWS
}
} // end namespace
double CyclesPerSecond(void) {
std::call_once(cpuinfo_init, InitializeSystemInfo);
......
......@@ -2,8 +2,6 @@
#define BENCHMARK_SYSINFO_H_
namespace benchmark {
double MyCPUUsage();
double ChildrenCPUUsage();
int NumCPUs();
double CyclesPerSecond();
bool CpuScalingEnabled();
......
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "timers.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <Shlwapi.h>
#include <VersionHelpers.h>
#include <Windows.h>
#else
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
#include <sys/sysctl.h>
#endif
#if defined(BENCHMARK_OS_MACOSX)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
#endif
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <limits>
#include <mutex>
#include "check.h"
#include "log.h"
#include "sleep.h"
#include "string_util.h"
namespace benchmark {
// Suppress unused warnings on helper functions.
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) *
1e-7;
}
#else
double MakeTime(struct timespec const& ts) {
return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9);
}
double MakeTime(struct rusage ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
}
#endif
#if defined(BENCHMARK_OS_MACOSX)
double MakeTime(thread_basic_info_data_t const& info) {
return (static_cast<double>(info.user_time.seconds) +
static_cast<double>(info.user_time.microseconds) * 1e-6 +
static_cast<double>(info.system_time.seconds) +
static_cast<double>(info.user_time.microseconds) * 1e-6);
}
#endif
BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
std::cerr << "ERROR: " << msg << std::endl;
std::exit(EXIT_FAILURE);
}
} // end namespace
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
double ProcessCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(CLOCK_PROCESS_CPUTIME_ID)
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
return MakeTime(spec);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0)
return MakeTime(ru);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#endif
}
double ThreadCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE this_thread = GetCurrentThread();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
return MakeTime(ts);
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#elif defined(BENCHMARK_OS_MACOSX)
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self());
if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count)
== KERN_SUCCESS) {
return MakeTime(info);
}
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
#else
#error Per-thread timing is not available on your system.
#endif
}
namespace {
std::string DateTimeString(bool local) {
typedef std::chrono::system_clock Clock;
std::time_t now = Clock::to_time_t(Clock::now());
const std::size_t kStorageSize = 128;
char storage[kStorageSize];
std::size_t written;
if (local) {
#if defined(BENCHMARK_OS_WINDOWS)
written =
std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::localtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
} else {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::gmtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
}
CHECK(written < kStorageSize);
((void)written); // prevent unused variable in optimized mode.
return std::string(storage);
}
} // end namespace
std::string LocalDateTimeString() { return DateTimeString(true); }
} // end namespace benchmark
#ifndef BENCHMARK_TIMERS_H
#define BENCHMARK_TIMERS_H
#include <chrono>
#include <string>
namespace benchmark {
// Return the CPU usage of the current process
double ProcessCPUUsage();
// Return the CPU usage of the children of the current process
double ChildrenCPUUsage();
// Return the CPU usage of the current thread
double ThreadCPUUsage();
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
typedef std::chrono::high_resolution_clock type;
};
template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
#endif
struct ChooseClockType {
#if defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;
#endif
};
inline double ChronoClockNow() {
typedef ChooseClockType::type ClockType;
using FpSeconds = std::chrono::duration<double, std::chrono::seconds::period>;
return FpSeconds(ClockType::now().time_since_epoch()).count();
}
std::string LocalDateTimeString();
} // end namespace benchmark
#endif // BENCHMARK_TIMERS_H
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/macros.h"
#include "internal_macros.h"
#include "walltime.h"
#if defined(BENCHMARK_OS_WINDOWS)
#include <time.h>
#include <winsock.h> // for timeval
#else
#include <sys/time.h>
#endif
#include <cstdio>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <atomic>
#include <chrono>
#include <limits>
#include "arraysize.h"
#include "check.h"
#include "cycleclock.h"
#include "log.h"
#include "sysinfo.h"
namespace benchmark {
namespace walltime {
namespace {
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
typedef std::chrono::high_resolution_clock type;
};
template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
#endif
struct ChooseClockType {
#if defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;
#endif
};
class WallTimeImp
{
public:
WallTime Now();
static WallTimeImp& GetWallTimeImp() {
static WallTimeImp* imp = new WallTimeImp();
return *imp;
}
private:
WallTimeImp();
// Helper routines to load/store a float from an AtomicWord. Required because
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to
// get rid of this horror show.
void SetDrift(float f) {
int32_t w;
memcpy(&w, &f, sizeof(f));
std::atomic_store(&drift_adjust_, w);
}
float GetDrift() const {
float f;
int32_t w = std::atomic_load(&drift_adjust_);
memcpy(&f, &w, sizeof(f));
return f;
}
WallTime Slow() const {
struct timeval tv;
#if defined(BENCHMARK_OS_WINDOWS)
FILETIME file_time;
SYSTEMTIME system_time;
ULARGE_INTEGER ularge;
const unsigned __int64 epoch = 116444736000000000LL;
GetSystemTime(&system_time);
SystemTimeToFileTime(&system_time, &file_time);
ularge.LowPart = file_time.dwLowDateTime;
ularge.HighPart = file_time.dwHighDateTime;
tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000));
tv.tv_usec = (long)(system_time.wMilliseconds * 1000);
#else
gettimeofday(&tv, nullptr);
#endif
return tv.tv_sec + tv.tv_usec * 1e-6;
}
private:
static_assert(sizeof(float) <= sizeof(int32_t),
"type sizes don't allow the drift_adjust hack");
WallTime base_walltime_;
int64_t base_cycletime_;
int64_t cycles_per_second_;
double seconds_per_cycle_;
uint32_t last_adjust_time_;
std::atomic<int32_t> drift_adjust_;
int64_t max_interval_cycles_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp);
};
WallTime WallTimeImp::Now() {
WallTime now = 0.0;
WallTime result = 0.0;
int64_t ct = 0;
uint32_t top_bits = 0;
do {
ct = cycleclock::Now();
int64_t cycle_delta = ct - base_cycletime_;
result = base_walltime_ + cycle_delta * seconds_per_cycle_;
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
// Recompute drift no more often than every 2^32 cycles.
// I.e., @2GHz, ~ every two seconds
if (top_bits == last_adjust_time_) { // don't need to recompute drift
return result + GetDrift();
}
now = Slow();
} while (cycleclock::Now() - ct > max_interval_cycles_);
// We are now sure that "now" and "result" were produced within
// kMaxErrorInterval of one another.
SetDrift(static_cast<float>(now - result));
last_adjust_time_ = top_bits;
return now;
}
WallTimeImp::WallTimeImp()
: base_walltime_(0.0), base_cycletime_(0),
cycles_per_second_(0), seconds_per_cycle_(0.0),
last_adjust_time_(0), drift_adjust_(0),
max_interval_cycles_(0) {
const double kMaxErrorInterval = 100e-6;
cycles_per_second_ = static_cast<int64_t>(CyclesPerSecond());
CHECK(cycles_per_second_ != 0);
seconds_per_cycle_ = 1.0 / cycles_per_second_;
max_interval_cycles_ =
static_cast<int64_t>(cycles_per_second_ * kMaxErrorInterval);
do {
base_cycletime_ = cycleclock::Now();
base_walltime_ = Slow();
} while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_);
// We are now sure that "base_walltime" and "base_cycletime" were produced
// within kMaxErrorInterval of one another.
SetDrift(0.0);
last_adjust_time_ = static_cast<uint32_t>(uint64_t(base_cycletime_) >> 32);
}
WallTime CPUWalltimeNow() {
static WallTimeImp& imp = WallTimeImp::GetWallTimeImp();
return imp.Now();
}
WallTime ChronoWalltimeNow() {
typedef ChooseClockType::type Clock;
typedef std::chrono::duration<WallTime, std::chrono::seconds::period>
FPSeconds;
static_assert(std::chrono::treat_as_floating_point<WallTime>::value,
"This type must be treated as a floating point type.");
auto now = Clock::now().time_since_epoch();
return std::chrono::duration_cast<FPSeconds>(now).count();
}
bool UseCpuCycleClock() {
bool useWallTime = !CpuScalingEnabled();
if (useWallTime) {
VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n";
} else {
VLOG(1) << "Using std::chrono to provide walltime::Now().\n";
}
return useWallTime;
}
} // end anonymous namespace
// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is
// enabled at the start of the program then std::chrono::system_clock is used
// instead.
WallTime Now()
{
static bool useCPUClock = UseCpuCycleClock();
if (useCPUClock) {
return CPUWalltimeNow();
} else {
return ChronoWalltimeNow();
}
}
} // end namespace walltime
namespace {
std::string DateTimeString(bool local) {
typedef std::chrono::system_clock Clock;
std::time_t now = Clock::to_time_t(Clock::now());
char storage[128];
std::size_t written;
if (local) {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::localtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
} else {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::gmtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
}
CHECK(written < arraysize(storage));
((void)written); // prevent unused variable in optimized mode.
return std::string(storage);
}
} // end namespace
std::string LocalDateTimeString() {
return DateTimeString(true);
}
} // end namespace benchmark
#ifndef BENCHMARK_WALLTIME_H_
#define BENCHMARK_WALLTIME_H_
#include <string>
namespace benchmark {
typedef double WallTime;
namespace walltime {
WallTime Now();
} // end namespace walltime
std::string LocalDateTimeString();
} // end namespace benchmark
#endif // BENCHMARK_WALLTIME_H_
......@@ -59,6 +59,10 @@ def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
if old_val == 0 and new_val == 0:
return 0.0
if old_val == 0:
return float(new_val - old_val) / (float(old_val + new_val) / 2)
return float(new_val - old_val) / abs(old_val)
......@@ -73,7 +77,7 @@ def generate_difference_report(json1, json2, use_color=True):
if b['name'] == name:
return b
return None
first_line = "{:<{}s} Time CPU".format(
first_line = "{:<{}s} Time CPU Old New".format(
'Benchmark', first_col_width)
output_strs = [first_line, '-' * len(first_line)]
for bn in json1['benchmarks']:
......@@ -88,12 +92,13 @@ def generate_difference_report(json1, json2, use_color=True):
return BC_WHITE
else:
return BC_CYAN
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}"
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color, fmt_str,
BC_HEADER, bn['name'], first_col_width,
get_color(tres), tres, get_color(cpures), cpures,
bn['cpu_time'], other_bench['cpu_time'],
endc=BC_ENDC)]
return output_strs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment