Commit 403f3544 by Dominic Hamon

Initial commit

Benchmark library builds and runs but only single-threaded. Multithreaded support needs a bit more love. Currently requires some C++11 support (g++ 4.6.3 seems to work).
parents
CMakeCache.txt
CMakeFiles/
Makefile
bin/
cmake_install.cmake
lib/
# Top-level build script: builds the static `benchmark` library from src/*.cc
# and the `benchmark_test` executable that links against it.
cmake_minimum_required (VERSION 2.8)
project (benchmark)
# Locates the platform thread library; its link flags end up in
# CMAKE_THREAD_LIBS_INIT, which is used for benchmark_test below.
find_package(Threads)
# Executables are written to bin/ and libraries/archives to lib/, both inside
# the source tree.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
# NOTE(review): these set() calls overwrite the variables instead of appending
# to them, so flags supplied by the user are dropped. The release flags define
# NDEBUG, which disables assert().
set(CMAKE_CXX_FLAGS "-Wall -Werror --std=c++0x")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
set(CMAKE_CXX_FLAGS_RELEASE "-fno-strict-aliasing -O3 -DNDEBUG")
# Set OS
# Exactly one OS_* macro is defined for the three recognized systems; the
# sources select platform-specific code with it.
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
add_definitions(-DOS_LINUX)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
add_definitions(-DOS_WINDOWS)
endif()
# Set CPU
# MATCHES is a regular-expression search, so any processor name containing
# "x86" (for example x86_64) defines ARCH_X86.
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86")
add_definitions(-DARCH_X86)
endif()
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src)
link_directories(${PROJECT_SOURCE_DIR}/lib)
# Build the targets
# NOTE(review): the glob is evaluated when CMake runs, so newly added source
# files are only picked up after re-running CMake.
FILE(GLOB SOURCE_FILES "src/*.cc")
add_library(benchmark STATIC ${SOURCE_FILES})
add_executable(benchmark_test test/benchmark_test.cc)
target_link_libraries(benchmark_test benchmark ${CMAKE_THREAD_LIBS_INIT})
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
// Declares, without defining, the copy constructor and the copy-assignment
// operator of TypeName. Used inside the private section of a class, it makes
// attempts to copy objects of that class fail to compile or link.
// NOTE: the expansion already ends with ';', so writing another ';' after the
// macro invocation produces an empty declaration.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&);
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// One caveat is that, for C++03, arraysize() doesn't accept any array of
// an anonymous type or a type defined inside a function. In these rare
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
// due to a limitation in C++03's template system. The limitation has
// been removed in C++11.
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
// Declared but never defined: only the return type, a reference to a
// char[Count], is ever inspected (through sizeof).
template <typename Elem, size_t Count>
char (&ArraySizeHelper(Elem (&)[Count]))[Count];
// gcc additionally wants an overload for arrays of const elements, while
// MSVC cannot choose between the two when both are visible, so the second
// overload is hidden from it.
#ifndef COMPILER_MSVC
template <typename Elem, size_t Count>
char (&ArraySizeHelper(const Elem (&)[Count]))[Count];
#endif
// Number of elements of the array `arr`, as a compile-time constant.
#define arraysize(arr) (sizeof(ArraySizeHelper(arr)))
// The STATIC_ASSERT macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
// content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.
// Helper type for STATIC_ASSERT. Using the checked expression as its template
// argument forces that expression to be a compile-time constant.
template <bool>
struct StaticAssert {
};
// Declares a typedef called `msg` for an array of StaticAssert objects whose
// length is 1 when `expr` is true and -1 (an invalid array type) when it is
// false.
#define STATIC_ASSERT(expr, msg) \
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
// Implementation details of STATIC_ASSERT:
//
// - STATIC_ASSERT works by defining an array type that has -1
// elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
// does not work, as gcc supports variable-length arrays whose sizes
// are determined at run-time (this is gcc's extension and not part
// of the C++ standard). As a result, gcc fails to reject the
// following code with the simple definition:
//
// int foo;
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
// // not a compile-time constant.
//
// - By using the type StaticAssert<(bool(expr))>, we ensure that
// expr is a compile-time constant. (Template arguments must be
// determined at compile-time.)
//
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
//
// StaticAssert<bool(expr)>
//
// instead, these compilers will refuse to compile
//
// STATIC_ASSERT(5 > 0, some_message);
//
// (They seem to think the ">" in "5 > 0" marks the end of the
// template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
// ((expr) ? 1 : -1).
//
// This is to avoid running into a bug in MS VC 7.1, which
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// CHECK(b) terminates the process with a diagnostic on stderr when `b`
// evaluates to false.
//
// The test does not depend on assert(), so it stays active when NDEBUG is
// defined (as it is in release builds). `b` is evaluated exactly once, and the
// macro expands to a single statement that is safe inside unbraced if/else.
#define CHECK(b) \
  do { \
    if (!(b)) { \
      fprintf(stderr, "%s:%d: CHECK failed: %s\n", __FILE__, __LINE__, #b); \
      abort(); \
    } \
  } while (0)
// Comparison forms of CHECK; every operand is evaluated exactly once.
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
//
// Prevent the compiler from complaining about or optimizing away variables
// that appear unused.
// NOTE(review): this and the macros below expand to the GCC-style
// __attribute__ syntax unconditionally; no fallback for other compilers is
// provided here.
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
//
// For functions we want to force inline or not inline.
// Introduced in gcc 3.1.
// Each HAVE_* macro is defined to 1 next to the attribute macro it refers to,
// presumably so that code can test for availability with #if.
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
#define HAVE_ATTRIBUTE_NOINLINE 1
#endif // BENCHMARK_MACROS_H_
#include "colorprint.h"
#include <stdarg.h>
#include "commandlineflags.h"
DECLARE_bool(color_print);
namespace {
#ifdef OS_WINDOWS
// Console text attribute bits understood by SetConsoleTextAttribute.
typedef WORD PlatformColorCode;

// Maps a LogColor to the matching foreground attribute bits. COLOR_WHITE and
// any value without a dedicated case map to 0.
PlatformColorCode GetPlatformColorCode(LogColor color) {
  PlatformColorCode attrs = 0;
  switch (color) {
    case COLOR_RED:
      attrs = FOREGROUND_RED;
      break;
    case COLOR_GREEN:
      attrs = FOREGROUND_GREEN;
      break;
    case COLOR_YELLOW:
      attrs = FOREGROUND_RED | FOREGROUND_GREEN;
      break;
    case COLOR_BLUE:
      attrs = FOREGROUND_BLUE;
      break;
    case COLOR_MAGENTA:
      attrs = FOREGROUND_BLUE | FOREGROUND_RED;
      break;
    case COLOR_CYAN:
      attrs = FOREGROUND_BLUE | FOREGROUND_GREEN;
      break;
    case COLOR_WHITE:  // same result as the default case
    default:
      break;
  }
  return attrs;
}
#else
// The digit that follows "3" in an ANSI foreground color escape sequence.
typedef const char* PlatformColorCode;

// Maps a LogColor to its ANSI color digit, or NULL when the color has no
// dedicated case (this includes COLOR_DEFAULT).
PlatformColorCode GetPlatformColorCode(LogColor color) {
  PlatformColorCode digit = NULL;
  switch (color) {
    case COLOR_RED:
      digit = "1";
      break;
    case COLOR_GREEN:
      digit = "2";
      break;
    case COLOR_YELLOW:
      digit = "3";
      break;
    case COLOR_BLUE:
      digit = "4";
      break;
    case COLOR_MAGENTA:
      digit = "5";
      break;
    case COLOR_CYAN:
      digit = "6";
      break;
    case COLOR_WHITE:
      digit = "7";
      break;
    default:
      break;
  }
  return digit;
}
#endif
}  // end namespace
// printf-style output to stdout. When FLAGS_color_print is set the text is
// wrapped in the platform's color controls for `color`; otherwise it is
// printed unchanged.
void ColorPrintf(LogColor color, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  if (FLAGS_color_print) {
#ifdef OS_WINDOWS
    const HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
    // Remember the current attributes so they can be restored afterwards.
    CONSOLE_SCREEN_BUFFER_INFO console_info;
    GetConsoleScreenBufferInfo(console, &console_info);
    const WORD saved_attrs = console_info.wAttributes;
    // Buffered text has to reach the console before the attributes change,
    // otherwise the new color would also apply to text printed earlier.
    fflush(stdout);
    SetConsoleTextAttribute(console,
                            GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
    vprintf(fmt, ap);
    fflush(stdout);
    SetConsoleTextAttribute(console, saved_attrs);
#else
    const char* const digit = GetPlatformColorCode(color);
    if (digit != NULL)
      fprintf(stdout, "\033[0;3%sm", digit);
    vprintf(fmt, ap);
    printf("\033[m");  // Back to the terminal's default rendition.
#endif
  } else {
    vprintf(fmt, ap);
  }
  va_end(ap);
}
#ifndef BENCHMARK_COLORPRINT_H_
#define BENCHMARK_COLORPRINT_H_
// Colors that can be passed to ColorPrintf.
enum LogColor {
COLOR_DEFAULT,
COLOR_RED,
COLOR_GREEN,
COLOR_YELLOW,
COLOR_BLUE,
COLOR_MAGENTA,
COLOR_CYAN,
COLOR_WHITE
};
// Formats `fmt` and the following arguments like printf and writes the result
// to stdout, using `color` for the text.
void ColorPrintf(LogColor color, const char* fmt, ...);
#endif // BENCHMARK_COLORPRINT_H_
#include "commandlineflags.h"
#include <string.h>
#include <iostream>
#include <limits>
namespace benchmark {
// Parses 'str' as a decimal 32-bit signed integer. On success writes the
// result to *value and returns true. Otherwise prints a diagnostic beginning
// with 'src_text' to std::cerr, leaves *value unchanged and returns false.
//
// Input is rejected when it contains no digits at all (for example the empty
// string), when anything follows the number, or when the number does not fit
// into an int32_t.
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
  char* end = NULL;
  const long long_value = strtol(str, &end, 10);  // NOLINT
  // strtol() leaves 'end' equal to 'str' when nothing could be converted;
  // without this test such input would silently be accepted as 0. Any
  // character left after the number is an error as well.
  if (end == str || *end != '\0') {
    std::cerr << src_text << " is expected to be a 32-bit integer, "
              << "but actually has value \"" << str << "\".\n";
    return false;
  }
  // Is the parsed value in the range of an int32_t?
  const int32_t result = static_cast<int32_t>(long_value);
  if (long_value == std::numeric_limits<long>::max() ||
      long_value == std::numeric_limits<long>::min() ||
      // The parsed value overflows as a long. (strtol() returns
      // LONG_MAX or LONG_MIN when the input overflows.)
      result != long_value
      // The parsed value overflows as an int32_t.
      ) {
    std::cerr << src_text << " is expected to be a 32-bit integer, "
              << "but actually has value \"" << str << "\", "
              << "which overflows.\n";
    return false;
  }
  *value = result;
  return true;
}
// Parses 'str' as a double. On success writes the result to *value and
// returns true. Otherwise prints a diagnostic beginning with 'src_text' to
// std::cerr, leaves *value unchanged and returns false.
//
// Input is rejected when strtod() cannot convert anything (for example the
// empty string) or when characters remain after the number.
bool ParseDouble(const std::string& src_text, const char* str, double* value) {
  char* end = NULL;
  const double double_value = strtod(str, &end);  // NOLINT
  // strtod() leaves 'end' equal to 'str' when nothing could be converted;
  // without this test such input would silently be accepted as 0.0.
  if (end == str || *end != '\0') {
    std::cerr << src_text << " is expected to be a double, "
              << "but actually has value \"" << str << "\".\n";
    return false;
  }
  *value = double_value;
  return true;
}
// Looks up the environment variable `name`. A NULL result means the variable
// is not available.
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE has no environment variables at all.
  return NULL;
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
  // On these platforms a variable that was cleared programmatically reads
  // back as the empty string instead of being unset, so an empty value is
  // reported as NULL as well.
  const char* const raw = getenv(name);
  if (raw == NULL || raw[0] == '\0') return NULL;
  return raw;
#else
  const char* const raw = getenv(name);
  return raw;
#endif
}
// Returns the name of the environment variable corresponding to the given
// flag: the flag name converted to upper case with "BENCHMARK_" in front.
// For example, FlagToEnvVar("foo") returns "BENCHMARK_FOO".
//
// 'flag' must be a non-NULL, NUL-terminated string.
static std::string FlagToEnvVar(const char* flag) {
  std::string env_var("BENCHMARK_");
  for (const char* p = flag; *p != '\0'; ++p) {
    // toupper() is only defined for EOF and for values representable as
    // unsigned char, so a plain char (which may be negative) is converted
    // first.
    env_var += static_cast<char>(::toupper(static_cast<unsigned char>(*p)));
  }
  return env_var;
}
// Boolean value of the environment variable that belongs to `flag`.
// An unset variable yields default_value; a set variable is true unless its
// text is exactly "0".
bool BoolFromEnv(const char* flag, bool default_value) {
  const std::string name = FlagToEnvVar(flag);
  const char* const text = GetEnv(name.c_str());
  if (text == NULL) return default_value;
  return strcmp(text, "0") != 0;
}
// 32-bit integer value of the environment variable that belongs to `flag`.
// default_value is returned when the variable is unset or when its text is
// not a valid 32-bit integer; in the latter case a notice is also written to
// std::cout.
int32_t Int32FromEnv(const char* flag, int32_t default_value) {
  const std::string name = FlagToEnvVar(flag);
  const char* const text = GetEnv(name.c_str());
  int32_t parsed = default_value;
  // An unset variable skips parsing and falls through with the default.
  if (text != NULL &&
      !ParseInt32(std::string("Environment variable ") + name, text,
                  &parsed)) {
    std::cout << "The default value " << default_value << " is used.\n";
    return default_value;
  }
  return parsed;
}
// Text of the environment variable that belongs to `flag`, or default_value
// when that variable is not set.
const char* StringFromEnv(const char* flag, const char* default_value) {
  const std::string name = FlagToEnvVar(flag);
  if (const char* const found = GetEnv(name.c_str())) return found;
  return default_value;
}
// Matches `str` against the command line flag `flag`, which is expected in
// the form "--flag=value". With def_optional set, a bare "--flag" is accepted
// too.
//
// Returns a pointer into `str` at the start of the value (an empty string for
// a bare "--flag"), or NULL when `str` is not this flag.
const char* ParseFlagValue(const char* str,
                           const char* flag,
                           bool def_optional) {
  if (str == NULL || flag == NULL) return NULL;
  // `str` has to start with "--" followed by the flag name.
  const std::string prefix = std::string("--") + flag;
  if (strncmp(str, prefix.c_str(), prefix.length()) != 0) return NULL;
  // What follows the name decides the outcome.
  const char* const rest = str + prefix.length();
  switch (rest[0]) {
    case '=':
      return rest + 1;
    case '\0':
      // A missing "=value" part is only acceptable for optional values.
      return def_optional ? rest : NULL;
    default:
      // The name merely shares a prefix with a longer flag name.
      return NULL;
  }
}
// Parses `str` as the boolean flag `flag` ("--flag" or "--flag=value").
// Returns false without touching *value when `str` is not this flag.
// Otherwise *value becomes false if the value text starts with '0', 'f' or
// 'F' and true in every other case, including a bare "--flag".
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
  const char* const text = ParseFlagValue(str, flag, true);
  if (text == NULL) return false;
  switch (*text) {
    case '0':
    case 'f':
    case 'F':
      *value = false;
      break;
    default:
      *value = true;
      break;
  }
  return true;
}
// Parses `str` as "--flag=value" with a 32-bit integer value. Returns true
// and stores the number in *value only when both the flag name matches and
// ParseInt32 accepts the value text.
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
  const char* const text = ParseFlagValue(str, flag, false);
  return text != NULL &&
         ParseInt32(std::string("The value of flag --") + flag, text, value);
}
// Parses `str` as "--flag=value" with a floating point value. Returns true
// and stores the number in *value only when both the flag name matches and
// ParseDouble accepts the value text.
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
  const char* const text = ParseFlagValue(str, flag, false);
  return text != NULL &&
         ParseDouble(std::string("The value of flag --") + flag, text, value);
}
// Parses `str` as "--flag=value" and copies the value text into *value.
// Returns false, leaving *value alone, when `str` is not this flag.
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
  if (const char* const text = ParseFlagValue(str, flag, false)) {
    *value = text;
    return true;
  }
  return false;
}
// True when `str` is "--flag", with or without a trailing "=value".
bool IsFlag(const char* str, const char* flag) {
  const char* const match = ParseFlagValue(str, flag, true);
  return match != NULL;
}
} // end namespace benchmark
#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
#define BENCHMARK_COMMANDLINEFLAGS_H_
#include <stdint.h>
#include <string>
// Macro for referencing flags.
// FLAG(name) expands to the identifier FLAGS_name, the variable that holds the
// flag's value.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
// Each expands to an extern declaration of the flag variable, for use in
// files other than the one that defines the flag.
#define DECLARE_bool(name) extern bool FLAG(name)
#define DECLARE_int32(name) extern int32_t FLAG(name)
#define DECLARE_int64(name) extern int64_t FLAG(name)
#define DECLARE_double(name) extern double FLAG(name)
#define DECLARE_string(name) extern std::string FLAG(name)
// Macros for defining flags.
// Each expands to the definition of the flag variable initialized with
// default_val. The doc argument does not appear in the expansion.
#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
#define DEFINE_string(name, default_val, doc) \
std::string FLAG(name) = (default_val)
namespace benchmark {
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
// to *value and returns true; otherwise leaves *value unchanged and returns
// false. 'src_text' names the origin of 'str' in error messages.
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
// Parses a bool/Int32/double/string from the environment variable
// corresponding to the given benchmark flag, falling back to default_val.
// NOTE(review): DoubleFromEnv is declared here, but the accompanying
// commandlineflags.cc contains no definition with that name; confirm whether
// one exists elsewhere before calling it.
bool BoolFromEnv(const char* flag, bool default_val);
int32_t Int32FromEnv(const char* flag, int32_t default_val);
double DoubleFromEnv(const char* flag, double default_val);
const char* StringFromEnv(const char* flag, const char* default_val);
// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
// In the former case, the value is taken as true as long as it does
// not start with '0', 'f', or 'F'.
//
// In the latter case, the value is taken as true.
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
// Parses a string for an Int32 flag, in the form of
// "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
// Parses a string for a Double flag, in the form of
// "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
// Parses a string for a string flag, in the form of
// "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
// Returns true if the string matches the flag.
bool IsFlag(const char* str, const char* flag);
} // end namespace benchmark
#endif // BENCHMARK_COMMANDLINEFLAGS_H_
// ----------------------------------------------------------------------
// CycleClock
// A CycleClock tells you the current time in Cycles. The "time"
// is actually time since power-on. This is like time() but doesn't
// involve a system call and is much more precise.
//
// NOTE: Not all cpu/platform/kernel combinations guarantee that this
// clock increments at a constant rate or is synchronized across all logical
// cpus in a system.
//
// If you need the above guarantees, please consider using a different
// API. There are efforts to provide an interface that offers millisecond
// granularity and is implemented as a memory read. A memory read is generally
// cheaper than the CycleClock on many architectures.
//
// Also, in some out of order CPU implementations, the CycleClock is not
// serializing. So if you're trying to count at cycles granularity, your
// data might be inaccurate due to out of order instruction execution.
// ----------------------------------------------------------------------
#ifndef BENCHMARK_CYCLECLOCK_H_
#define BENCHMARK_CYCLECLOCK_H_
#include <stdint.h>
#if defined(OS_MACOSX)
# include <mach/mach_time.h>
#endif
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
// with even ancient MSVC compilers), and when not possible the
// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
// environments, <windows.h> and <intrin.h> have conflicting
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
// Only MSVC builds that do not target 32-bit x86 (_M_IX86) need this
// declaration: CycleClock::Now() calls __rdtsc() for them, while 32-bit x86
// builds use inline assembly instead.
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
#include <sys/time.h>
// NOTE: only i386 and x86_64 have been well tested.
// PPC, sparc, alpha, and ia64 are based on
// http://peter.kuscsik.com/wordpress/?p=14
// with modifications by m3b. See also
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
// Platform-specific access to a fast, high-resolution tick counter.
struct CycleClock {
  // Returns the current value of the platform's tick counter. Depending on
  // the branch selected below this is a CPU cycle count, a time-base or
  // mach-time value, or microseconds obtained from gettimeofday(), so only
  // differences between two readings taken on the same machine are
  // meaningful.
  //
  // This should return the number of cycles since power-on. Thread-safe.
  static inline int64_t Now() {
#if defined(OS_MACOSX)
    // this goes at the top because we need ALL Macs, regardless of
    // architecture, to return the number of "mach time units" that
    // have passed since startup. See sysinfo.cc where
    // InitializeSystemInfo() sets the supposed cpu clock frequency of
    // macs to the number of mach time units per second, not actual
    // CPU clock frequency (which can change in the face of CPU
    // frequency scaling). Also note that when the Mac sleeps, this
    // counter pauses; it does not continue counting, nor does it
    // reset to zero.
    return mach_absolute_time();
#elif defined(__i386__)
    // "=A" binds the 64-bit result to the edx:eax register pair.
    int64_t ret;
    __asm__ volatile ("rdtsc" : "=A" (ret) );
    return ret;
#elif defined(__x86_64__) || defined(__amd64__)
    // rdtsc delivers the counter in two 32-bit halves (eax, edx).
    uint64_t low, high;
    __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
    return (high << 32) | low;
#elif defined(__powerpc__) || defined(__ppc__)
    // This returns a time-base, which is not always precisely a cycle-count.
    int64_t tbl, tbu0, tbu1;
    asm("mftbu %0" : "=r" (tbu0));
    asm("mftb %0" : "=r" (tbl));
    asm("mftbu %0" : "=r" (tbu1));
    // If the upper half changed between the two reads, the lower half has
    // wrapped; the mask (all ones or zero) then clears it.
    tbl &= -static_cast<int64_t>(tbu0 == tbu1);
    // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
    return (tbu1 << 32) | tbl;
#elif defined(__sparc__)
    int64_t tick;
    asm(".byte 0x83, 0x41, 0x00, 0x00");
    asm("mov %%g1, %0" : "=r" (tick));
    return tick;
#elif defined(__ia64__)
    int64_t itc;
    asm("mov %0 = ar.itc" : "=r" (itc));
    return itc;
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
    // Older MSVC compilers (like 7.x) don't seem to support the
    // __rdtsc intrinsic properly, so I prefer to use _asm instead
    // when I know it will work. Otherwise, I'll use __rdtsc and hope
    // the code is being compiled with a non-ancient compiler.
    _asm rdtsc
#elif defined(COMPILER_MSVC)
    return __rdtsc();
#elif defined(ARMV3)
#if defined(ARMV6)  // V6 is the earliest arch that has a standard cyclecount
    uint32_t pmccntr;
    uint32_t pmuseren;
    uint32_t pmcntenset;
    // Read the user mode perf monitor counter access permissions.
    asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
      asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
      if (pmcntenset & 0x80000000ul) {  // Is it counting?
        asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
        // The counter is set up to count every 64th cycle
        return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
      }
    }
#endif
    // Fallback when the cycle counter cannot be read: wall-clock time in
    // microseconds.
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
    // mips apparently only allows rdtsc for superusers, so we fall
    // back to gettimeofday. It's possible clock_gettime would be better.
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is available.
#error You need to define CycleTimer for your OS and CPU
#endif
  }
};
#endif // BENCHMARK_CYCLECLOCK_H_
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_
#include <assert.h>
// Declares, without defining, the copy constructor and the copy-assignment
// operator of TypeName. Used inside the private section of a class, it makes
// attempts to copy objects of that class fail to compile or link.
// NOTE: the expansion already ends with ';', so writing another ';' after the
// macro invocation produces an empty declaration.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&);
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// One caveat is that, for C++03, arraysize() doesn't accept any array of
// an anonymous type or a type defined inside a function. In these rare
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
// due to a limitation in C++03's template system. The limitation has
// been removed in C++11.
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
// Declared but never defined: only the return type, a reference to a
// char[Count], is ever inspected (through sizeof).
template <typename Elem, size_t Count>
char (&ArraySizeHelper(Elem (&)[Count]))[Count];
// gcc additionally wants an overload for arrays of const elements, while
// MSVC cannot choose between the two when both are visible, so the second
// overload is hidden from it.
#ifndef COMPILER_MSVC
template <typename Elem, size_t Count>
char (&ArraySizeHelper(const Elem (&)[Count]))[Count];
#endif
// Number of elements of the array `arr`, as a compile-time constant.
#define arraysize(arr) (sizeof(ArraySizeHelper(arr)))
// The STATIC_ASSERT macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
// content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.
// Helper type for STATIC_ASSERT. Using the checked expression as its template
// argument forces that expression to be a compile-time constant.
template <bool>
struct StaticAssert {
};
// Declares a typedef called `msg` for an array of StaticAssert objects whose
// length is 1 when `expr` is true and -1 (an invalid array type) when it is
// false.
#define STATIC_ASSERT(expr, msg) \
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
// Implementation details of STATIC_ASSERT:
//
// - STATIC_ASSERT works by defining an array type that has -1
// elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
// does not work, as gcc supports variable-length arrays whose sizes
// are determined at run-time (this is gcc's extension and not part
// of the C++ standard). As a result, gcc fails to reject the
// following code with the simple definition:
//
// int foo;
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
// // not a compile-time constant.
//
// - By using the type StaticAssert<(bool(expr))>, we ensure that
// expr is a compile-time constant. (Template arguments must be
// determined at compile-time.)
//
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
//
// StaticAssert<bool(expr)>
//
// instead, these compilers will refuse to compile
//
// STATIC_ASSERT(5 > 0, some_message);
//
// (They seem to think the ">" in "5 > 0" marks the end of the
// template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
// ((expr) ? 1 : -1).
//
// This is to avoid running into a bug in MS VC 7.1, which
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// CHECK(b) evaluates `b` once and triggers assert(false) when it is false.
// NOTE(review): because the failure path is assert(), defining NDEBUG turns
// every CHECK into a no-op apart from evaluating its argument; a failing
// condition then no longer stops the program.
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
// Comparison forms of CHECK.
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
// Marks a declaration as possibly unused, using the GCC-style attribute
// syntax.
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
#endif // BENCHMARK_MACROS_H_
#ifndef BENCHMARK_MUTEX_LOCK_H_
#define BENCHMARK_MUTEX_LOCK_H_
#include <pthread.h>
// Scoped lock for a pthread mutex: the constructor locks *mu and the
// destructor unlocks it again.
//
// `mu` must point to an initialized mutex that outlives this object. The
// return codes of the pthread calls are ignored.
class mutex_lock {
 public:
  explicit mutex_lock(pthread_mutex_t* mu) : mu_(mu) {
    pthread_mutex_lock(mu_);
  }
  ~mutex_lock() {
    pthread_mutex_unlock(mu_);
  }
 private:
  // Copying is disabled: a copy would unlock the same mutex a second time
  // when it is destroyed. Declared but intentionally never defined.
  mutex_lock(const mutex_lock&);
  void operator=(const mutex_lock&);

  pthread_mutex_t* mu_;
};
#endif // BENCHMARK_MUTEX_LOCK_H_
#ifndef BENCHMARK_PORT_H_
#define BENCHMARK_PORT_H_
// Declares, without defining, the copy constructor and the copy-assignment
// operator of TypeName. Used inside the private section of a class, it makes
// attempts to copy objects of that class fail to compile or link.
// NOTE: the expansion already ends with ';', so writing another ';' after the
// macro invocation produces an empty declaration.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&);
#endif // BENCHMARK_PORT_H_
#include "sleep.h"
#include <time.h>
#include <errno.h>
#ifdef OS_WINDOWS
// Window's _sleep takes milliseconds argument.
void SleepForMilliseconds(int milliseconds) {
  _sleep(milliseconds);
}
// Suspends the calling thread for `seconds`, truncated to whole milliseconds.
void SleepForSeconds(double seconds) {
  SleepForMilliseconds(static_cast<int>(seconds * 1000));
}
// sleep.h declares this function for every platform, so it has to exist here
// as well. The request is rounded up to whole milliseconds so that a short
// non-zero wait still sleeps.
void SleepForMicroseconds(int64_t microseconds) {
  SleepForMilliseconds(static_cast<int>((microseconds + 999) / 1000));
}
#else  // OS_WINDOWS
static const int64_t kNumMillisPerSecond = 1000LL;
static const int64_t kNumMicrosPerMilli = 1000LL;
static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
static const int64_t kNumNanosPerMicro = 1000LL;
// Suspends the calling thread for `microseconds`.
void SleepForMicroseconds(int64_t microseconds) {
  struct timespec sleep_time;
  sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
  sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
  // When a signal interrupts the sleep, nanosleep() stores the time still
  // remaining back into sleep_time, so the loop resumes with what is left.
  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
    ;  // Ignore signals and wait for the full interval to elapse.
}
// Suspends the calling thread for `milliseconds`.
void SleepForMilliseconds(int milliseconds) {
  SleepForMicroseconds(static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli);
}
// Suspends the calling thread for `seconds`, truncated to whole microseconds.
void SleepForSeconds(double seconds) {
  SleepForMicroseconds(static_cast<int64_t>(seconds * kNumMicrosPerSecond));
}
#endif  // OS_WINDOWS
#ifndef BENCHMARK_SLEEP_H_
#define BENCHMARK_SLEEP_H_
#include <stdint.h>
// Each function suspends the calling thread for the given duration, expressed
// in the unit its name states.
void SleepForMicroseconds(int64_t microseconds);
void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);
#endif // BENCHMARK_SLEEP_H_
#ifndef BENCHMARK_STAT_H_
#define BENCHMARK_STAT_H_
#include <math.h>
#include <iostream>
#include <limits>
template <typename VType, typename NumType>
class Stat1;
template <typename VType, typename NumType>
class Stat1MinMax;
typedef Stat1<float, float> Stat1_f;
typedef Stat1<double, double> Stat1_d;
typedef Stat1MinMax<float, float> Stat1MinMax_f;
typedef Stat1MinMax<double, double> Stat1MinMax_d;
template <typename VType> class Vector2;
template <typename VType> class Vector3;
template <typename VType> class Vector4;
template <typename VType, typename NumType>
class Stat1 {
public:
typedef Stat1<VType, NumType> Self;
Stat1() {
Clear();
}
void Clear() {
numsamples_ = NumType();
sum_squares_ = sum_ = VType();
}
// Create a sample of value dat and weight 1
explicit Stat1(const VType &dat) {
sum_ = dat;
sum_squares_ = Sqr(dat);
numsamples_ = 1;
}
// Create statistics for all the samples between begin (included)
// and end(excluded)
explicit Stat1(const VType *begin, const VType *end) {
Clear();
for ( const VType *item = begin; item < end; ++item ) {
(*this) += Stat1(*item);
}
}
// Create a sample of value dat and weight w
Stat1(const VType &dat, const NumType &w) {
sum_ = w * dat;
sum_squares_ = w * Sqr(dat);
numsamples_ = w;
}
// Copy operator
Stat1(const Self &stat) {
sum_ = stat.sum_;
sum_squares_ = stat.sum_squares_;
numsamples_ = stat.numsamples_;
}
inline Self &operator =(const Self &stat) {
sum_ = stat.sum_;
sum_squares_ = stat.sum_squares_;
numsamples_ = stat.numsamples_;
return (*this);
}
// Merge statistics from two sample sets.
inline Self &operator +=(const Self &stat) {
sum_ += stat.sum_;
sum_squares_+= stat.sum_squares_;
numsamples_ += stat.numsamples_;
return (*this);
}
// The operation opposite to +=
inline Self &operator -=(const Self &stat) {
sum_ -= stat.sum_;
sum_squares_-= stat.sum_squares_;
numsamples_ -= stat.numsamples_;
return (*this);
}
// Multiply the weight of the set of samples by a factor k
inline Self &operator *=(const VType &k) {
sum_ *= k;
sum_squares_*= k;
numsamples_ *= k;
return (*this);
}
// Merge statistics from two sample sets.
inline Self operator + (const Self &stat) const {
return Self(*this) += stat;
}
// The operation opposite to +
inline Self operator - (const Self &stat) const {
return Self(*this) -= stat;
}
// Multiply the weight of the set of samples by a factor k
inline Self operator * (const VType &k) const {
return Self(*this) *= k;
}
// Return the total weight of this sample set
NumType NumSamples() const {
return numsamples_;
}
// Return the sum of this sample set
VType Sum() const {
return sum_;
}
// Return the mean of this sample set
VType Mean() const {
if (numsamples_ == 0) return VType();
return sum_ * (1.0 / numsamples_);
}
// Return the mean of this sample set and compute the standard deviation at
// the same time.
VType Mean(VType *stddev) const {
if (numsamples_ == 0) return VType();
VType mean = sum_ * (1.0 / numsamples_);
if (stddev) {
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
*stddev = Sqrt(avg_squares - Sqr(mean));
}
return mean;
}
// Return the standard deviation of the sample set
VType StdDev() const {
if (numsamples_ == 0) return VType();
VType mean = Mean();
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
return Sqrt(avg_squares - Sqr(mean));
}
private:
// Let i be the index of the samples provided (using +=)
// and weight[i],value[i] be the data of sample #i
// then the variables have the following meaning:
NumType numsamples_; // sum of weight[i];
VType sum_; // sum of weight[i]*value[i];
VType sum_squares_; // sum of weight[i]*value[i]^2;
// Squares a value. This generic overload multiplies the value by itself;
// the vector overloads square each component instead.
template <typename SType>
static inline SType Sqr(const SType &dat) {
  const SType squared = dat * dat;
  return squared;
}
// Vector overloads of Sqr: the result is dat.MulComponents(dat).
template <typename SType>
static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
  Vector2<SType> squared = dat.MulComponents(dat);
  return squared;
}
template <typename SType>
static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
  Vector3<SType> squared = dat.MulComponents(dat);
  return squared;
}
template <typename SType>
static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
  Vector4<SType> squared = dat.MulComponents(dat);
  return squared;
}
// Takes the square root of a value. This generic overload handles scalars;
// the vector overloads take the square root of every component.
template <typename SType>
static inline SType Sqrt(const SType &dat) {
  // A negative input yields 0 rather than NaN: imprecision in the
  // calculations can push a tiny variance below zero.
  if (!(dat < 0)) {
    return sqrt(dat);
  }
  return 0;
}
// Vector overloads of Sqrt. The input is first passed through Max() together
// with a value-initialized vector, then .Sqrt() is taken; as in the scalar
// case this is meant to avoid NaN due to imprecision in the calculations.
template <typename SType>
static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
  const Vector2<SType> lower_bound = Vector2<SType>();
  return Max(dat, lower_bound).Sqrt();
}
template <typename SType>
static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
  const Vector3<SType> lower_bound = Vector3<SType>();
  return Max(dat, lower_bound).Sqrt();
}
template <typename SType>
static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
  const Vector4<SType> lower_bound = Vector4<SType>();
  return Max(dat, lower_bound).Sqrt();
}
};
// Streams a Stat1 as "{ avg = <mean> std = <stddev> nsamples = <weight>}".
template <typename VType, typename NumType>
inline std::ostream& operator<<(std::ostream& out,
                                const Stat1<VType, NumType>& s) {
  out << "{ avg = " << s.Mean();
  out << " std = " << s.StdDev();
  out << " nsamples = " << s.NumSamples();
  out << "}";
  return out;
}
// Stat1MinMax: same as Stat1, but it also keeps the Min and Max values.
// The "-" operators are disabled because they cannot be implemented
// efficiently: undoing a merge would need the full list of values, which is
// not kept.
template <typename VType, typename NumType>
class Stat1MinMax : public Stat1<VType, NumType> {
 public:
  typedef Stat1MinMax<VType, NumType> Self;

  // Creates an empty sample set.
  Stat1MinMax() {
    Clear();
  }

  // Resets the set to empty. min_ and max_ are set to sentinels that any
  // merged sample replaces: +/-infinity when VType has one, otherwise the
  // largest and the lowest finite values.
  void Clear() {
    Stat1<VType, NumType>::Clear();
    if (std::numeric_limits<VType>::has_infinity) {
      min_ = std::numeric_limits<VType>::infinity();
      max_ = -std::numeric_limits<VType>::infinity();
    } else {
      min_ = std::numeric_limits<VType>::max();
      // lowest(), not min(): for floating-point-like types min() is the
      // smallest positive value, which is not a valid "no maximum yet" mark.
      // For integer types both are the same value.
      max_ = std::numeric_limits<VType>::lowest();
    }
  }

  // Create a sample of value dat and weight 1
  explicit Stat1MinMax(const VType &dat)
      : Stat1<VType, NumType>(dat), max_(dat), min_(dat) {
  }

  // Create statistics for all the samples between begin (included)
  // and end (excluded)
  explicit Stat1MinMax(const VType *begin, const VType *end) {
    Clear();
    for (const VType *item = begin; item < end; ++item) {
      (*this) += Stat1MinMax(*item);
    }
  }

  // Create a sample of value dat and weight w
  Stat1MinMax(const VType &dat, const NumType &w)
      : Stat1<VType, NumType>(dat, w), max_(dat), min_(dat) {
  }

  // Copy constructor
  Stat1MinMax(const Self &stat)
      : Stat1<VType, NumType>(stat), max_(stat.max_), min_(stat.min_) {
  }

  // Copy assignment
  inline Self &operator =(const Self &stat) {
    this->Stat1<VType, NumType>::operator=(stat);
    max_ = stat.max_;
    min_ = stat.min_;
    return (*this);
  }

  // Merge statistics from two sample sets: the base accumulators are added
  // and the extrema widened to cover both sets.
  inline Self &operator +=(const Self &stat) {
    this->Stat1<VType, NumType>::operator+=(stat);
    if (stat.max_ > max_) max_ = stat.max_;
    if (stat.min_ < min_) min_ = stat.min_;
    return (*this);
  }

  // Multiply the weight of the set of samples by a factor k. Only the base
  // accumulators are scaled; min_ and max_ are left untouched.
  inline Self &operator *=(const VType &k) {
    this->Stat1<VType, NumType>::operator*=(k);
    return (*this);
  }

  // Merge statistics from two sample sets.
  inline Self operator + (const Self &stat) const {
    return Self(*this) += stat;
  }

  // Multiply the weight of the set of samples by a factor k
  inline Self operator * (const VType &k) const {
    return Self(*this) *= k;
  }

 private:
  // The - operation makes no sense with Min/Max
  // unless we keep the full list of values (but we don't)
  // make it private, and leave it undefined so nobody can call it
  Self &operator -=(const Self &stat);  // senseless. left undefined.
  // The operation opposite to +
  Self operator - (const Self &stat) const;  // senseless. left undefined.

 public:
  // Return the maximal value in this sample set
  VType Max() const {
    return max_;
  }

  // Return the minimal value in this sample set
  VType Min() const {
    return min_;
  }

 private:
  // Let i be the index of the samples provided (using +=)
  // and weight[i],value[i] be the data of sample #i
  // then the variables have the following meaning:
  VType max_;  // max of value[i]
  VType min_;  // min of value[i]
};
// Streams a Stat1MinMax as
// "{ avg = <mean> std = <stddev> nsamples = <weight> min = <min> max = <max>}".
template <typename VType, typename NumType>
inline std::ostream& operator <<(std::ostream& out,
                                 const Stat1MinMax<VType, NumType>& s) {
  out << "{ avg = " << s.Mean();
  out << " std = " << s.StdDev();
  out << " nsamples = " << s.NumSamples();
  out << " min = " << s.Min();
  out << " max = " << s.Max();
  out << "}";
  return out;
}
#endif // BENCHMARK_STAT_H_
This diff is collapsed. Click to expand it.
#ifndef BENCHMARK_SYSINFO_H_
#define BENCHMARK_SYSINFO_H_
// System information queries. Only the declarations are visible here, so the
// notes on the first three functions are assumptions drawn from their names.
// NOTE(review): presumably the CPU time consumed by this process - confirm
// the meaning and unit against the definition.
double MyCPUUsage();
// NOTE(review): presumably the CPU time consumed by child processes - confirm.
double ChildrenCPUUsage();
// NOTE(review): presumably the number of CPUs on the machine - confirm.
int NumCPUs();
// Rate of the cycle counter: walltime::Initialize() truncates this value to
// an integer, requires it to be non-zero and uses its reciprocal to convert
// CycleClock readings into seconds.
double CyclesPerSecond();
#endif // BENCHMARK_SYSINFO_H_
#include "walltime.h"
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <atomic>
#include <limits>
#include "cycleclock.h"
#include "macros.h"
#include "sysinfo.h"
namespace walltime {
namespace {
// Largest gap, in seconds (100 microseconds), tolerated between a cycle
// counter reading and the Slow() reading paired with it.
const double kMaxErrorInterval = 100e-6;
// Set to true as the last step of Initialize(); until then Now() returns
// Slow().
std::atomic<bool> initialized(false);
// Wall time and cycle count sampled together by Initialize(); Now()
// extrapolates from this pair.
WallTime base_walltime = 0.0;
int64_t base_cycletime = 0;
// CyclesPerSecond() truncated to an integer, and its reciprocal.
int64_t cycles_per_second;
double seconds_per_cycle;
// Upper 32 bits of the cycle count at the time the drift was last updated.
uint32_t last_adjust_time = 0;
// Bit pattern of the float correction that Now() adds to the cycle-derived
// time; written by SetDrift() and read by GetDrift().
std::atomic<int32_t> drift_adjust(0);
// kMaxErrorInterval converted to cycles by Initialize().
int64_t max_interval_cycles = 0;
// Helper routines to load/store a float from an AtomicWord. Required because
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to get
// rid of this horror show.
// SetDrift copies the bytes of `f` into an int32_t and stores that integer
// atomically in drift_adjust.
inline void SetDrift(float f) {
  int32_t bits;
  memcpy(&bits, &f, sizeof(f));
  drift_adjust.store(bits);
}
// Atomically loads the integer held in drift_adjust and copies its bytes back
// into a float, undoing what SetDrift() stored.
inline float GetDrift() {
  const int32_t bits = drift_adjust.load();
  float drift;
  memcpy(&drift, &bits, sizeof(drift));
  return drift;
}
static_assert(sizeof(float) <= sizeof(int32_t),
"type sizes don't allow the drift_adjust hack");
// Reads the clock with gettimeofday() and returns it as seconds, with the
// microsecond field folded in as the fractional part.
WallTime Slow() {
  struct timeval now;
  gettimeofday(&now, NULL);
  const double whole_seconds = now.tv_sec;
  const double microseconds = now.tv_usec;
  return whole_seconds + microseconds * 1e-6;
}
// Splits `value` (seconds, as passed to localtime_r/gmtime_r) into broken-down
// calendar fields in `*t` and the fractional second in `*subsecond`.
// `local` selects localtime_r() (true) or gmtime_r() (false).
// Returns false, with `*t` zeroed and `*subsecond` set to 0.0, when `value` is
// negative, NaN, larger than time_t can hold, or cannot be converted to a
// struct tm.
bool SplitTimezone(WallTime value, bool local, struct tm* t,
                   double* subsecond) {
  memset(t, 0, sizeof(*t));
  *subsecond = 0.0;
  // !(value >= 0) rejects NaN as well as negative values; a NaN would
  // otherwise reach the cast to time_t below, which is undefined for it.
  if (!(value >= 0) || (value > std::numeric_limits<time_t>::max())) {
    return false;
  }
  const time_t whole_time = static_cast<time_t>(value);
  // Both conversions return NULL on failure (for example when the year does
  // not fit in struct tm). Report that instead of handing back the zeroed
  // struct as if it were a valid date.
  const struct tm* converted =
      local ? localtime_r(&whole_time, t) : gmtime_r(&whole_time, t);
  if (converted == NULL) {
    memset(t, 0, sizeof(*t));
    return false;
  }
  *subsecond = value - whole_time;
  return true;
}
} // end namespace
// This routine should be invoked to initialize walltime.
// It is not intended for general purpose use.
// It records a (cycle count, wall time) pair for Now() to extrapolate from.
// The first CHECK asserts that it has not already completed once.
void Initialize() {
CHECK(!std::atomic_load(&initialized));
// The reported cycle rate is truncated to a whole number of cycles.
cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
CHECK(cycles_per_second != 0);
seconds_per_cycle = 1.0 / cycles_per_second;
// kMaxErrorInterval converted from seconds to cycles.
max_interval_cycles = static_cast<int64_t>(
cycles_per_second * kMaxErrorInterval);
// Sample both clocks back to back and retry whenever taking the pair took
// more than max_interval_cycles.
do {
base_cycletime = CycleClock::Now();
base_walltime = Slow();
} while (CycleClock::Now() - base_cycletime > max_interval_cycles);
// We are now sure that "base_walltime" and "base_cycletime" were produced
// within kMaxErrorInterval of one another.
SetDrift(0.0);
// Remember the upper 32 bits of the base cycle count; Now() recomputes the
// drift once the upper bits of the current count differ from this value.
last_adjust_time = static_cast<uint32_t>(uint64_t(base_cycletime) >> 32);
// Set last, after every value that Now() reads has been written.
std::atomic_store(&initialized, true);
}
// Returns the current wall time in seconds.
// Until Initialize() has completed this is simply Slow(). Afterwards the time
// is extrapolated from the cycle counter and corrected by the stored drift;
// the drift itself is re-measured against Slow() whenever the upper 32 bits
// of the cycle count differ from those seen at the previous adjustment.
WallTime Now() {
if (!std::atomic_load(&initialized))
return Slow();
WallTime now = 0.0;
WallTime result = 0.0;
int64_t ct = 0;
uint32_t top_bits = 0;
do {
ct = CycleClock::Now();
// Extrapolate from the base pair captured by Initialize().
int64_t cycle_delta = ct - base_cycletime;
result = base_walltime + cycle_delta * seconds_per_cycle;
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
// Recompute drift no more often than every 2^32 cycles.
// I.e., @2GHz, ~ every two seconds
if (top_bits == last_adjust_time) { // don't need to recompute drift
return result + GetDrift();
}
// Drift is due for an update: take a Slow() reading and retry the whole
// measurement if it took longer than max_interval_cycles.
now = Slow();
} while (CycleClock::Now() - ct > max_interval_cycles);
// We are now sure that "now" and "result" were produced within
// kMaxErrorInterval of one another.
// The difference is narrowed to float by SetDrift().
SetDrift(now - result);
// NOTE(review): last_adjust_time is a plain uint32_t that is read above and
// written here without synchronization - confirm this is acceptable if Now()
// can be called from several threads.
last_adjust_time = top_bits;
return now;
}
// Formats `time` into `storage` with the strftime() pattern `format`, using
// local time when `local` is true and UTC otherwise, and returns `storage`.
// When `remainder_us` is non-NULL it receives the sub-second part of `time` in
// microseconds, clamped to [0, 999999]; it is set to 0 when `time` cannot be
// split, in which case `storage` receives an "Invalid time: ..." message.
// If strftime() produces nothing, `storage` is left as an empty string.
//
// NOTE(review): `storage` is a pointer, so sizeof(storage) below is the size
// of a pointer and not of the caller's buffer; output is capped at that many
// bytes. A real fix needs the buffer length passed in, which changes the
// signature declared in walltime.h, so it is only flagged here.
const char* Print(WallTime time, const char *format, bool local,
                  char* storage, int *remainder_us) {
  struct tm split;
  double subsecond;
  if (!SplitTimezone(time, local, &split, &subsecond)) {
    // Give the out-parameter a defined value on the failure path as well.
    if (remainder_us != NULL) *remainder_us = 0;
    snprintf(storage, sizeof(storage), "Invalid time: %f", time);
  } else {
    if (remainder_us != NULL) {
      *remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
      if (*remainder_us > 999999) *remainder_us = 999999;
      if (*remainder_us < 0) *remainder_us = 0;
    }
    // strftime() returns 0 when the result does not fit and then leaves the
    // buffer contents unspecified; hand back an empty string instead.
    if (strftime(storage, sizeof(storage), format, &split) == 0) {
      storage[0] = '\0';
    }
  }
  return storage;
}
} // end namespace walltime
#ifndef BENCHMARK_WALLTIME_H_
#define BENCHMARK_WALLTIME_H_
// Wall-clock time in seconds, including the fractional part.
typedef double WallTime;
namespace walltime {
// Calibrates the cycle-counter based clock used by Now(). The definition
// CHECKs that it has not already completed once.
void Initialize();
// Returns the current wall time. Before Initialize() has completed the
// definition falls back to a direct gettimeofday() reading.
WallTime Now();
// GIVEN: walltime, generic format string (as understood by strftime),
// a boolean flag specifying if the time is local or UTC (true=local).
// RETURNS: the formatted string. ALSO RETURNS: the storage printbuffer
// passed and the remaining number of microseconds (never printed in
// the string since strftime does not understand it)
// NOTE(review): no buffer length is passed; the definition limits its output
// with sizeof(storage), i.e. the size of a pointer - confirm against callers.
const char* Print(WallTime time, const char *format, bool local,
char* storage, int *remainder_us);
} // end namespace walltime
#endif // BENCHMARK_WALLTIME_H_
#include "benchmark/benchmark.h"
#include <math.h>
#include <stdint.h>
#include <limits>
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <vector>
namespace {
// Returns n! computed recursively; 0! and 1! are both 1.
// The base case tests n <= 1 so that Factorial(0) terminates instead of
// wrapping around to UINT32_MAX and recursing without bound.
// The result is returned as int, so it is only meaningful while n! fits.
int ATTRIBUTE_NOINLINE Factorial(uint32_t n) {
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
// Approximates pi by summing `depth` terms of an alternating series over odd
// denominators. The terms for index 0 and index 1 both equal 1, so one of
// them is subtracted again before scaling by 4; the result is
// 4 * (1 - 1/3 + 1/5 - ...). A depth of 1 therefore yields exactly 0.0 and a
// depth of 0 or less yields -4.0.
double CalculatePi(int depth) {
  double series = 0.0;
  int term = 0;
  while (term < depth) {
    const double sign = (term % 2 == 0) ? -1.0 : 1.0;
    const double odd = static_cast<double>(2 * term - 1);
    series += sign / odd;
    ++term;
  }
  return (series - 1.0) * 4;
}
// Builds a set holding the integers 0 .. size-1 (despite the name, the
// contents are not random). A size of 0 or less gives an empty set.
std::set<int> ConstructRandomSet(int size) {
  std::set<int> result;
  int next = 0;
  while (next < size) {
    // Values arrive in increasing order, so each one belongs at the end.
    result.insert(result.end(), next);
    ++next;
  }
  return result;
}
// Vector shared with BM_SetupTeardown, which allocates it before its timed
// loop and deletes it afterwards when state.thread_index is 0.
static std::vector<int>* test_vector = NULL;
} // end namespace
#ifdef DEBUG
// Benchmarks Factorial(8); only compiled when DEBUG is defined.
static void BM_Factorial(benchmark::State& state) {
  int result = 0;
  while (state.KeepRunning()) {
    result = Factorial(8);
  }
  // Use the result so the compiler cannot optimize the calls away.
  CHECK(result != std::numeric_limits<int>::max());
}
BENCHMARK(BM_Factorial);
#endif
// Benchmarks CalculatePi() at a depth of state.range_x() and attaches the
// last computed value to the run as its label.
static void BM_CalculatePiRange(benchmark::State& state) {
  double estimate = 0.0;
  while (state.KeepRunning()) {
    estimate = CalculatePi(state.range_x());
  }
  std::ostringstream label;
  label << estimate;
  state.SetLabel(label.str());
}
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
// Benchmarks CalculatePi() at a fixed depth of 1024.
static void BM_CalculatePi(benchmark::State& state) {
  static const int kDepth = 1024;
  // Only written, never read; ATTRIBUTE_UNUSED is attached to the declaration
  // for that reason.
  double estimate ATTRIBUTE_UNUSED = 0.0;
  while (state.KeepRunning())
    estimate = CalculatePi(kDepth);
}
BENCHMARK(BM_CalculatePi)->Threads(8);
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
// Benchmarks inserting state.range_y() rand() values into a set that already
// holds state.range_x() elements. Building that set happens between
// PauseTiming() and ResumeTiming().
static void BM_SetInsert(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::set<int> data(ConstructRandomSet(state.range_x()));
    state.ResumeTiming();
    int inserted = 0;
    while (inserted < state.range_y()) {
      data.insert(rand());
      ++inserted;
    }
  }
}
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
// Benchmarks push_back() on a sequence container of type Q: every iteration
// appends state.range_x() copies of a value-initialized element, and the run
// reports the matching item and byte counts.
template<typename Q>
static void BM_Sequential(benchmark::State& state) {
  Q q;
  // Value-initialized so that no indeterminate value is ever copied.
  typename Q::value_type v = typename Q::value_type();
  while (state.KeepRunning()) {
    // Post-decrement: exactly range_x() elements are appended per iteration,
    // which is the count reported below, and a range of 0 appends nothing.
    for (int i = state.range_x(); i-- > 0; )
      q.push_back(v);
  }
  const int64_t items_processed =
      static_cast<int64_t>(state.iterations()) * state.range_x();
  state.SetItemsProcessed(items_processed);
  state.SetBytesProcessed(items_processed * sizeof(v));
}
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
// Benchmarks std::string::compare() on two equal strings made of
// state.range_x() '-' characters.
static void BM_StringCompare(benchmark::State& state) {
  const std::string lhs(state.range_x(), '-');
  const std::string rhs(state.range_x(), '-');
  int combined = 0;
  while (state.KeepRunning()) {
    combined = combined | lhs.compare(rhs);
  }
  // Use the result so the compiler cannot optimize the comparisons away.
  CHECK(combined != std::numeric_limits<int>::max());
}
BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
// Benchmarks push_back() on the shared test_vector. The vector is created
// before the timed loop and destroyed after it, both only when
// state.thread_index is 0.
static void BM_SetupTeardown(benchmark::State& state) {
  if (state.thread_index == 0) {
    test_vector = new std::vector<int>();
  }
  while (state.KeepRunning()) {
    test_vector->push_back(0);
  }
  if (state.thread_index != 0) return;
  delete test_vector;
  test_vector = NULL;
}
BENCHMARK(BM_SetupTeardown);
// Benchmarks a plain accumulation loop of state.range_x() additions per
// iteration.
static void BM_LongTest(benchmark::State& state) {
  double total = 0.0;
  while (state.KeepRunning()) {
    for (int i = 0; i < state.range_x(); ++i) {
      total += i;
    }
  }
  // Checked afterwards so the accumulated value is actually used.
  CHECK(total != 0.0);
}
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
// Initializes the benchmark library from the command line, sanity-checks the
// two helper functions, then runs the specified benchmarks.
int main(int argc, const char* argv[]) {
  benchmark::Initialize(&argc, argv);

  // Known values of the helpers exercised by the benchmarks.
  CHECK(Factorial(8) == 40320);
  CHECK(CalculatePi(1) == 0.0);

  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment