Initial commit

Benchmark library builds and runs but only single-threaded. Multithreaded support needs a bit more love. Currently requires some C++11 support (g++ 4.6.3 seems to work).

Initial commit
403f3544 · Dominic Hamon · 403f3544 · 403f3544 · 403f3544 · 403f3544
Commit 403f3544 authored Dec 18, 2013 by Dominic Hamon
21 changed files
--- a/.gitignore
+++ b/.gitignore
+CMakeCache.txt
+CMakeFiles/
+Makefile
+bin/
+cmake_install.cmake
+lib/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+# Builds the static benchmark library and the benchmark_test executable.
+cmake_minimum_required (VERSION 2.8)
+project (benchmark)
+find_package(Threads)
+# Executables go to bin/, libraries (shared and static) go to lib/.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
+# Append to the existing flags so that flags supplied by the user or the
+# toolchain are not discarded.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror --std=c++0x")
+set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-fno-strict-aliasing -O3 -DNDEBUG")
+# Set OS
+# The variables are named without ${} so that if() dereferences them itself;
+# an unquoted ${VAR} that expands to nothing leaves a malformed condition.
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+	add_definitions(-DOS_MACOSX)
+endif()
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+	add_definitions(-DOS_LINUX)
+endif()
+if(CMAKE_SYSTEM_NAME MATCHES "Windows")
+	add_definitions(-DOS_WINDOWS)
+endif()
+# Set CPU
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86")
+	add_definitions(-DARCH_X86)
+endif()
+# Set up directories
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/src)
+link_directories(${PROJECT_SOURCE_DIR}/lib)
+# Build the targets
+FILE(GLOB SOURCE_FILES "src/*.cc")
+add_library(benchmark STATIC ${SOURCE_FILES})
+add_executable(benchmark_test test/benchmark_test.cc)
+target_link_libraries(benchmark_test benchmark ${CMAKE_THREAD_LIBS_INIT})
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
--- a/include/benchmark/macros.h
+++ b/include/benchmark/macros.h
+#ifndef BENCHMARK_MACROS_H_
+#define BENCHMARK_MACROS_H_
+#include <assert.h>
+// Declares, without defining, the copy constructor and copy-assignment
+// operator of TypeName.
+// NOTE(review): presumably meant to be placed in a class's private section so
+// that attempts to copy fail to compile or link -- confirm at the use sites.
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);               \
+  void operator=(const TypeName&);
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example.  If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+// One caveat is that, for C++03, arraysize() doesn't accept any array of
+// an anonymous type or a type defined inside a function.  This is due to
+// a limitation in C++03's template system; note that this header does not
+// provide an unsafe ARRAYSIZE() fallback macro for those cases.  The
+// limitation has been removed in C++11.
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+// Declared only, never defined: for an array of N elements the return type is
+// a reference to a char array of N elements, so sizeof() applied to a call
+// expression yields N.
+// NOTE(review): size_t is used here but this header only includes
+// <assert.h>; confirm <stddef.h> is reliably visible wherever this is used.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+// That gcc wants both of these prototypes seems mysterious. VC, for
+// its part, can't decide which to use (another mystery). Matching of
+// template overloads: the final frontier.
+#ifndef COMPILER_MSVC
+template <typename T, size_t N>
+char (&ArraySizeHelper(const T (&array)[N]))[N];
+#endif
+// Number of elements in `array`, computed from the helper's return type.
+#define arraysize(array) (sizeof(ArraySizeHelper(array)))
+// The STATIC_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+//   STATIC_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
+//                  content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+// Empty helper type; its bool template parameter forces the asserted
+// expression to be a compile-time constant.
+template <bool>
+struct StaticAssert {
+};
+// Declares a typedef named `msg` whose array bound is -1, and therefore
+// ill-formed, when `expr` is false.
+#define STATIC_ASSERT(expr, msg) \
+  typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+// Implementation details of STATIC_ASSERT:
+//
+// - STATIC_ASSERT works by defining an array type that has -1
+//   elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+//     #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+//   does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is gcc's extension and not part
+//   of the C++ standard).  As a result, gcc fails to reject the
+//   following code with the simple definition:
+//
+//     int foo;
+//     STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
+//                               // not a compile-time constant.
+//
+// - By using the type StaticAssert<(bool(expr))>, we ensure that
+//   expr is a compile-time constant.  (Template arguments must be
+//   determined at compile-time.)
+//
+// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
+//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
+//
+//     StaticAssert<bool(expr)>
+//
+//   instead, these compilers will refuse to compile
+//
+//     STATIC_ASSERT(5 > 0, some_message);
+//
+//   (They seem to think the ">" in "5 > 0" marks the end of the
+//   template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+//     ((expr) ? 1 : -1).
+//
+//   This is to avoid running into a bug in MS VC 7.1, which
+//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+// Runtime checks built on assert().  The condition is always evaluated, but
+// when NDEBUG is defined assert() expands to nothing, so a failed CHECK is
+// then silently ignored.  Because the macro calls assert(false), the
+// diagnostic names `false` rather than the condition that failed.
+#define CHECK(b) do { if (!(b)) assert(false); } while(0)
+// Comparison forms of CHECK; each argument is parenthesized and evaluated
+// once.
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
+#define CHECK_GT(a, b) CHECK((a) > (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+//
+// Marks an entity as possibly unused so that the compiler does not warn
+// about it.  Uses the GNU __attribute__ syntax; no fallback is provided here
+// for compilers that lack it.
+#define ATTRIBUTE_UNUSED __attribute__ ((unused))
+//
+// For functions we want to force inline or not inline.
+// Introduced in gcc 3.1.
+// The HAVE_* macros are defined to 1 unconditionally alongside each
+// attribute macro.
+#define ATTRIBUTE_ALWAYS_INLINE  __attribute__ ((always_inline))
+#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
+#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
+#define HAVE_ATTRIBUTE_NOINLINE 1
+#endif  // BENCHMARK_MACROS_H_
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
--- a/src/colorprint.cc
+++ b/src/colorprint.cc
+#include "colorprint.h"
+#include <stdarg.h>
+#include "commandlineflags.h"
+DECLARE_bool(color_print);
+namespace {
+// Native representation of a color: a console attribute word on Windows, a
+// one-character digit string everywhere else.
+#ifdef OS_WINDOWS
+typedef WORD PlatformColorCode;
+#else
+typedef const char* PlatformColorCode;
+#endif
+// Maps a LogColor to the platform color code.  A color without a mapping
+// yields 0 on Windows and NULL elsewhere.
+PlatformColorCode GetPlatformColorCode(LogColor color) {
+#ifdef OS_WINDOWS
+  if (color == COLOR_RED)     return FOREGROUND_RED;
+  if (color == COLOR_GREEN)   return FOREGROUND_GREEN;
+  if (color == COLOR_YELLOW)  return FOREGROUND_RED | FOREGROUND_GREEN;
+  if (color == COLOR_BLUE)    return FOREGROUND_BLUE;
+  if (color == COLOR_MAGENTA) return FOREGROUND_BLUE | FOREGROUND_RED;
+  if (color == COLOR_CYAN)    return FOREGROUND_BLUE | FOREGROUND_GREEN;
+  // COLOR_WHITE and anything unrecognized map to 0.
+  return 0;
+#else
+  if (color == COLOR_RED)     return "1";
+  if (color == COLOR_GREEN)   return "2";
+  if (color == COLOR_YELLOW)  return "3";
+  if (color == COLOR_BLUE)    return "4";
+  if (color == COLOR_MAGENTA) return "5";
+  if (color == COLOR_CYAN)    return "6";
+  if (color == COLOR_WHITE)   return "7";
+  return NULL;
+#endif
+}
+}  // end namespace
+// Prints a printf-style message to stdout in the requested color.  When
+// FLAGS_color_print is false the message is printed without any coloring.
+void ColorPrintf(LogColor color, const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  if (FLAGS_color_print) {
+#ifdef OS_WINDOWS
+    const HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
+    // Remember the current text attributes so they can be restored below.
+    CONSOLE_SCREEN_BUFFER_INFO console_info;
+    GetConsoleScreenBufferInfo(console, &console_info);
+    const WORD saved_attrs = console_info.wAttributes;
+    // Flush before every attribute change; otherwise text that is buffered
+    // but not yet written would be affected by the new attribute.
+    fflush(stdout);
+    SetConsoleTextAttribute(console,
+                            GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
+    vprintf(fmt, ap);
+    fflush(stdout);
+    // Put the original text color back.
+    SetConsoleTextAttribute(console, saved_attrs);
+#else
+    const char* const digit = GetPlatformColorCode(color);
+    if (digit != NULL)
+      fprintf(stdout, "\033[0;3%sm", digit);
+    vprintf(fmt, ap);
+    printf("\033[m");  // Resets the terminal to default.
+#endif
+  } else {
+    vprintf(fmt, ap);
+  }
+  va_end(ap);
+}
--- a/src/colorprint.h
+++ b/src/colorprint.h
+#ifndef BENCHMARK_COLORPRINT_H_
+#define BENCHMARK_COLORPRINT_H_
+// Colors accepted by ColorPrintf.
+enum LogColor {
+  COLOR_DEFAULT,
+  COLOR_RED,
+  COLOR_GREEN,
+  COLOR_YELLOW,
+  COLOR_BLUE,
+  COLOR_MAGENTA,
+  COLOR_CYAN,
+  COLOR_WHITE
+};
+// printf-style output in the given color; `fmt` and the variadic arguments
+// follow printf conventions.
+void ColorPrintf(LogColor color, const char* fmt, ...);
+#endif  // BENCHMARK_COLORPRINT_H_
--- a/src/commandlineflags.cc
+++ b/src/commandlineflags.cc
+#include "commandlineflags.h"
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <iostream>
+#include <limits>
+namespace benchmark {
+// Parses 'str' for a 32-bit signed integer.  If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged, reports the problem on std::cerr (prefixed with src_text)
+// and returns false.
+//
+// Input that contains no digits (for example the empty string), that has
+// characters left over after the number, or whose value does not fit in
+// an int32_t is rejected.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
+  // strtol() signals overflow of 'long' only through errno, so clear it first.
+  errno = 0;
+  char* end = NULL;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+  // end == str means strtol() consumed no digits at all; a non-NUL *end means
+  // something other than the number follows it.
+  if (end == str || *end != '\0') {
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+  // Is the parsed value in the range of an Int32?  Checking errno instead of
+  // comparing against LONG_MAX/LONG_MIN keeps INT32_MAX and INT32_MIN
+  // accepted on platforms where long is itself 32 bits wide.
+  if (errno == ERANGE ||
+      long_value > std::numeric_limits<int32_t>::max() ||
+      long_value < std::numeric_limits<int32_t>::min()) {
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\", "
+              << "which overflows.\n";
+    return false;
+  }
+  *value = static_cast<int32_t>(long_value);
+  return true;
+}
+// Parses 'str' for a double.  If successful, writes the result to *value and
+// returns true; otherwise leaves *value unchanged, reports the problem on
+// std::cerr (prefixed with src_text) and returns false.
+//
+// Input from which no number can be read (for example the empty string) or
+// that has characters left over after the number is rejected.
+bool ParseDouble(const std::string& src_text, const char* str, double* value) {
+  char* end = NULL;
+  const double double_value = strtod(str, &end);  // NOLINT
+  // end == str means strtod() converted nothing; a non-NUL *end means
+  // something other than the number follows it.
+  if (end == str || *end != '\0') {
+    std::cerr << src_text << " is expected to be a double, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+  *value = double_value;
+  return true;
+}
+// Looks up the environment variable `name`; returns NULL when it has no
+// usable value.
+inline const char* GetEnv(const char* name) {
+#if GTEST_OS_WINDOWS_MOBILE
+  // Windows CE has no environment variables at all.
+  return NULL;
+#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+  // On these platforms a variable that was cleared programmatically reads
+  // back as the empty string rather than as unset, so "" is reported as NULL.
+  const char* const raw = getenv(name);
+  if (raw == NULL || raw[0] == '\0') return NULL;
+  return raw;
+#else
+  const char* const raw = getenv(name);
+  return raw;
+#endif
+}
+// Returns the name of the environment variable corresponding to the
+// given flag: the flag name converted to upper case and prefixed with
+// "BENCHMARK_".  For example, FlagToEnvVar("foo") returns "BENCHMARK_FOO".
+static std::string FlagToEnvVar(const char* flag) {
+  std::string env_var("BENCHMARK_");
+  for (const char* p = flag; *p != '\0'; ++p) {
+    // toupper() is only defined for values representable as unsigned char
+    // (or EOF), so a plain char must be converted before the call.
+    env_var += static_cast<char>(::toupper(static_cast<unsigned char>(*p)));
+  }
+  return env_var;
+}
+// Looks up the Boolean environment variable that corresponds to the given
+// flag.  An unset variable yields default_value; otherwise the result is
+// true unless the variable's value is exactly "0".
+bool BoolFromEnv(const char* flag, bool default_value) {
+  const std::string var_name = FlagToEnvVar(flag);
+  const char* const raw = GetEnv(var_name.c_str());
+  if (raw == NULL) return default_value;
+  return strcmp(raw, "0") != 0;
+}
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+// A value that fails to parse is reported by ParseInt32, followed by a
+// note on std::cout that the default is being used.
+int32_t Int32FromEnv(const char* flag, int32_t default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  if (string_value == NULL) {
+    // The environment variable is not set.
+    return default_value;
+  }
+  int32_t result = default_value;
+  if (!ParseInt32(std::string("Environment variable ") + env_var,
+                  string_value, &result)) {
+    std::cout << "The default value " << default_value << " is used.\n";
+    return default_value;
+  }
+  return result;
+}
+// Reads and returns a double stored in the environment variable
+// corresponding to the given flag; if it isn't set or doesn't represent a
+// valid double, returns default_value.  commandlineflags.h declares this
+// function next to Int32FromEnv, so it is defined here with the same
+// structure.
+double DoubleFromEnv(const char* flag, double default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  if (string_value == NULL) {
+    // The environment variable is not set.
+    return default_value;
+  }
+  double result = default_value;
+  if (!ParseDouble(std::string("Environment variable ") + env_var,
+                   string_value, &result)) {
+    std::cout << "The default value " << default_value << " is used.\n";
+    return default_value;
+  }
+  return result;
+}
+// Looks up the string environment variable that corresponds to the given
+// flag and returns its value, or default_value when it is not set.
+const char* StringFromEnv(const char* flag, const char* default_value) {
+  const std::string var_name = FlagToEnvVar(flag);
+  const char* const raw = GetEnv(var_name.c_str());
+  if (raw != NULL) return raw;
+  return default_value;
+}
+// Interprets 'str' as a command line argument of the form "--flag=value".
+// With def_optional set, the bare form "--flag" is accepted as well.
+//
+// Returns a pointer to the value inside 'str' (to the terminating NUL for
+// the bare form), or NULL when 'str' is not this flag.
+const char* ParseFlagValue(const char* str,
+                           const char* flag,
+                           bool def_optional) {
+  // Neither argument may be NULL.
+  if (str == NULL || flag == NULL) return NULL;
+  // The argument has to begin with "--" followed by the flag name.
+  std::string prefix("--");
+  prefix += flag;
+  if (strncmp(str, prefix.c_str(), prefix.length()) != 0) return NULL;
+  // What follows the flag name decides the outcome.
+  const char* const rest = str + prefix.length();
+  switch (*rest) {
+    case '=':
+      // The value starts right after the '='.
+      return rest + 1;
+    case '\0':
+      // A bare flag is only acceptable when the value is optional.
+      return def_optional ? rest : NULL;
+    default:
+      // Some longer flag name that merely shares this prefix.
+      return NULL;
+  }
+}
+// Parses 'str' as the Boolean flag 'flag' ("--flag" or "--flag=value").
+// Returns false, leaving *value untouched, when 'str' is not this flag.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+  const char* const text = ParseFlagValue(str, flag, true);
+  if (text == NULL) return false;
+  // Only the first character matters: '0', 'f' and 'F' mean false, anything
+  // else (including no value at all) means true.
+  const char first = *text;
+  *value = (first != '0' && first != 'f' && first != 'F');
+  return true;
+}
+// Parses 'str' as the 32-bit integer flag 'flag' ("--flag=value").
+// Returns false when 'str' is not this flag or ParseInt32 rejects the value.
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
+  const char* const text = ParseFlagValue(str, flag, false);
+  if (text == NULL) return false;
+  // Text used by ParseInt32 to identify the source in its diagnostics.
+  std::string source("The value of flag --");
+  source += flag;
+  return ParseInt32(source, text, value);
+}
+// Parses 'str' as the double flag 'flag' ("--flag=value").
+// Returns false when 'str' is not this flag or ParseDouble rejects the value.
+bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
+  const char* const text = ParseFlagValue(str, flag, false);
+  if (text == NULL) return false;
+  // Text used by ParseDouble to identify the source in its diagnostics.
+  std::string source("The value of flag --");
+  source += flag;
+  return ParseDouble(source, text, value);
+}
+// Parses 'str' as the string flag 'flag' ("--flag=value") and stores the
+// value in *value.  Returns false, leaving *value untouched, when 'str' is
+// not this flag.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+  const char* const text = ParseFlagValue(str, flag, false);
+  if (text == NULL) return false;
+  value->assign(text);
+  return true;
+}
+// True when 'str' is the flag 'flag', with or without an "=value" part.
+bool IsFlag(const char* str, const char* flag) {
+  const char* const text = ParseFlagValue(str, flag, true);
+  return text != NULL;
+}
+}  // end namespace benchmark
--- a/src/commandlineflags.h
+++ b/src/commandlineflags.h
+#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
+#define BENCHMARK_COMMANDLINEFLAGS_H_
+#include <stdint.h>
+#include <string>
+// Macro for referencing flags.
+// FLAG(foo) expands to the identifier FLAGS_foo.
+#define FLAG(name) FLAGS_##name
+// Macros for declaring flags.
+// Each expands to an extern declaration of the flag's variable.
+#define DECLARE_bool(name) extern bool FLAG(name)
+#define DECLARE_int32(name) extern int32_t FLAG(name)
+#define DECLARE_int64(name) extern int64_t FLAG(name)
+#define DECLARE_double(name) extern double FLAG(name)
+#define DECLARE_string(name) extern std::string FLAG(name)
+// Macros for defining flags.
+// Each defines the flag's variable, initialized to default_val.  The doc
+// argument does not appear in the expansion.
+#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
+#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
+#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
+#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
+#define DEFINE_string(name, default_val, doc) \
+    std::string FLAG(name) = (default_val)
+namespace benchmark {
+// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.  'src_text' names the source of the value in error messages.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
+// Read a bool/Int32/double/string from the environment variable
+// corresponding to the given flag; default_val is the fallback value.
+bool BoolFromEnv(const char* flag, bool default_val);
+int32_t Int32FromEnv(const char* flag, int32_t default_val);
+double DoubleFromEnv(const char* flag, double default_val);
+const char* StringFromEnv(const char* flag, const char* default_val);
+// Parses a string for a bool flag, in the form of either
+// "--flag=value" or "--flag".
+//
+// In the former case, the value is taken as true as long as it does
+// not start with '0', 'f', or 'F'.
+//
+// In the latter case, the value is taken as true.
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value);
+// Parses a string for an Int32 flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
+// Parses a string for a Double flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseDoubleFlag(const char* str, const char* flag, double* value);
+// Parses a string for a string flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true.  On failure, returns false without changing *value.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value);
+// Returns true if the string matches the flag, either as "--flag" or as
+// "--flag=value".
+bool IsFlag(const char* str, const char* flag);
+}  // end namespace benchmark
+#endif  // BENCHMARK_COMMANDLINEFLAGS_H_
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
+// ----------------------------------------------------------------------
+// CycleClock
+//    A CycleClock tells you the current time in Cycles.  The "time"
+//    is actually time since power-on.  This is like time() but doesn't
+//    involve a system call and is much more precise.
+//
+// NOTE: Not all cpu/platform/kernel combinations guarantee that this
+// clock increments at a constant rate or is synchronized across all logical
+// cpus in a system.
+//
+// If you need the above guarantees, please consider using a different
+// API. There are efforts to provide an interface which provides a millisecond
+// granularity and implemented as a memory read. A memory read is generally
+// cheaper than the CycleClock for many architectures.
+//
+// Also, in some out of order CPU implementations, the CycleClock is not
+// serializing. So if you're trying to count at cycles granularity, your
+// data might be inaccurate due to out of order instruction execution.
+// ----------------------------------------------------------------------
+#ifndef BENCHMARK_CYCLECLOCK_H_
+#define BENCHMARK_CYCLECLOCK_H_
+#include <stdint.h>
+#if defined(OS_MACOSX)
+# include <mach/mach_time.h>
+#endif
+// For MSVC, we want to use '_asm rdtsc' when possible (since it works
+// with even ancient MSVC compilers), and when not possible the
+// __rdtsc intrinsic, declared in <intrin.h>.  Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation.
+// Therefore, we simply declare __rdtsc ourselves. See also
+// http://connect.microsoft.com/VisualStudio/feedback/details/262047
+#if defined(COMPILER_MSVC) && !defined(_M_IX86)
+extern "C" uint64_t __rdtsc();
+#pragma intrinsic(__rdtsc)
+#endif
+#include <sys/time.h>
+// NOTE: only i386 and x86_64 have been well tested.
+// PPC, sparc, alpha, and ia64 are based on
+//    http://peter.kuscsik.com/wordpress/?p=14
+// with modifications by m3b.  See also
+//    https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
+// Reads a platform-specific cycle (or time-base) counter.  Exactly one of the
+// preprocessor branches in Now() is compiled; an unsupported platform is a
+// compile-time error.
+struct CycleClock {
+  // This should return the number of cycles since power-on.  Thread-safe.
+  static inline int64_t Now() {
+#if defined(OS_MACOSX)
+    // this goes at the top because we need ALL Macs, regardless of
+    // architecture, to return the number of "mach time units" that
+    // have passed since startup.  See sysinfo.cc where
+    // InitializeSystemInfo() sets the supposed cpu clock frequency of
+    // macs to the number of mach time units per second, not actual
+    // CPU clock frequency (which can change in the face of CPU
+    // frequency scaling).  Also note that when the Mac sleeps, this
+    // counter pauses; it does not continue counting, nor does it
+    // reset to zero.
+    return mach_absolute_time();
+#elif defined(__i386__)
+    int64_t ret;
+    __asm__ volatile ("rdtsc" : "=A" (ret) );
+    return ret;
+#elif defined(__x86_64__) || defined(__amd64__)
+    // rdtsc delivers the counter as two 32-bit halves in separate registers.
+    uint64_t low, high;
+    __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
+    return (high << 32) | low;
+#elif defined(__powerpc__) || defined(__ppc__)
+    // This returns a time-base, which is not always precisely a cycle-count.
+    int64_t tbl, tbu0, tbu1;
+    asm("mftbu %0" : "=r" (tbu0));
+    asm("mftb  %0" : "=r" (tbl));
+    asm("mftbu %0" : "=r" (tbu1));
+    // If the upper half changed between the two reads, the lower half is
+    // zeroed: the mask is all ones when tbu0 == tbu1 and zero otherwise.
+    tbl &= -static_cast<int64_t>(tbu0 == tbu1);
+    // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
+    return (tbu1 << 32) | tbl;
+#elif defined(__sparc__)
+    int64_t tick;
+    asm(".byte 0x83, 0x41, 0x00, 0x00");
+    asm("mov   %%g1, %0" : "=r" (tick));
+    return tick;
+#elif defined(__ia64__)
+    int64_t itc;
+    asm("mov %0 = ar.itc" : "=r" (itc));
+    return itc;
+#elif defined(COMPILER_MSVC) && defined(_M_IX86)
+    // Older MSVC compilers (like 7.x) don't seem to support the
+    // __rdtsc intrinsic properly, so I prefer to use _asm instead
+    // when I know it will work.  Otherwise, I'll use __rdtsc and hope
+    // the code is being compiled with a non-ancient compiler.
+    _asm rdtsc
+#elif defined(COMPILER_MSVC)
+    return __rdtsc();
+#elif defined(ARMV3)
+#if defined(ARMV6)  // V6 is the earliest arch that has a standard cyclecount
+    uint32_t pmccntr;
+    uint32_t pmuseren;
+    uint32_t pmcntenset;
+    // Read the user mode perf monitor counter access permissions.
+    asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
+    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
+      asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
+      if (pmcntenset & 0x80000000ul) {  // Is it counting?
+        asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
+        // The counter is set up to count every 64th cycle
+        return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
+      }
+    }
+#endif
+    // Fallback when the cycle counter is unavailable: wall-clock time in
+    // microseconds.
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__mips__)
+    // mips apparently only allows rdtsc for superusers, so we fall
+    // back to gettimeofday.  It's possible clock_gettime would be better.
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#else
+// The soft failover to a generic implementation is automatic only for ARM.
+// For other platforms the developer is expected to make an attempt to create
+// a fast implementation and use generic version if nothing better is available.
+#error You need to define CycleTimer for your OS and CPU
+#endif
+  }
+};
+#endif  // BENCHMARK_CYCLECLOCK_H_
--- a/src/macros.h
+++ b/src/macros.h
+#ifndef BENCHMARK_MACROS_H_
+#define BENCHMARK_MACROS_H_
+#include <assert.h>
+// Declares, without defining, the copy constructor and copy-assignment
+// operator of TypeName.
+// NOTE(review): presumably meant to be placed in a class's private section so
+// that attempts to copy fail to compile or link -- confirm at the use sites.
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);               \
+  void operator=(const TypeName&);
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example.  If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+// One caveat is that, for C++03, arraysize() doesn't accept any array of
+// an anonymous type or a type defined inside a function.  This is due to
+// a limitation in C++03's template system; note that this header does not
+// provide an unsafe ARRAYSIZE() fallback macro for those cases.  The
+// limitation has been removed in C++11.
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+// Declared only, never defined: for an array of N elements the return type is
+// a reference to a char array of N elements, so sizeof() applied to a call
+// expression yields N.
+// NOTE(review): size_t is used here but this header only includes
+// <assert.h>; confirm <stddef.h> is reliably visible wherever this is used.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+// That gcc wants both of these prototypes seems mysterious. VC, for
+// its part, can't decide which to use (another mystery). Matching of
+// template overloads: the final frontier.
+#ifndef COMPILER_MSVC
+template <typename T, size_t N>
+char (&ArraySizeHelper(const T (&array)[N]))[N];
+#endif
+// Number of elements in `array`, computed from the helper's return type.
+#define arraysize(array) (sizeof(ArraySizeHelper(array)))
+// The STATIC_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+//   STATIC_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
+//                  content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+// Empty helper type; its bool template parameter forces the asserted
+// expression to be a compile-time constant.
+template <bool>
+struct StaticAssert {
+};
+// Declares a typedef named `msg` whose array bound is -1, and therefore
+// ill-formed, when `expr` is false.
+#define STATIC_ASSERT(expr, msg) \
+  typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+// Implementation details of STATIC_ASSERT:
+//
+// - STATIC_ASSERT works by defining an array type that has -1
+//   elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+//     #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+//   does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is gcc's extension and not part
+//   of the C++ standard).  As a result, gcc fails to reject the
+//   following code with the simple definition:
+//
+//     int foo;
+//     STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
+//                               // not a compile-time constant.
+//
+// - By using the type StaticAssert<(bool(expr))>, we ensure that
+//   expr is a compile-time constant.  (Template arguments must be
+//   determined at compile-time.)
+//
+// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
+//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
+//
+//     StaticAssert<bool(expr)>
+//
+//   instead, these compilers will refuse to compile
+//
+//     STATIC_ASSERT(5 > 0, some_message);
+//
+//   (They seem to think the ">" in "5 > 0" marks the end of the
+//   template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+//     ((expr) ? 1 : -1).
+//
+//   This is to avoid running into a bug in MS VC 7.1, which
+//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+// Runtime checks built on assert().  The condition is always evaluated, but
+// when NDEBUG is defined assert() expands to nothing, so a failed CHECK is
+// then silently ignored.  Because the macro calls assert(false), the
+// diagnostic names `false` rather than the condition that failed.
+#define CHECK(b) do { if (!(b)) assert(false); } while(0)
+// Comparison forms of CHECK; each argument is parenthesized and evaluated
+// once.
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
+#define CHECK_GT(a, b) CHECK((a) > (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+// Marks an entity as possibly unused so that the compiler does not warn
+// about it.  Uses the GNU __attribute__ syntax; no fallback is provided here
+// for compilers that lack it.
+#define ATTRIBUTE_UNUSED  __attribute__ ((unused))
+#endif  // BENCHMARK_MACROS_H_
--- a/src/mutex_lock.h
+++ b/src/mutex_lock.h
+#ifndef BENCHMARK_MUTEX_LOCK_H_
+#define BENCHMARK_MUTEX_LOCK_H_
+#include <pthread.h>
+// Scoped lock for a pthread mutex: locks *mu in the constructor and unlocks
+// it in the destructor.  The mutex itself is not owned by this object.
+class mutex_lock {
+ public:
+  // Blocks until *mu has been acquired.
+  explicit mutex_lock(pthread_mutex_t* mu) : mu_(mu) {
+    pthread_mutex_lock(mu_);
+  }
+  // Releases the mutex acquired by the constructor.
+  ~mutex_lock() {
+    pthread_mutex_unlock(mu_);
+  }
+ private:
+  // Declared but not defined: destroying a copy would unlock the same mutex
+  // a second time, so copying is disallowed.
+  mutex_lock(const mutex_lock&);
+  void operator=(const mutex_lock&);
+  pthread_mutex_t* mu_;
+};
+#endif  // BENCHMARK_MUTEX_LOCK_H_
--- a/src/port.h
+++ b/src/port.h
+#ifndef BENCHMARK_PORT_H_
+#define BENCHMARK_PORT_H_
+// Declares, without defining, the copy constructor and copy-assignment
+// operator of TypeName.
+// NOTE(review): presumably meant to be placed in a class's private section so
+// that attempts to copy fail to compile or link -- confirm at the use sites.
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);               \
+  void operator=(const TypeName&);
+#endif  // BENCHMARK_PORT_H_
--- a/src/sleep.cc
+++ b/src/sleep.cc
+#include "sleep.h"
+#include <time.h>
+#include <errno.h>
+#ifdef OS_WINDOWS
+// Windows' _sleep takes its argument in milliseconds.
+void SleepForMilliseconds(int milliseconds) {
+  _sleep(milliseconds);
+}
+// Sleeps for the given number of seconds, truncated to whole milliseconds.
+void SleepForSeconds(double seconds) {
+  SleepForMilliseconds(static_cast<int>(seconds * 1000));
+}
+// sleep.h declares this function unconditionally, so it needs a definition on
+// Windows as well.  The interval is rounded up to whole milliseconds so the
+// sleep is never shorter than requested; non-positive intervals return
+// immediately.
+void SleepForMicroseconds(int64_t microseconds) {
+  if (microseconds <= 0) return;
+  SleepForMilliseconds(static_cast<int>((microseconds + 999) / 1000));
+}
+#else  // OS_WINDOWS
+// Unit conversion factors.
+static const int64_t kNumMillisPerSecond = 1000LL;
+static const int64_t kNumMicrosPerMilli = 1000LL;
+static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
+static const int64_t kNumNanosPerMicro = 1000LL;
+// Sleeps for the given number of microseconds, resuming the sleep whenever
+// it is cut short by a signal.
+void SleepForMicroseconds(int64_t microseconds) {
+  const int64_t whole_seconds = microseconds / kNumMicrosPerSecond;
+  const int64_t leftover_micros = microseconds % kNumMicrosPerSecond;
+  struct timespec remaining;
+  remaining.tv_sec = whole_seconds;
+  remaining.tv_nsec = leftover_micros * kNumNanosPerMicro;
+  for (;;) {
+    // On interruption nanosleep() writes the time still left back into
+    // 'remaining', which is then used for the next attempt.
+    if (nanosleep(&remaining, &remaining) == 0) break;
+    if (errno != EINTR) break;
+  }
+}
+// Sleeps for the given number of milliseconds.
+void SleepForMilliseconds(int milliseconds) {
+  const int64_t micros = static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli;
+  SleepForMicroseconds(micros);
+}
+// Sleeps for the given number of seconds, truncated to whole microseconds.
+void SleepForSeconds(double seconds) {
+  const int64_t micros = static_cast<int64_t>(seconds * kNumMicrosPerSecond);
+  SleepForMicroseconds(micros);
+}
+#endif  // OS_WINDOWS
--- a/src/sleep.h
+++ b/src/sleep.h
+#ifndef BENCHMARK_SLEEP_H_
+#define BENCHMARK_SLEEP_H_
+#include <stdint.h>
+// Block the calling thread for the given interval, expressed in
+// microseconds, milliseconds or (fractional) seconds respectively.
+void SleepForMicroseconds(int64_t microseconds);
+void SleepForMilliseconds(int milliseconds);
+void SleepForSeconds(double seconds);
+#endif  // BENCHMARK_SLEEP_H_
--- a/src/stat.h
+++ b/src/stat.h
+#ifndef BENCHMARK_STAT_H_
+#define BENCHMARK_STAT_H_
+#include <math.h>
+#include <iostream>
+#include <limits>
+// Forward declarations.  VType is the type of the accumulated values and
+// NumType the type of the sample count / weight.
+template <typename VType, typename NumType>
+class Stat1;
+template <typename VType, typename NumType>
+class Stat1MinMax;
+// Convenience aliases for float and double statistics.
+typedef Stat1<float, float>  Stat1_f;
+typedef Stat1<double, double> Stat1_d;
+typedef Stat1MinMax<float, float>  Stat1MinMax_f;
+typedef Stat1MinMax<double, double> Stat1MinMax_d;
+// Declared only; no definitions of these vector templates appear in the
+// visible part of this header.
+template <typename VType> class Vector2;
+template <typename VType> class Vector3;
+template <typename VType> class Vector4;
+template <typename VType, typename NumType>
+class Stat1 {
+ public:
+  typedef Stat1<VType, NumType> Self;
+  Stat1()  {
+    Clear();
+  }
+  void Clear() {
+    numsamples_ = NumType();
+    sum_squares_ = sum_ = VType();
+  }
+  // Create a sample of value dat and weight 1
+  explicit Stat1(const VType &dat) {
+    sum_ = dat;
+    sum_squares_ = Sqr(dat);
+    numsamples_ = 1;
+  }
+  // Create statistics for all the samples between begin (included)
+  // and end(excluded)
+  explicit Stat1(const VType *begin, const VType *end) {
+    Clear();
+    for ( const VType *item = begin; item < end; ++item ) {
+      (*this) += Stat1(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1(const VType &dat, const NumType &w) {
+    sum_ = w * dat;
+    sum_squares_ = w * Sqr(dat);
+    numsamples_ = w;
+  }
+  // Copy operator
+  Stat1(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+  }
+  inline Self &operator =(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self &operator +=(const Self &stat) {
+    sum_ += stat.sum_;
+    sum_squares_+= stat.sum_squares_;
+    numsamples_ += stat.numsamples_;
+    return (*this);
+  }
+  // The operation opposite to +=
+  inline Self &operator -=(const Self &stat) {
+    sum_ -= stat.sum_;
+    sum_squares_-= stat.sum_squares_;
+    numsamples_ -= stat.numsamples_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self &operator *=(const VType &k) {
+    sum_ *= k;
+    sum_squares_*= k;
+    numsamples_ *= k;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self operator + (const Self &stat) const {
+    return Self(*this) += stat;
+  }
+  // The operation opposite to +
+  inline Self operator - (const Self &stat) const {
+    return Self(*this) -= stat;
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self operator * (const VType &k) const {
+    return Self(*this) *= k;
+  }
+  // Return the total weight of this sample set
+  NumType NumSamples() const {
+    return numsamples_;
+  }
+  // Return the sum of this sample set
+  VType Sum() const {
+    return sum_;
+  }
+  // Return the mean of this sample set
+  VType Mean() const {
+    if (numsamples_ == 0) return VType();
+    return sum_ * (1.0 / numsamples_);
+  }
+  // Return the mean of this sample set and compute the standard deviation at
+  // the same time.
+  VType Mean(VType *stddev) const {
+    if (numsamples_ == 0) return VType();
+    VType mean = sum_ * (1.0 / numsamples_);
+    if (stddev) {
+      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+     *stddev = Sqrt(avg_squares - Sqr(mean));
+    }
+    return mean;
+  }
+  // Return the standard deviation of the sample set
+  VType StdDev() const {
+    if (numsamples_ == 0) return VType();
+    VType mean = Mean();
+    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+    return Sqrt(avg_squares - Sqr(mean));
+  }
+ private:
+                        // Let i be the index of the samples provided (using +=)
+                        // and weight[i],value[i] be the data of sample #i
+                        // then the variables have the following meaning:
+  NumType numsamples_;  // sum of weight[i];
+  VType sum_;           // sum of weight[i]*value[i];
+  VType sum_squares_;   // sum of weight[i]*value[i]^2;
+  // Template function used to square a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqr(const SType &dat) {
+    return dat * dat;
+  }
+  template <typename SType>
+  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+  template <typename SType>
+  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+  template <typename SType>
+  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+  // Template function used to take the square root of a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqrt(const SType &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    if ( dat < 0 )
+      return 0;
+    return sqrt(dat);
+  }
+  template <typename SType>
+  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector2<SType>()).Sqrt();
+  }
+  template <typename SType>
+  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector3<SType>()).Sqrt();
+  }
+  template <typename SType>
+  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector4<SType>()).Sqrt();
+  }
+};
+// Streams a one-line summary of a Stat1: its mean, standard deviation and
+// total sample weight.
+template <typename VType, typename NumType>
+inline std::ostream& operator<<(std::ostream& out,
+                                const Stat1<VType, NumType>& s) {
+  out << "{ avg = " << s.Mean();
+  out << " std = " << s.StdDev();
+  out << " nsamples = " << s.NumSamples() << "}";
+  return out;
+}
+// Stat1MinMax: same as Stat1, but it also keeps the smallest and largest
+// sample values seen. Because a minimum or maximum cannot be "un-merged"
+// without the full list of samples, the "-" and "-=" operators are disabled.
+template <typename VType, typename NumType>
+class Stat1MinMax : public Stat1<VType, NumType> {
+ public:
+  typedef Stat1MinMax<VType, NumType> Self;
+  // Create an empty sample set
+  Stat1MinMax() {
+    Clear();
+  }
+  // Discard every sample and reset min/max to sentinels that any real sample
+  // will replace on the first merge.
+  void Clear() {
+    Stat1<VType, NumType>::Clear();
+    typedef std::numeric_limits<VType> Limits;
+    if (Limits::has_infinity) {
+      min_ = Limits::infinity();
+      max_ = -Limits::infinity();
+    } else {
+      min_ = Limits::max();
+      // lowest() rather than min(): for floating-point types min() is the
+      // smallest positive value, not the most negative one. For integer
+      // types the two are identical.
+      max_ = Limits::lowest();
+    }
+  }
+  // Create a sample of value dat and weight 1
+  explicit Stat1MinMax(const VType &dat)
+      : Stat1<VType, NumType>(dat), max_(dat), min_(dat) {}
+  // Create statistics for all the samples between begin (included)
+  // and end (excluded)
+  explicit Stat1MinMax(const VType *begin, const VType *end) {
+    Clear();
+    for (const VType *item = begin; item < end; ++item) {
+      (*this) += Stat1MinMax(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1MinMax(const VType &dat, const NumType &w)
+      : Stat1<VType, NumType>(dat, w), max_(dat), min_(dat) {}
+  // Copy constructor
+  Stat1MinMax(const Self &stat)
+      : Stat1<VType, NumType>(stat), max_(stat.max_), min_(stat.min_) {}
+  // Copy assignment
+  inline Self &operator =(const Self &stat) {
+    this->Stat1<VType, NumType>::operator=(stat);
+    max_ = stat.max_;
+    min_ = stat.min_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self &operator +=(const Self &stat) {
+    this->Stat1<VType, NumType>::operator+=(stat);
+    if (stat.max_ > max_) max_ = stat.max_;
+    if (stat.min_ < min_) min_ = stat.min_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor (passed as stat).
+  // Scaling weights does not change which values were seen, so min and max
+  // are left untouched.
+  inline Self &operator *=(const VType &stat) {
+    this->Stat1<VType, NumType>::operator*=(stat);
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self operator + (const Self &stat) const {
+    return Self(*this) += stat;
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self operator * (const VType &k) const {
+    return Self(*this) *= k;
+  }
+ private:
+  // The - operation makes no sense with Min/Max
+  // unless we keep the full list of values (but we don't)
+  // make it private, and let it undefined so nobody can call it
+  Self &operator -=(const Self &stat);  // senseless. let it undefined.
+  // The operation opposite to +
+  Self operator - (const Self &stat) const;  // senseless. let it undefined.
+ public:
+  // Return the maximal value in this sample set
+  VType Max() const {
+    return max_;
+  }
+  // Return the minimal value in this sample set
+  VType Min() const {
+    return min_;
+  }
+ private:
+                        // Let i be the index of the samples provided (using +=)
+                        // and weight[i],value[i] be the data of sample #i
+                        // then the variables have the following meaning:
+  VType max_;           // max of value[i]
+  VType min_;           // min of value[i]
+};
+// Streams a one-line summary of a Stat1MinMax: mean, standard deviation,
+// total sample weight, and the smallest and largest values seen.
+template <typename VType, typename NumType>
+inline std::ostream& operator <<(std::ostream& out,
+                                 const Stat1MinMax<VType, NumType>& s) {
+  out << "{ avg = " << s.Mean();
+  out << " std = " << s.StdDev();
+  out << " nsamples = " << s.NumSamples();
+  out << " min = " << s.Min();
+  out << " max = " << s.Max() << "}";
+  return out;
+}
+#endif  // BENCHMARK_STAT_H_
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
--- a/src/sysinfo.h
+++ b/src/sysinfo.h
+#ifndef BENCHMARK_SYSINFO_H_
+#define BENCHMARK_SYSINFO_H_
+// NOTE(review): the definitions live in sysinfo.cc, whose body is not shown in
+// this listing. The descriptions below are inferred from names and call sites
+// and should be confirmed against the implementation.
+// Presumably the CPU time consumed so far by the current process.
+double MyCPUUsage();
+// Presumably the CPU time consumed so far by child processes.
+double ChildrenCPUUsage();
+// Presumably the number of CPUs available on this machine.
+int NumCPUs();
+// Frequency of the cycle counter. walltime::Initialize() truncates the result
+// to an integer, requires it to be non-zero, and uses it to convert cycle
+// counts into seconds.
+double CyclesPerSecond();
+#endif  // BENCHMARK_SYSINFO_H_
--- a/src/walltime.cc
+++ b/src/walltime.cc
+#include "walltime.h"
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <atomic>
+#include <limits>
+#include "cycleclock.h"
+#include "macros.h"
+#include "sysinfo.h"
+namespace walltime {
+namespace {
+// Largest gap, in seconds, tolerated between a cycle-counter reading and the
+// wall-clock reading it is paired with.
+const double kMaxErrorInterval = 100e-6;
+// Set once Initialize() has finished; until then Now() falls back to Slow().
+std::atomic<bool> initialized(false);
+// Wall-clock and cycle-counter readings captured together by Initialize();
+// Now() extrapolates from this pair.
+WallTime base_walltime = 0.0;
+int64_t base_cycletime = 0;
+// Cycle-counter frequency (truncated to an integer) and its reciprocal.
+int64_t cycles_per_second;
+double seconds_per_cycle;
+// Upper 32 bits of the cycle counter at the time drift was last recomputed.
+uint32_t last_adjust_time = 0;
+// Bit pattern of the float drift correction; accessed through SetDrift() and
+// GetDrift().
+std::atomic<int32_t> drift_adjust(0);
+// kMaxErrorInterval expressed in cycles; computed by Initialize().
+int64_t max_interval_cycles = 0;
+// SetDrift()/GetDrift() move a float in and out of the int32_t atomic
+// drift_adjust by copying its bytes. This workaround exists because g++ < 4.7
+// doesn't support std::atomic<float> correctly; it should be removed once
+// that compiler no longer needs to be supported.
+inline void SetDrift(float f) {
+  int32_t bits;
+  memcpy(&bits, &f, sizeof(f));
+  drift_adjust.store(bits);
+}
+// Reads the drift correction back out of drift_adjust, reinterpreting the
+// stored bytes as a float.
+inline float GetDrift() {
+  const int32_t bits = drift_adjust.load();
+  float value;
+  memcpy(&value, &bits, sizeof(value));
+  return value;
+}
+// SetDrift()/GetDrift() copy sizeof(float) bytes to and from an int32_t, so
+// the float must not be larger than the integer that holds it.
+static_assert(sizeof(float) <= sizeof(int32_t),
+              "type sizes don't allow the drift_adjust hack");
+// Reads the wall clock directly with gettimeofday() and returns it as
+// seconds (with a microsecond-resolution fraction).
+WallTime Slow() {
+  struct timeval current;
+  gettimeofday(&current, NULL);
+  const WallTime whole_seconds = current.tv_sec;
+  const WallTime fraction = current.tv_usec * 1e-6;
+  return whole_seconds + fraction;
+}
+// Splits `value` (seconds) into a broken-down calendar time and the leftover
+// fraction of a second.
+//   value:     time to split; must be non-negative and fit in a time_t.
+//   local:     true to convert with localtime_r(), false for gmtime_r().
+//   t:         receives the calendar time; zeroed on failure.
+//   subsecond: receives the fractional second; 0.0 on failure.
+// Returns false when `value` is out of range (or NaN) or when the C library
+// cannot convert it, true otherwise.
+bool SplitTimezone(WallTime value, bool local, struct tm* t,
+                   double* subsecond) {
+  memset(t, 0, sizeof(*t));
+  *subsecond = 0.0;
+  // One past the largest time_t, computed in floating point. Comparing
+  // against max() itself is not enough: for a 64-bit time_t, max() rounds up
+  // to 2^63 as a double, and casting 2^63 back to time_t overflows.
+  const double limit =
+      static_cast<double>(std::numeric_limits<time_t>::max()) + 1.0;
+  // Written as a negated "in range" test so that NaN is rejected as well.
+  if (!(value >= 0 && value < limit)) {
+    return false;
+  }
+  const time_t whole_time = static_cast<time_t>(value);
+  // Both converters return NULL when the time cannot be represented in a
+  // struct tm (for example a year that does not fit in an int).
+  struct tm* converted =
+      local ? localtime_r(&whole_time, t) : gmtime_r(&whole_time, t);
+  if (converted == NULL) {
+    memset(t, 0, sizeof(*t));
+    return false;
+  }
+  *subsecond = value - whole_time;
+  return true;
+}
+}  // end namespace
+// One-time setup for the walltime module; must run before Now() can use the
+// cycle counter. It is not intended for general purpose use and must not be
+// called twice.
+void Initialize() {
+  CHECK(!initialized.load());
+  cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
+  CHECK(cycles_per_second != 0);
+  seconds_per_cycle = 1.0 / cycles_per_second;
+  max_interval_cycles =
+      static_cast<int64_t>(cycles_per_second * kMaxErrorInterval);
+  // Sample the cycle counter and the wall clock back to back, repeating until
+  // the pair was taken quickly enough to be treated as simultaneous.
+  for (;;) {
+    base_cycletime = CycleClock::Now();
+    base_walltime = Slow();
+    if (!(CycleClock::Now() - base_cycletime > max_interval_cycles)) break;
+  }
+  // "base_walltime" and "base_cycletime" are now known to have been produced
+  // within kMaxErrorInterval of one another.
+  SetDrift(0.0);
+  last_adjust_time = static_cast<uint32_t>(uint64_t(base_cycletime) >> 32);
+  initialized.store(true);
+}
+// Returns the current wall time in seconds.
+//
+// Before Initialize() has completed this simply reads the system clock via
+// Slow(). Afterwards the time is extrapolated from the cycle counter and
+// corrected by a drift term, which is recomputed against the system clock
+// whenever the upper 32 bits of the cycle counter have changed.
+//
+// NOTE(review): last_adjust_time is a plain uint32_t that is read and written
+// here without synchronization -- confirm this is only called from one thread.
+WallTime Now() {
+  if (!std::atomic_load(&initialized))
+    return Slow();
+  WallTime now = 0.0;
+  WallTime result = 0.0;
+  int64_t ct = 0;
+  uint32_t top_bits = 0;
+  do {
+    ct = CycleClock::Now();
+    // Extrapolate from the base readings taken by Initialize().
+    int64_t cycle_delta = ct - base_cycletime;
+    result = base_walltime + cycle_delta * seconds_per_cycle;
+    top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
+    // Recompute drift no more often than every 2^32 cycles.
+    // I.e., @2GHz, ~ every two seconds
+    if (top_bits == last_adjust_time) { // don't need to recompute drift
+      return result + GetDrift();
+    }
+    now = Slow();
+    // Retry if reading the system clock took too long for "now" and "result"
+    // to be treated as simultaneous.
+  } while (CycleClock::Now() - ct > max_interval_cycles);
+  // We are now sure that "now" and "result" were produced within
+  // kMaxErrorInterval of one another.
+  SetDrift(now - result);
+  last_adjust_time = top_bits;
+  return now;
+}
+// Formats `time` into `storage` using the strftime-style `format`, in local
+// time when `local` is true and UTC otherwise, and returns `storage`.
+// If `remainder_us` is non-NULL and the time is valid, it receives the
+// fractional second rounded to microseconds and clamped to [0, 999999]; it is
+// left untouched when the time is invalid.
+// For a time that SplitTimezone() rejects, an "Invalid time" message is
+// written instead.
+//
+// NOTE(review): `storage` is a char*, so sizeof(storage) below is the size of
+// a pointer, not of the caller's buffer. Both snprintf and strftime are
+// therefore limited to that many bytes regardless of how large the buffer
+// really is. Fixing this needs the buffer size passed in, which changes the
+// signature declared in walltime.h.
+const char* Print(WallTime time, const char *format, bool local,
+                  char* storage, int *remainder_us) {
+    struct tm split;
+    double subsecond;
+    if (!SplitTimezone(time, local, &split, &subsecond)) {
+      snprintf(storage, sizeof(storage), "Invalid time: %f", time);
+    } else {
+      if (remainder_us != NULL) {
+        // Round to the nearest microsecond, then clamp so rounding up can
+        // never produce a full second.
+        *remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
+        if (*remainder_us > 999999) *remainder_us = 999999;
+        if (*remainder_us < 0)      *remainder_us = 0;
+      }
+      strftime(storage, sizeof(storage), format, &split);
+    }
+    return storage;
+}
+}  // end namespace walltime
--- a/src/walltime.h
+++ b/src/walltime.h
+#ifndef BENCHMARK_WALLTIME_H_
+#define BENCHMARK_WALLTIME_H_
+// A point in time expressed in seconds, with the sub-second part carried in
+// the fraction.
+typedef double WallTime;
+namespace walltime {
+// Calibrates the cycle-counter based clock. Must be called once, before Now()
+// can use the fast path; calling it a second time fails a CHECK.
+void Initialize();
+// Returns the current wall time. Falls back to reading the system clock
+// directly if Initialize() has not been called yet.
+WallTime Now();
+// GIVEN: walltime, generic format string (as understood by strftime),
+// a boolean flag specifying if the time is local or UTC (true=local).
+// RETURNS: the formatted string. ALSO RETURNS: the storage printbuffer
+// passed and the remaining number of microseconds (never printed in
+// the string since strftime does not understand it)
+const char* Print(WallTime time, const char *format, bool local,
+                  char* storage, int *remainder_us);
+}  // end namespace walltime
+#endif  // BENCHMARK_WALLTIME_H_
--- a/test/benchmark_test.cc
+++ b/test/benchmark_test.cc
+#include "benchmark/benchmark.h"
+#include <math.h>
+#include <stdint.h>
+#include <limits>
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <vector>
+namespace {
+// Recursively computes n!. Kept out of line (ATTRIBUTE_NOINLINE) so that each
+// call in a benchmark loop is a real call. The result is returned as an int
+// and is only meaningful while n! fits in one.
+int ATTRIBUTE_NOINLINE Factorial(uint32_t n) {
+  // 0! and 1! are both 1. Treating 0 as a base case also stops the unsigned
+  // argument from wrapping around to 2^32 - 1 and recursing without bound.
+  return (n <= 1) ? 1 : n * Factorial(n - 1);
+}
+// Approximates pi by summing `depth` terms of an alternating series over the
+// odd denominators -1, 1, 3, 5, ... The i == 0 term (-1 / -1) contributes an
+// extra 1.0, which is subtracted again before the sum is scaled by 4; what
+// remains is 4 * (1 - 1/3 + 1/5 - ...). A depth of 1 therefore yields 0.0.
+double CalculatePi(int depth) {
+  double sum = 0.0;
+  int i = 0;
+  while (i < depth) {
+    const double sign = (i % 2 == 0) ? -1.0 : 1.0;
+    const double odd = static_cast<double>((2 * i) - 1);
+    sum += sign / odd;
+    ++i;
+  }
+  return 4 * (sum - 1.0);
+}
+// Builds a set holding the integers 0 .. size-1. Despite the name the
+// contents are deterministic; a non-positive size gives an empty set.
+std::set<int> ConstructRandomSet(int size) {
+  std::set<int> result;
+  int next = 0;
+  while (next < size) {
+    result.insert(next);
+    ++next;
+  }
+  return result;
+}
+// Scratch vector for BM_SetupTeardown: allocated before its timed loop and
+// freed afterwards, NULL the rest of the time.
+static std::vector<int>* test_vector = NULL;
+}  // end namespace
+#ifdef DEBUG
+// Times repeated calls to Factorial(8). Only registered in DEBUG builds.
+static void BM_Factorial(benchmark::State& state) {
+  int result = 0;
+  while (state.KeepRunning()) {
+    result = Factorial(8);
+  }
+  // Use the result so the compiler cannot discard the calls.
+  CHECK(result != std::numeric_limits<int>::max());
+}
+BENCHMARK(BM_Factorial);
+#endif
+// Times CalculatePi() at the depth given by the benchmark's x range and
+// attaches the last computed approximation to the run as its label.
+static void BM_CalculatePiRange(benchmark::State& state) {
+  double approx = 0.0;
+  while (state.KeepRunning()) {
+    approx = CalculatePi(state.range_x());
+  }
+  std::stringstream label;
+  label << approx;
+  state.SetLabel(label.str());
+}
+BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
+// Times CalculatePi() at a fixed depth of 1024. Registered three times to
+// exercise the different threading options.
+static void BM_CalculatePi(benchmark::State& state) {
+  static const int kDepth = 1024;
+  double approx ATTRIBUTE_UNUSED = 0.0;
+  while (state.KeepRunning())
+    approx = CalculatePi(kDepth);
+}
+BENCHMARK(BM_CalculatePi)->Threads(8);
+BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
+BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
+// Times inserting range_y() random values into a set that already holds
+// range_x() elements. Building the starting set happens with timing paused so
+// only the insertions are measured.
+static void BM_SetInsert(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    state.PauseTiming();
+    std::set<int> data = ConstructRandomSet(state.range_x());
+    state.ResumeTiming();
+    int inserted = 0;
+    while (inserted < state.range_y()) {
+      data.insert(rand());
+      ++inserted;
+    }
+  }
+}
+BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
+// Times appending range_x() elements per iteration to a sequence container of
+// type Q, and reports the number of items and bytes pushed.
+template<typename Q>
+static void BM_Sequential(benchmark::State& state) {
+  Q q;
+  // Value-initialize the element so that copying it into the container never
+  // reads an indeterminate value.
+  typename Q::value_type v = typename Q::value_type();
+  // Push exactly range_x() elements per iteration so the work done matches
+  // the item count reported below.
+  const int count = state.range_x();
+  while (state.KeepRunning())
+    for (int i = 0; i < count; ++i)
+      q.push_back(v);
+  const int64_t items_processed =
+      static_cast<int64_t>(state.iterations()) * state.range_x();
+  state.SetItemsProcessed(items_processed);
+  state.SetBytesProcessed(items_processed * sizeof(v));
+}
+BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
+BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
+// Times std::string::compare() on two equal strings of range_x() characters.
+static void BM_StringCompare(benchmark::State& state) {
+  const std::string lhs(state.range_x(), '-');
+  const std::string rhs(state.range_x(), '-');
+  int combined = 0;
+  while (state.KeepRunning()) {
+    combined |= lhs.compare(rhs);
+  }
+  // Use the result so the compiler cannot discard the comparisons.
+  CHECK(combined != std::numeric_limits<int>::max());
+}
+BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
+// Shows per-benchmark setup and teardown: thread 0 allocates the shared
+// vector before the timed loop and releases it afterwards.
+static void BM_SetupTeardown(benchmark::State& state) {
+  // Setup, performed by thread 0 only.
+  if (state.thread_index == 0) {
+    test_vector = new std::vector<int>();
+  }
+  while (state.KeepRunning()) {
+    test_vector->push_back(0);
+  }
+  // Teardown, performed by thread 0 only.
+  if (state.thread_index == 0) {
+    delete test_vector;
+    test_vector = NULL;
+  }
+}
+BENCHMARK(BM_SetupTeardown);
+// Times a simple accumulation loop of range_x() additions per iteration.
+static void BM_LongTest(benchmark::State& state) {
+  double tracker = 0.0;
+  while (state.KeepRunning()) {
+    int i = 0;
+    while (i < state.range_x()) {
+      tracker += i;
+      ++i;
+    }
+  }
+  // Use the total so the compiler cannot discard the loop.
+  CHECK(tracker != 0.0);
+}
+BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
+// Entry point: initializes the benchmark library from the command line,
+// sanity-checks the helper functions, then runs the selected benchmarks.
+int main(int argc, const char* argv[]) {
+  benchmark::Initialize(&argc, argv);
+  // Verify the helpers produce the expected values before timing them.
+  CHECK(Factorial(8) == 40320);
+  CHECK(CalculatePi(1) == 0.0);
+  benchmark::RunSpecifiedBenchmarks();
+  return 0;
+}