Commit 27e0b439 by Eric Committed by Dominic Hamon

Refactor System information collection -- Add CPU Cache Info (#483)

* Refactor System information collection.

This patch refactors the system information collection, and in particular
information about the target CPU. The motivation is to make it easier to
access CPU information, and easier to add new information as need be.

This patch additionally adds information about the cache sizes of the CPU.

* Address review comments: Clean up integer types.

This commit cleans up the integer types used in ValueUnion to follow the
Google style guide. Additionally it adds a BENCHMARK_UNREACHABLE macro to
assist in documenting/catching unreachable code paths.

* Rename ValueUnion accessors.
parent aad6a5fa
...@@ -1154,6 +1154,25 @@ class Fixture : public internal::Benchmark { ...@@ -1154,6 +1154,25 @@ class Fixture : public internal::Benchmark {
namespace benchmark { namespace benchmark {
// CPUInfo - Describes the CPU the benchmarks are running on.
//
// The only instance is obtained through CPUInfo::Get(); the constructor is
// private and copying is disallowed.
struct CPUInfo {
// Description of a single CPU cache.
struct CacheInfo {
// Kind of cache, e.g. "Data", "Instruction" or "Unified".
std::string type;
// Cache level (1 for L1, 2 for L2, ...).
int level;
// Cache size. NOTE(review): presumably bytes; the sysfs reader scales a
// "K" suffix by 1000 and the console reporter divides by 1000 -- confirm.
int size;
};
// NOTE: the declaration order of the members below matters. The constructor
// initializes `scaling_enabled` from `num_cpus`, so `num_cpus` must be
// declared (and therefore initialized) first.

// Number of CPUs detected. The /proc/cpuinfo reader returns -1 on failure.
int num_cpus;
// CPU clock frequency in cycles per second (Hz).
double cycles_per_second;
// One entry per cache that could be detected; may be empty.
std::vector<CacheInfo> caches;
// True if CPU frequency scaling was detected.
bool scaling_enabled;
// Returns a reference to the process-wide CPUInfo instance.
static const CPUInfo& Get();
private:
// Collects all of the information above.
CPUInfo();
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
// Interface for custom benchmark result printers. // Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application // By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling // can control the destination of the reports by calling
...@@ -1162,12 +1181,11 @@ namespace benchmark { ...@@ -1162,12 +1181,11 @@ namespace benchmark {
class BenchmarkReporter { class BenchmarkReporter {
public: public:
struct Context { struct Context {
int num_cpus; CPUInfo const& cpu_info;
double mhz_per_cpu;
bool cpu_scaling_enabled;
// The number of chars in the longest benchmark name. // The number of chars in the longest benchmark name.
size_t name_field_width; size_t name_field_width;
Context();
}; };
struct Run { struct Run {
......
...@@ -37,13 +37,13 @@ ...@@ -37,13 +37,13 @@
#include "colorprint.h" #include "colorprint.h"
#include "commandlineflags.h" #include "commandlineflags.h"
#include "complexity.h" #include "complexity.h"
#include "statistics.h"
#include "counter.h" #include "counter.h"
#include "internal_macros.h"
#include "log.h" #include "log.h"
#include "mutex.h" #include "mutex.h"
#include "re.h" #include "re.h"
#include "statistics.h"
#include "string_util.h" #include "string_util.h"
#include "sysinfo.h"
#include "timers.h" #include "timers.h"
DEFINE_bool(benchmark_list_tests, false, DEFINE_bool(benchmark_list_tests, false,
...@@ -108,6 +108,14 @@ namespace internal { ...@@ -108,6 +108,14 @@ namespace internal {
void UseCharPointer(char const volatile*) {} void UseCharPointer(char const volatile*) {}
#ifdef BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE
// Out-of-line target of BENCHMARK_UNREACHABLE() for toolchains where the
// macro could not be mapped onto a compiler builtin. Reports the source
// location (FName:Line) on stderr and then aborts; it never returns.
BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line) {
  std::ostream& Err = std::cerr;
  Err << FName << ":" << Line << " executing unreachable code!" << '\n';
  Err.flush();
  std::abort();
}
#endif
class ThreadManager { class ThreadManager {
public: public:
ThreadManager(int num_threads) ThreadManager(int num_threads)
...@@ -493,10 +501,6 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks, ...@@ -493,10 +501,6 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
// Print header here // Print header here
BenchmarkReporter::Context context; BenchmarkReporter::Context context;
context.num_cpus = NumCPUs();
context.mhz_per_cpu = CyclesPerSecond() / 1000000.0;
context.cpu_scaling_enabled = CpuScalingEnabled();
context.name_field_width = name_field_width; context.name_field_width = name_field_width;
// Keep track of runing times of all instances of current benchmark // Keep track of runing times of all instances of current benchmark
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include "mutex.h" #include "mutex.h"
#include "re.h" #include "re.h"
#include "string_util.h" #include "string_util.h"
#include "sysinfo.h"
#include "timers.h" #include "timers.h"
namespace benchmark { namespace benchmark {
...@@ -448,8 +447,7 @@ Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, ...@@ -448,8 +447,7 @@ Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
} }
Benchmark* Benchmark::ThreadPerCpu() { Benchmark* Benchmark::ThreadPerCpu() {
static int num_cpus = NumCPUs(); thread_counts_.push_back(CPUInfo::Get().num_cpus);
thread_counts_.push_back(num_cpus);
return this; return this;
} }
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
#ifndef __has_feature #ifndef __has_feature
#define __has_feature(x) 0 #define __has_feature(x) 0
#endif #endif
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if defined(__clang__) #if defined(__clang__)
#define COMPILER_CLANG #define COMPILER_CLANG
...@@ -56,4 +59,23 @@ ...@@ -56,4 +59,23 @@
#define BENCHMARK_HAS_NO_EXCEPTIONS #define BENCHMARK_HAS_NO_EXCEPTIONS
#endif #endif
// BENCHMARK_MAYBE_UNUSED - Marks a declaration as intentionally unused. It
// expands to `__attribute__((unused))` on Clang and GCC and to nothing on
// every other compiler.
#if defined(COMPILER_CLANG) || defined(COMPILER_GCC)
#define BENCHMARK_MAYBE_UNUSED __attribute__((unused))
#else
#define BENCHMARK_MAYBE_UNUSED
#endif
// BENCHMARK_UNREACHABLE() - Documents a code path that must never execute.
// With GCC, or when `__has_builtin(__builtin_unreachable)` is true, it expands
// to the `__builtin_unreachable()` builtin. Otherwise
// BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE is defined and the macro calls
// UnreachableImp() with the current file and line; only the declaration of
// that function lives here.
#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable)
#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#else
#define BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE
namespace benchmark {
namespace internal {
BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line);
}
} // namespace benchmark
#define BENCHMARK_UNREACHABLE() \
::benchmark::internal::UnreachableImp(__FILE__, __LINE__)
#endif
#endif // BENCHMARK_INTERNAL_MACROS_H_ #endif // BENCHMARK_INTERNAL_MACROS_H_
...@@ -77,11 +77,14 @@ bool JSONReporter::ReportContext(const Context& context) { ...@@ -77,11 +77,14 @@ bool JSONReporter::ReportContext(const Context& context) {
std::string walltime_value = LocalDateTimeString(); std::string walltime_value = LocalDateTimeString();
out << indent << FormatKV("date", walltime_value) << ",\n"; out << indent << FormatKV("date", walltime_value) << ",\n";
out << indent << FormatKV("num_cpus", static_cast<int64_t>(context.num_cpus)) CPUInfo const& info = context.cpu_info;
out << indent << FormatKV("num_cpus", static_cast<int64_t>(info.num_cpus))
<< ",\n"; << ",\n";
out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) out << indent
<< FormatKV("mhz_per_cpu",
RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n"; << ",\n";
out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
<< ",\n"; << ",\n";
#if defined(NDEBUG) #if defined(NDEBUG)
......
...@@ -35,12 +35,21 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, ...@@ -35,12 +35,21 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
CHECK(out) << "cannot be null"; CHECK(out) << "cannot be null";
auto &Out = *out; auto &Out = *out;
Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
<< " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
Out << LocalDateTimeString() << "\n"; Out << LocalDateTimeString() << "\n";
if (context.cpu_scaling_enabled) { const CPUInfo &info = context.cpu_info;
Out << "Run on (" << info.num_cpus << " X "
<< (info.cycles_per_second / 1000000.0) << " MHz CPU "
<< ((info.num_cpus > 1) ? "s" : "") << ")\n";
if (info.caches.size() != 0) {
Out << "CPU Caches:\n";
for (auto &CInfo : info.caches) {
Out << " L" << CInfo.level << " " << CInfo.type << " "
<< (CInfo.size / 1000) << "K\n";
}
}
if (info.scaling_enabled) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark " Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra " "real time measurements may be noisy and will incur extra "
"overhead.\n"; "overhead.\n";
...@@ -52,6 +61,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, ...@@ -52,6 +61,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
#endif #endif
} }
BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}
double BenchmarkReporter::Run::GetAdjustedRealTime() const { double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations); if (iterations != 0) new_time /= static_cast<double>(iterations);
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "sysinfo.h"
#include "internal_macros.h" #include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS #ifdef BENCHMARK_OS_WINDOWS
...@@ -25,21 +24,26 @@ ...@@ -25,21 +24,26 @@
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h> #include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
defined BENCHMARK_OS_NETBSD
#define BENCHMARK_HAS_SYSCTL
#include <sys/sysctl.h> #include <sys/sysctl.h>
#endif #endif
#endif #endif
#include <algorithm>
#include <cerrno> #include <cerrno>
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <fstream>
#include <iostream> #include <iostream>
#include <iterator>
#include <limits> #include <limits>
#include <mutex> #include <memory>
#include <sstream>
#include "arraysize.h"
#include "check.h" #include "check.h"
#include "cycleclock.h" #include "cycleclock.h"
#include "internal_macros.h" #include "internal_macros.h"
...@@ -49,225 +53,348 @@ ...@@ -49,225 +53,348 @@
namespace benchmark { namespace benchmark {
namespace { namespace {
std::once_flag cpuinfo_init;
double cpuinfo_cycles_per_second = 1.0;
int cpuinfo_num_cpus = 1; // Conservative guess
#if !defined BENCHMARK_OS_MACOSX void PrintImp(std::ostream& out) { out << std::endl; }
const int64_t estimate_time_ms = 1000;
// Helper function estimates cycles/sec by observing cycles elapsed during template <class First, class... Rest>
// sleep(). Using small sleep time decreases accuracy significantly. void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
int64_t EstimateCyclesPerSecond() { out << std::forward<First>(f);
const int64_t start_ticks = cycleclock::Now(); PrintImp(out, std::forward<Rest>(rest)...);
SleepForMilliseconds(estimate_time_ms); }
return cycleclock::Now() - start_ticks;
template <class... Args>
BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
PrintImp(std::cerr, std::forward<Args>(args)...);
std::exit(EXIT_FAILURE);
} }
#ifdef BENCHMARK_HAS_SYSCTL
#ifdef __GNUC__
// Suppress the warning generated by the C11 flexible array member below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#endif #endif
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN /// ValueUnion - A type used to correctly alias the byte-for-byte output of
// Helper function for reading an int from a file. Returns true if successful /// `sysctl` with the result type it's to be interpreted as.
// and the memory location pointed to by value is set to the value read. struct ValueUnion {
bool ReadIntFromFile(const char* file, long* value) { union DataT {
bool ret = false; uint32_t uint32_value;
int fd = open(file, O_RDONLY); uint64_t uint64_value;
if (fd != -1) { // FIXME (Maybe?): This is a C11 flexible array member, and not technically
char line[1024]; // C++. However, all compilers support it and it allows for correct aliasing
char* err; // of union members from bytes.
memset(line, '\0', sizeof(line)); char bytes[];
ssize_t read_err = read(fd, line, sizeof(line) - 1); };
((void)read_err); // prevent unused warning using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
CHECK(read_err >= 0);
const long temp_value = strtol(line, &err, 10); // The size of the data union member + its trailing array size.
if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { size_t Size;
*value = temp_value; DataPtr Buff;
ret = true;
} public:
close(fd); ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
explicit ValueUnion(size_t BuffSize)
: Size(sizeof(DataT) + BuffSize),
Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
ValueUnion(ValueUnion&& other) = default;
explicit operator bool() const { return bool(Buff); }
char* data() const { return Buff->bytes; }
std::string GetAsString() const { return std::string(data()); }
int64_t GetAsInteger() const {
if (Size == sizeof(Buff->uint32_value))
return static_cast<int32_t>(Buff->uint32_value);
else if (Size == sizeof(Buff->uint64_value))
return static_cast<int64_t>(Buff->uint64_value);
BENCHMARK_UNREACHABLE();
}
uint64_t GetAsUnsigned() const {
if (Size == sizeof(Buff->uint32_value))
return Buff->uint32_value;
else if (Size == sizeof(Buff->uint64_value))
return Buff->uint64_value;
BENCHMARK_UNREACHABLE();
} }
return ret; };
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
/// Reads the raw value of the sysctl entry `Name`.
///
/// Returns a ValueUnion holding the bytes written by `sysctlbyname`, or an
/// empty (false-y) ValueUnion if either `sysctlbyname` call fails.
ValueUnion GetSysctlImp(std::string const& Name) {
  const char* const Key = Name.c_str();

  // Size query: no output buffer is supplied, only the length is requested.
  size_t Needed = 0;
  const bool SizeKnown =
      sysctlbyname(Key, nullptr, &Needed, nullptr, 0) != -1;
  if (!SizeKnown) return ValueUnion();

  // Data query: `Result.Size` is passed as the in/out length, so after the
  // call it holds whatever length sysctlbyname stored there.
  ValueUnion Result(Needed);
  if (sysctlbyname(Key, Result.data(), &Result.Size, nullptr, 0) != 0)
    return ValueUnion();
  return Result;
}
/// Reads the sysctl entry `Name` as a string.
///
/// `*Out` is always cleared first. On success the value is stored in `*Out`
/// (read as a NUL-terminated C string) and true is returned; on failure `*Out`
/// stays empty and false is returned.
BENCHMARK_MAYBE_UNUSED
bool GetSysctl(std::string const& Name, std::string* Out) {
  Out->clear();
  ValueUnion Value = GetSysctlImp(Name);
  const bool Found = static_cast<bool>(Value);
  if (Found) Out->assign(Value.data());
  return Found;
}
/// Reads the sysctl entry `Name` as an integer of type `Tp`.
///
/// Only participates in overload resolution when `Tp` is an integral type.
/// `*Out` is set to 0 up front, so it is 0 whenever false is returned. On
/// success the value is fetched through ValueUnion::GetAsUnsigned() and
/// converted to `Tp` with a static_cast (no range check is performed).
template <class Tp,
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
bool GetSysctl(std::string const& Name, Tp* Out) {
*Out = 0;
auto Buff = GetSysctlImp(Name);
if (!Buff) return false;
*Out = static_cast<Tp>(Buff.GetAsUnsigned());
return true;
} }
#endif #endif
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN template <class ArgT>
static std::string convertToLowerCase(std::string s) { bool ReadFromFile(std::string const& fname, ArgT* arg) {
for (auto& ch : s) *arg = ArgT();
ch = std::tolower(ch); std::ifstream f(fname.c_str());
return s; if (!f.is_open()) return false;
f >> *arg;
return f.good();
} }
static bool startsWithKey(std::string Value, std::string Key,
bool IgnoreCase = true) { bool CpuScalingEnabled(int num_cpus) {
if (IgnoreCase) { // We don't have a valid CPU count, so don't even bother.
Key = convertToLowerCase(std::move(Key)); if (num_cpus <= 0) return false;
Value = convertToLowerCase(std::move(Value)); #ifndef BENCHMARK_OS_WINDOWS
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
// local file system. If reading the exported files fails, then we may not be
// running on Linux, so we silently ignore all the read errors.
std::string res;
for (int cpu = 0; cpu < num_cpus; ++cpu) {
std::string governor_file =
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
if (ReadFromFile(governor_file, &res) && res != "performance") return true;
} }
return Value.compare(0, Key.size(), Key) == 0; #endif
return false;
} }
/// Collects cache descriptions for cpu0 from the files under
/// /sys/devices/system/cpu/cpu0/cache/index<N>/.
///
/// Directories are visited for N = 0, 1, 2, ... until the `size` file of one
/// of them cannot be opened. For every directory the `size`, `type` and
/// `level` files are read; a malformed or unreadable file terminates the
/// process through PrintErrorAndDie.
///
/// NOTE(review): a "K" suffix is scaled by 1000 here (and the console
/// reporter divides by 1000 again); sysfs presumably means KiB -- confirm
/// before changing either side.
BENCHMARK_MAYBE_UNUSED
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
  std::vector<CPUInfo::CacheInfo> Caches;
  std::string Root = "/sys/devices/system/cpu/cpu0/cache/";
  for (int Index = 0;; ++Index) {
    std::string IndexDir = StrCat(Root, "index", Index, "/");
    std::ifstream SizeFile(StrCat(IndexDir, "size").c_str());
    // The first missing index directory marks the end of the list.
    if (!SizeFile.is_open()) break;

    CPUInfo::CacheInfo Entry;
    SizeFile >> Entry.size;
    if (SizeFile.fail()) {
      PrintErrorAndDie("Failed while reading file '", IndexDir, "size'");
    }
    // Only look for a unit suffix when the stream is still fully usable
    // (i.e. the number was not immediately followed by end-of-file).
    if (SizeFile.good()) {
      std::string Unit;
      SizeFile >> Unit;
      if (SizeFile.bad()) {
        PrintErrorAndDie(
            "Invalid cache size format: failed to read size suffix");
      } else if (SizeFile && Unit != "K") {
        PrintErrorAndDie("Invalid cache size format: Expected bytes ", Unit);
      } else if (Unit == "K") {
        Entry.size *= 1000;
      }
    }

    if (!ReadFromFile(StrCat(IndexDir, "type"), &Entry.type)) {
      PrintErrorAndDie("Failed to read from file ", IndexDir, "type");
    }
    if (!ReadFromFile(StrCat(IndexDir, "level"), &Entry.level)) {
      PrintErrorAndDie("Failed to read from file ", IndexDir, "level");
    }
    Caches.push_back(Entry);
  }
  return Caches;
}
#ifdef BENCHMARK_OS_MACOSX
/// Collects cache descriptions by querying a fixed set of `hw.*cachesize`
/// sysctl entries. Entries whose query fails are silently left out, so the
/// result may be empty.
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
  // One row per sysctl entry, together with the type/level it describes.
  struct SysctlCache {
    std::string name;
    std::string type;
    int level;
  };
  SysctlCache Table[] = {{"hw.l1dcachesize", "Data", 1},
                         {"hw.l1icachesize", "Instruction", 1},
                         {"hw.l2cachesize", "Unified", 2},
                         {"hw.l3cachesize", "Unified", 3}};

  std::vector<CPUInfo::CacheInfo> Caches;
  for (auto& Row : Table) {
    int Bytes;
    if (GetSysctl(Row.name, &Bytes)) {
      CPUInfo::CacheInfo Entry;
      Entry.type = Row.type;
      Entry.level = Row.level;
      Entry.size = Bytes;
      Caches.push_back(std::move(Entry));
    }
  }
  return Caches;
}
#endif
/// Returns the detected CPU caches for the current platform: the sysctl based
/// reader on macOS, and the /sys/devices/system/cpu reader on every other
/// platform.
std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
#ifdef BENCHMARK_OS_MACOSX
return GetCacheSizesMacOSX();
#else
return GetCacheSizesFromKVFS();
#endif #endif
}
// GetNumCPUs - Returns the CPU count reported by the platform.
//
//  * With sysctl support: the value of "hw.ncpu"; if the query fails the
//    errno message is printed and the process exits with EXIT_FAILURE.
//  * On Windows: SYSTEM_INFO::dwNumberOfProcessors.
//  * Otherwise: the number of lines in /proc/cpuinfo that begin with
//    "processor", or -1 if the file cannot be opened or fully read.
void InitializeSystemInfo() { int GetNumCPUs() {
#ifdef BENCHMARK_HAS_SYSCTL
int NumCPU = -1;
if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
fprintf(stderr, "Err: %s\n", strerror(errno));
std::exit(EXIT_FAILURE);
#elif defined(BENCHMARK_OS_WINDOWS)
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false
// positives.
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
GetSystemInfo(&sysinfo);
return sysinfo.dwNumberOfProcessors; // number of logical
// processors in the current
// group
#else
// NumCPUs counts the matching lines; MaxID tracks the largest id seen so the
// two can be cross-checked once the whole file has been read.
int NumCPUs = 0;
int MaxID = -1;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
std::cerr << "failed to open /proc/cpuinfo\n";
return -1;
}
const std::string Key = "processor";
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
// Everything after the first ':' is treated as the entry's value.
size_t SplitIdx = ln.find(':');
std::string value;
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
// The key comparison is case-sensitive: only lines starting with a
// lower-case "processor" are counted.
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
NumCPUs++;
if (!value.empty()) {
// NOTE(review): std::stoi throws if `value` does not start with a number;
// confirm that is acceptable for builds with exceptions disabled.
int CurID = std::stoi(value);
MaxID = std::max(CurID, MaxID);
}
}
}
// getline() stopping for any reason other than a clean end-of-file is
// reported as a failure.
if (f.bad()) {
std::cerr << "Failure reading /proc/cpuinfo\n";
return -1;
}
if (!f.eof()) {
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
return -1;
}
f.close();
// Ids are expected to run 0..NumCPUs-1; a mismatch only produces a warning
// and the line count is still returned.
if ((MaxID + 1) != NumCPUs) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
return NumCPUs;
#endif
// Every preprocessor branch above returns or exits before reaching here.
BENCHMARK_UNREACHABLE();
}
double GetCPUCyclesPerSecond() {
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
char line[1024];
char* err;
long freq; long freq;
bool saw_mhz = false;
// If the kernel is exporting the tsc frequency use that. There are issues // If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be // where cpuinfo_max_freq cannot be relied on because the BIOS may be
// exporintg an invalid p-state (on x86) or p-states may be used to put the // exporintg an invalid p-state (on x86) or p-states may be used to put the
// processor in a new mode (turbo mode). Essentially, those frequencies // processor in a new mode (turbo mode). Essentially, those frequencies
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well. // well.
if (!saw_mhz && if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { // If CPU scaling is in effect, we want to use the *maximum* frequency,
// not whatever CPU speed some random processor happens to be using now.
|| ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz (as the file name suggests). For example, on a // The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000". // 2GHz warpstation, the file contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0; return freq * 1000.0;
saw_mhz = true;
} }
// If CPU scaling is in effect, we want to use the *maximum* frequency, const double error_value = -1;
// not whatever CPU speed some random processor happens to be using now. double bogo_clock = error_value;
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz. For example, on a 2GHz warpstation, the file
// contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. std::ifstream f("/proc/cpuinfo");
const char* pname = "/proc/cpuinfo"; if (!f.is_open()) {
int fd = open(pname, O_RDONLY); std::cerr << "failed to open /proc/cpuinfo\n";
if (fd == -1) { return error_value;
perror(pname);
if (!saw_mhz) {
cpuinfo_cycles_per_second =
static_cast<double>(EstimateCyclesPerSecond());
}
return;
} }
double bogo_clock = 1.0; auto startsWithKey = [](std::string const& Value, std::string const& Key) {
bool saw_bogo = false; if (Key.size() > Value.size()) return false;
long max_cpu_id = 0; auto Cmp = [&](char X, char Y) {
int num_cpus = 0; return std::tolower(X) == std::tolower(Y);
line[0] = line[1] = '\0'; };
size_t chars_read = 0; return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
do { // we'll exit when the last read didn't read anything };
// Move the next line to the beginning of the buffer
const size_t oldlinelen = strlen(line);
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
line[0] = '\0';
else // still other lines left to save
memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
// Terminate the new line, reading more if we can't find the newline
char* newline = strchr(line, '\n');
if (newline == nullptr) {
const size_t linelen = strlen(line);
const size_t bytes_to_read = sizeof(line) - 1 - linelen;
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
chars_read = read(fd, line + linelen, bytes_to_read);
line[linelen + chars_read] = '\0';
newline = strchr(line, '\n');
}
if (newline != nullptr) *newline = '\0';
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
size_t SplitIdx = ln.find(':');
std::string value;
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept postive values. Some environments (virtual machines) report zero, // accept postive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init. // which would cause infinite looping in WallTime_Init.
if (!saw_mhz && startsWithKey(line, "cpu MHz")) { if (startsWithKey(ln, "cpu MHz")) {
const char* freqstr = strchr(line, ':'); if (!value.empty()) {
if (freqstr) { double cycles_per_second = std::stod(value) * 1000000.0;
cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; if (cycles_per_second > 0) return cycles_per_second;
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
saw_mhz = true;
} }
} else if (startsWithKey(line, "bogomips")) { } else if (startsWithKey(ln, "bogomips")) {
const char* freqstr = strchr(line, ':'); if (!value.empty()) {
if (freqstr) { bogo_clock = std::stod(value) * 1000000.0;
bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; if (bogo_clock < 0.0) bogo_clock = error_value;
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
saw_bogo = true;
} }
} else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) {
// The above comparison is case-sensitive because ARM kernels often
// include a "Processor" line that tells you about the CPU, distinct
// from the usual "processor" lines that give you CPU ids. No current
// Linux architecture is using "Processor" for CPU ids.
num_cpus++; // count up every time we see an "processor :" entry
const char* id_str = strchr(line, ':');
if (id_str) {
const long cpu_id = strtol(id_str + 1, &err, 10);
if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
max_cpu_id = cpu_id;
}
}
} while (chars_read > 0);
close(fd);
if (!saw_mhz) {
if (saw_bogo) {
// If we didn't find anything better, we'll use bogomips, but
// we're not happy about it.
cpuinfo_cycles_per_second = bogo_clock;
} else {
// If we don't even have bogomips, we'll use the slow estimation.
cpuinfo_cycles_per_second =
static_cast<double>(EstimateCyclesPerSecond());
} }
} }
if (num_cpus == 0) { if (f.bad()) {
fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); std::cerr << "Failure reading /proc/cpuinfo\n";
} else { return error_value;
if ((max_cpu_id + 1) != num_cpus) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
cpuinfo_num_cpus = num_cpus;
} }
if (!f.eof()) {
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
return error_value;
}
f.close();
// If we found the bogomips clock, but nothing better, we'll use it (but
// we're not happy about it); otherwise, fallback to the rough estimation
// below.
if (bogo_clock >= 0.0) return bogo_clock;
#elif defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_NETBSD #elif defined BENCHMARK_HAS_SYSCTL
// FreeBSD notes constexpr auto* FreqStr =
// ============= #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
// For this sysctl to work, the machine must be configured without "machdep.tsc_freq";
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
// and later. Before that, it's a 32-bit quantity (and gives the
// wrong answer on machines faster than 2^32 Hz). See
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
// But also compare FreeBSD 7.0:
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
// To FreeBSD 6.3 (it's the same in 6-STABLE):
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
#if (__FreeBSD__ >= 7) || defined(__NetBSD__)
uint64_t hz = 0;
#else #else
unsigned int hz = 0; "hw.cpufrequency";
#endif #endif
size_t sz = sizeof(hz); unsigned long long hz = 0;
const char* sysctl_path = "machdep.tsc_freq"; if (GetSysctl(FreqStr, &hz)) return hz;
if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
sysctl_path, strerror(errno)); FreqStr, strerror(errno));
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
} else {
cpuinfo_cycles_per_second = hz;
}
int32_t num_cpus = 0;
size_t size = sizeof(num_cpus);
if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 &&
(size == sizeof(num_cpus))) {
cpuinfo_num_cpus = num_cpus;
} else {
fprintf(stderr, "%s\n", strerror(errno));
std::exit(EXIT_FAILURE);
}
#elif defined BENCHMARK_OS_WINDOWS #elif defined BENCHMARK_OS_WINDOWS
// In NT, read MHz from the registry. If we fail to do so or we're in win9x // In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate. // then make a crude estimate.
...@@ -277,89 +404,27 @@ void InitializeSystemInfo() { ...@@ -277,89 +404,27 @@ void InitializeSystemInfo() {
SHGetValueA(HKEY_LOCAL_MACHINE, SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size))) "~MHz", nullptr, &data, &data_size)))
cpuinfo_cycles_per_second = return static_cast<double>((int64_t)data *
static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz (int64_t)(1000 * 1000)); // was mhz
else
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false
// positives.
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
GetSystemInfo(&sysinfo);
cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical
// processors in the current
// group
#elif defined BENCHMARK_OS_MACOSX
int32_t num_cpus = 0;
size_t size = sizeof(num_cpus);
if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 &&
(size == sizeof(num_cpus))) {
cpuinfo_num_cpus = num_cpus;
} else {
fprintf(stderr, "%s\n", strerror(errno));
std::exit(EXIT_FAILURE);
}
int64_t cpu_freq = 0;
size = sizeof(cpu_freq);
if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 &&
(size == sizeof(cpu_freq))) {
cpuinfo_cycles_per_second = cpu_freq;
} else {
#if defined BENCHMARK_OS_IOS
fprintf(stderr, "CPU frequency cannot be detected. \n");
cpuinfo_cycles_per_second = 0;
#else
fprintf(stderr, "%s\n", strerror(errno));
std::exit(EXIT_FAILURE);
#endif
}
#else
// Generic cycles per second counter
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
#endif #endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
const int estimate_time_ms = 1000;
const auto start_ticks = cycleclock::Now();
SleepForMilliseconds(estimate_time_ms);
return static_cast<double>(cycleclock::Now() - start_ticks);
} }
} // end namespace } // end namespace
double CyclesPerSecond(void) { const CPUInfo& CPUInfo::Get() {
std::call_once(cpuinfo_init, InitializeSystemInfo); static const CPUInfo* info = new CPUInfo();
return cpuinfo_cycles_per_second; return *info;
}
int NumCPUs(void) {
std::call_once(cpuinfo_init, InitializeSystemInfo);
return cpuinfo_num_cpus;
} }
// The ""'s catch people who don't pass in a literal for "str" CPUInfo::CPUInfo()
#define strliterallen(str) (sizeof("" str "") - 1) : num_cpus(GetNumCPUs()),
cycles_per_second(GetCPUCyclesPerSecond()),
// Must use a string literal for prefix. caches(GetCacheSizes()),
#define memprefix(str, len, prefix) \ scaling_enabled(CpuScalingEnabled(num_cpus)) {}
((((len) >= strliterallen(prefix)) && \
std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
? str + strliterallen(prefix) \
: nullptr)
bool CpuScalingEnabled() {
#ifndef BENCHMARK_OS_WINDOWS
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
// local file system. If reading the exported files fails, then we may not be
// running on Linux, so we silently ignore all the read errors.
for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
std::string governor_file =
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
FILE* file = fopen(governor_file.c_str(), "r");
if (!file) break;
char buff[16];
size_t bytes_read = fread(buff, 1, sizeof(buff), file);
fclose(file);
if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
}
#endif
return false;
}
} // end namespace benchmark } // end namespace benchmark
// Free-function interface for querying basic CPU properties.
#ifndef BENCHMARK_SYSINFO_H_
#define BENCHMARK_SYSINFO_H_
namespace benchmark {
// Returns the number of CPUs detected on this machine.
int NumCPUs();
// Returns the CPU clock frequency in cycles per second.
double CyclesPerSecond();
// Returns true if CPU frequency scaling appears to be enabled.
bool CpuScalingEnabled();
} // end namespace benchmark
#endif // BENCHMARK_SYSINFO_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment