1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "sysinfo.h"
16 #include "internal_macros.h"
17
18 #ifdef BENCHMARK_OS_WINDOWS
19 #include <Shlwapi.h>
20 #include <Windows.h>
21 #include <VersionHelpers.h>
22 #else
23 #include <fcntl.h>
24 #include <sys/resource.h>
25 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
26 #include <sys/time.h>
27 #include <unistd.h>
28 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
29 #include <sys/sysctl.h>
30 #endif
31 #endif
32
33 #include <cerrno>
34 #include <cstdio>
35 #include <cstdint>
36 #include <cstdlib>
37 #include <cstring>
38 #include <iostream>
39 #include <limits>
40 #include <mutex>
41
42 #include "arraysize.h"
43 #include "check.h"
44 #include "cycleclock.h"
45 #include "internal_macros.h"
46 #include "log.h"
47 #include "sleep.h"
48 #include "string_util.h"
49
50 namespace benchmark {
51 namespace {
// Passed to std::call_once by CyclesPerSecond() and NumCPUs() so that
// InitializeSystemInfo() runs a single time.
std::once_flag cpuinfo_init;
// Cycle-counter frequency reported by CyclesPerSecond(); holds 1.0 until
// InitializeSystemInfo() assigns a value.
double cpuinfo_cycles_per_second = 1.0;
// CPU count reported by NumCPUs().
int cpuinfo_num_cpus = 1; // Conservative guess
// Locked by MyCPUUsage() around its /proc/self/cputime_ns fast path.
std::mutex cputimens_mutex;
56
57 #if !defined BENCHMARK_OS_MACOSX
58 const int64_t estimate_time_ms = 1000;
59
60 // Helper function estimates cycles/sec by observing cycles elapsed during
61 // sleep(). Using small sleep time decreases accuracy significantly.
EstimateCyclesPerSecond()62 int64_t EstimateCyclesPerSecond() {
63 const int64_t start_ticks = cycleclock::Now();
64 SleepForMilliseconds(estimate_time_ms);
65 return cycleclock::Now() - start_ticks;
66 }
67 #endif
68
69 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
// Reads a single base-10 integer from the beginning of `file`.
//
// Returns true, and stores the number in *value, only when the file can be
// opened and read, contains at least one digit sequence that fits in a long,
// and that number is followed directly by a newline or the end of the data.
// On every failure path false is returned and *value is left untouched.
bool ReadIntFromFile(const char* file, long* value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  char line[1024];
  memset(line, '\0', sizeof(line));
  // Read at most sizeof(line) - 1 bytes so the zeroed final byte always
  // terminates the buffer. Retry if a signal interrupts the call.
  ssize_t bytes_read;
  do {
    bytes_read = read(fd, line, sizeof(line) - 1);
  } while (bytes_read < 0 && errno == EINTR);
  close(fd);

  // read() yields -1 on error and 0 for an empty file; neither holds a number.
  if (bytes_read <= 0) return false;

  char* end = nullptr;
  errno = 0;  // strtol only reports overflow through errno
  const long parsed = strtol(line, &end, 10);
  if (errno == ERANGE) return false;  // value does not fit in a long
  if (end == line) return false;      // no digits were consumed
  if (*end != '\n' && *end != '\0') return false;  // trailing garbage

  *value = parsed;
  return true;
}
89 #endif
90
InitializeSystemInfo()91 void InitializeSystemInfo() {
92 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
93 char line[1024];
94 char* err;
95 long freq;
96
97 bool saw_mhz = false;
98
99 // If the kernel is exporting the tsc frequency use that. There are issues
100 // where cpuinfo_max_freq cannot be relied on because the BIOS may be
101 // exporintg an invalid p-state (on x86) or p-states may be used to put the
102 // processor in a new mode (turbo mode). Essentially, those frequencies
103 // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
104 // well.
105 if (!saw_mhz &&
106 ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
107 // The value is in kHz (as the file name suggests). For example, on a
108 // 2GHz warpstation, the file contains the value "2000000".
109 cpuinfo_cycles_per_second = freq * 1000.0;
110 saw_mhz = true;
111 }
112
113 // If CPU scaling is in effect, we want to use the *maximum* frequency,
114 // not whatever CPU speed some random processor happens to be using now.
115 if (!saw_mhz &&
116 ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
117 &freq)) {
118 // The value is in kHz. For example, on a 2GHz warpstation, the file
119 // contains the value "2000000".
120 cpuinfo_cycles_per_second = freq * 1000.0;
121 saw_mhz = true;
122 }
123
124 // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
125 const char* pname = "/proc/cpuinfo";
126 int fd = open(pname, O_RDONLY);
127 if (fd == -1) {
128 perror(pname);
129 if (!saw_mhz) {
130 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
131 }
132 return;
133 }
134
135 double bogo_clock = 1.0;
136 bool saw_bogo = false;
137 long max_cpu_id = 0;
138 int num_cpus = 0;
139 line[0] = line[1] = '\0';
140 size_t chars_read = 0;
141 do { // we'll exit when the last read didn't read anything
142 // Move the next line to the beginning of the buffer
143 const size_t oldlinelen = strlen(line);
144 if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
145 line[0] = '\0';
146 else // still other lines left to save
147 memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
148 // Terminate the new line, reading more if we can't find the newline
149 char* newline = strchr(line, '\n');
150 if (newline == nullptr) {
151 const size_t linelen = strlen(line);
152 const size_t bytes_to_read = sizeof(line) - 1 - linelen;
153 CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
154 chars_read = read(fd, line + linelen, bytes_to_read);
155 line[linelen + chars_read] = '\0';
156 newline = strchr(line, '\n');
157 }
158 if (newline != nullptr) *newline = '\0';
159
160 // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
161 // accept postive values. Some environments (virtual machines) report zero,
162 // which would cause infinite looping in WallTime_Init.
163 if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
164 const char* freqstr = strchr(line, ':');
165 if (freqstr) {
166 cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
167 if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
168 saw_mhz = true;
169 }
170 } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
171 const char* freqstr = strchr(line, ':');
172 if (freqstr) {
173 bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
174 if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
175 saw_bogo = true;
176 }
177 } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) {
178 // The above comparison is case-sensitive because ARM kernels often
179 // include a "Processor" line that tells you about the CPU, distinct
180 // from the usual "processor" lines that give you CPU ids. No current
181 // Linux architecture is using "Processor" for CPU ids.
182 num_cpus++; // count up every time we see an "processor :" entry
183 const char* id_str = strchr(line, ':');
184 if (id_str) {
185 const long cpu_id = strtol(id_str + 1, &err, 10);
186 if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
187 max_cpu_id = cpu_id;
188 }
189 }
190 } while (chars_read > 0);
191 close(fd);
192
193 if (!saw_mhz) {
194 if (saw_bogo) {
195 // If we didn't find anything better, we'll use bogomips, but
196 // we're not happy about it.
197 cpuinfo_cycles_per_second = bogo_clock;
198 } else {
199 // If we don't even have bogomips, we'll use the slow estimation.
200 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
201 }
202 }
203 if (num_cpus == 0) {
204 fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
205 } else {
206 if ((max_cpu_id + 1) != num_cpus) {
207 fprintf(stderr,
208 "CPU ID assignments in /proc/cpuinfo seem messed up."
209 " This is usually caused by a bad BIOS.\n");
210 }
211 cpuinfo_num_cpus = num_cpus;
212 }
213
214 #elif defined BENCHMARK_OS_FREEBSD
215 // For this sysctl to work, the machine must be configured without
216 // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
217 // and later. Before that, it's a 32-bit quantity (and gives the
218 // wrong answer on machines faster than 2^32 Hz). See
219 // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
220 // But also compare FreeBSD 7.0:
221 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
222 // 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
223 // To FreeBSD 6.3 (it's the same in 6-STABLE):
224 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
225 // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
226 #if __FreeBSD__ >= 7
227 uint64_t hz = 0;
228 #else
229 unsigned int hz = 0;
230 #endif
231 size_t sz = sizeof(hz);
232 const char* sysctl_path = "machdep.tsc_freq";
233 if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
234 fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
235 sysctl_path, strerror(errno));
236 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
237 } else {
238 cpuinfo_cycles_per_second = hz;
239 }
240 // TODO: also figure out cpuinfo_num_cpus
241
242 #elif defined BENCHMARK_OS_WINDOWS
243 // In NT, read MHz from the registry. If we fail to do so or we're in win9x
244 // then make a crude estimate.
245 DWORD data, data_size = sizeof(data);
246 if (IsWindowsXPOrGreater() &&
247 SUCCEEDED(
248 SHGetValueA(HKEY_LOCAL_MACHINE,
249 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
250 "~MHz", nullptr, &data, &data_size)))
251 cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz
252 else
253 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
254 // TODO: also figure out cpuinfo_num_cpus
255
256 #elif defined BENCHMARK_OS_MACOSX
257 // returning "mach time units" per second. the current number of elapsed
258 // mach time units can be found by calling uint64 mach_absolute_time();
259 // while not as precise as actual CPU cycles, it is accurate in the face
260 // of CPU frequency scaling and multi-cpu/core machines.
261 // Our mac users have these types of machines, and accuracy
262 // (i.e. correctness) trumps precision.
263 // See cycleclock.h: CycleClock::Now(), which returns number of mach time
264 // units on Mac OS X.
265 mach_timebase_info_data_t timebase_info;
266 mach_timebase_info(&timebase_info);
267 double mach_time_units_per_nanosecond =
268 static_cast<double>(timebase_info.denom) /
269 static_cast<double>(timebase_info.numer);
270 cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
271
272 int num_cpus = 0;
273 size_t size = sizeof(num_cpus);
274 int numcpus_name[] = {CTL_HW, HW_NCPU};
275 if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) ==
276 0 &&
277 (size == sizeof(num_cpus)))
278 cpuinfo_num_cpus = num_cpus;
279
280 #else
281 // Generic cycles per second counter
282 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
283 #endif
284 }
285 } // end namespace
286
// getrusage() based implementation of MyCPUUsage.
//
// Returns the sum of user and system (kernel) CPU time of the current
// process, in seconds, or 0.0 when the operating system query fails.
// Windows has no getrusage(), so that build uses GetProcessTimes() instead.
static double MyCPUUsageRUsage() {
#ifndef BENCHMARK_OS_WINDOWS
  struct rusage ru;
  if (getrusage(RUSAGE_SELF, &ru) == 0) {
    return (static_cast<double>(ru.ru_utime.tv_sec) +
            static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
            static_cast<double>(ru.ru_stime.tv_sec) +
            static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
  } else {
    return 0.0;
  }
#else
  HANDLE proc = GetCurrentProcess();
  FILETIME creation_time;
  FILETIME exit_time;
  FILETIME kernel_time;
  FILETIME user_time;
  // GetProcessTimes returns zero on failure and leaves the FILETIMEs unset;
  // report 0.0 in that case, matching the getrusage() branch, instead of
  // reading uninitialised values.
  if (!GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time,
                       &user_time)) {
    return 0.0;
  }
  ULARGE_INTEGER kernel;
  ULARGE_INTEGER user;
  kernel.HighPart = kernel_time.dwHighDateTime;
  kernel.LowPart = kernel_time.dwLowDateTime;
  user.HighPart = user_time.dwHighDateTime;
  user.LowPart = user_time.dwLowDateTime;
  // FILETIME values count 100-nanosecond units, hence the 1e-7 factor.
  return (static_cast<double>(kernel.QuadPart) +
          static_cast<double>(user.QuadPart)) * 1e-7;
#endif  // OS_WINDOWS
}
316
317 #ifndef BENCHMARK_OS_WINDOWS
// Reads the number stored in /proc/self/cputime_ns, divides it by 1e9 and
// stores the result in *cputime.
//
// The file descriptor is opened on first use and kept in a function-local
// static for later calls. Returns false, leaving *cputime untouched, if the
// file cannot be opened, cannot be read, or parses to the largest
// unsigned long long; the latter two cases also close the cached descriptor.
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
  static int cputime_fd = -1;  // -1 means "no descriptor currently open"

  // Shared failure path: drop the cached descriptor and report failure.
  const auto discard_fd = [] {
    close(cputime_fd);
    cputime_fd = -1;
    return false;
  };

  if (cputime_fd == -1) {
    const int opened = open("/proc/self/cputime_ns", O_RDONLY);
    if (opened < 0) return false;  // cputime_fd stays at -1
    cputime_fd = opened;
  }

  // Zero-filled, and read one byte short, so the text is NUL-terminated.
  char text[64] = {};
  if (pread(cputime_fd, text, sizeof(text) - 1, 0) <= 0) return discard_fd();

  const unsigned long long nanos = strtoull(text, nullptr, 0);
  if (nanos == (std::numeric_limits<unsigned long long>::max)()) {
    return discard_fd();
  }

  *cputime = static_cast<double>(nanos) / 1e9;
  return true;
}
343 #endif // OS_WINDOWS
344
MyCPUUsage()345 double MyCPUUsage() {
346 #ifndef BENCHMARK_OS_WINDOWS
347 {
348 std::lock_guard<std::mutex> l(cputimens_mutex);
349 static bool use_cputime_ns = true;
350 if (use_cputime_ns) {
351 double value;
352 if (MyCPUUsageCPUTimeNsLocked(&value)) {
353 return value;
354 }
355 // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
356 VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
357 use_cputime_ns = false;
358 }
359 }
360 #endif // OS_WINDOWS
361 return MyCPUUsageRUsage();
362 }
363
// Returns the user plus system CPU time, in seconds, that
// getrusage(RUSAGE_CHILDREN) reports, or 0.0 if that call fails.
// Always 0.0 on Windows.
double ChildrenCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
  struct rusage usage;
  if (getrusage(RUSAGE_CHILDREN, &usage) != 0) {
    return 0.0;
  }
  // Each timeval is split into whole seconds plus microseconds.
  const double user_sec = static_cast<double>(usage.ru_utime.tv_sec);
  const double user_usec = static_cast<double>(usage.ru_utime.tv_usec) * 1e-6;
  const double sys_sec = static_cast<double>(usage.ru_stime.tv_sec);
  const double sys_usec = static_cast<double>(usage.ru_stime.tv_usec) * 1e-6;
  return (user_sec + user_usec + sys_sec + sys_usec);
#else
  // TODO: Not sure what this even means on Windows
  return 0.0;
#endif  // OS_WINDOWS
}
380
CyclesPerSecond(void)381 double CyclesPerSecond(void) {
382 std::call_once(cpuinfo_init, InitializeSystemInfo);
383 return cpuinfo_cycles_per_second;
384 }
385
NumCPUs(void)386 int NumCPUs(void) {
387 std::call_once(cpuinfo_init, InitializeSystemInfo);
388 return cpuinfo_num_cpus;
389 }
390
// Length of a string literal, not counting its terminating NUL.
// The ""'s catch people who don't pass in a literal for "str"
#define strliterallen(str) (sizeof("" str "") - 1)

// If the `len` bytes starting at `str` begin with the literal `prefix`,
// evaluates to a pointer just past that prefix; otherwise to nullptr.
// Must use a string literal for prefix. `str` is parenthesised so that an
// expression argument (e.g. `cond ? a : b`) binds as a whole; note that it is
// still evaluated more than once.
#define memprefix(str, len, prefix)                           \
  ((((len) >= strliterallen(prefix)) &&                       \
    std::memcmp((str), prefix, strliterallen(prefix)) == 0)   \
       ? (str) + strliterallen(prefix)                        \
       : nullptr)
400
CpuScalingEnabled()401 bool CpuScalingEnabled() {
402 #ifndef BENCHMARK_OS_WINDOWS
403 // On Linux, the CPUfreq subsystem exposes CPU information as files on the
404 // local file system. If reading the exported files fails, then we may not be
405 // running on Linux, so we silently ignore all the read errors.
406 for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
407 std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu,
408 "/cpufreq/scaling_governor");
409 FILE* file = fopen(governor_file.c_str(), "r");
410 if (!file) break;
411 char buff[16];
412 size_t bytes_read = fread(buff, 1, sizeof(buff), file);
413 fclose(file);
414 if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
415 }
416 #endif
417 return false;
418 }
419
420 } // end namespace benchmark
421