1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // os.cc : os and machine specific implementation
17 // This file includes an abstracted interface
18 // for linux-distro specific and HW specific
19 // interfaces.
20 
21 #include "os.h"
22 
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <linux/types.h>
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/ioctl.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/ipc.h>
35 #ifdef HAVE_SYS_SHM_H
36 #include <sys/shm.h>
37 #endif
38 #include <unistd.h>
39 
40 #ifndef SHM_HUGETLB
41 #define SHM_HUGETLB      04000  // remove when glibc defines it
42 #endif
43 
44 #include <string>
45 #include <list>
46 
47 // This file must work with autoconf on its public version,
48 // so these includes are correct.
49 #include "sattypes.h"
50 #include "error_diag.h"
51 #include "clock.h"
52 
53 // OsLayer initialization.
OsLayer()54 OsLayer::OsLayer() {
55   testmem_ = 0;
56   testmemsize_ = 0;
57   totalmemsize_ = 0;
58   min_hugepages_bytes_ = 0;
59   reserve_mb_ = 0;
60   normal_mem_ = true;
61   use_hugepages_ = false;
62   use_posix_shm_ = false;
63   dynamic_mapped_shmem_ = false;
64   mmapped_allocation_ = false;
65   shmid_ = 0;
66 
67   time_initialized_ = 0;
68 
69   regionsize_ = 0;
70   regioncount_ = 1;
71   num_cpus_ = 0;
72   num_nodes_ = 0;
73   num_cpus_per_node_ = 0;
74   error_diagnoser_ = 0;
75   err_log_callback_ = 0;
76   error_injection_ = false;
77 
78   void *pvoid = 0;
79   address_mode_ = sizeof(pvoid) * 8;
80 
81   has_clflush_ = false;
82   has_vector_ = false;
83 
84   use_flush_page_cache_ = false;
85 
86   clock_ = NULL;
87 }
88 
89 // OsLayer cleanup.
~OsLayer()90 OsLayer::~OsLayer() {
91   if (error_diagnoser_)
92     delete error_diagnoser_;
93   if (clock_)
94     delete clock_;
95 }
96 
97 // OsLayer initialization.
Initialize()98 bool OsLayer::Initialize() {
99   if (!clock_) {
100     clock_ = new Clock();
101   }
102 
103   time_initialized_ = clock_->Now();
104   // Detect asm support.
105   GetFeatures();
106 
107   if (num_cpus_ == 0) {
108     num_nodes_ = 1;
109     num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
110     num_cpus_per_node_ = num_cpus_ / num_nodes_;
111   }
112   logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
113   sat_assert(CPU_SETSIZE >= num_cpus_);
114   cpu_sets_.resize(num_nodes_);
115   cpu_sets_valid_.resize(num_nodes_);
116   // Create error diagnoser.
117   error_diagnoser_ = new ErrorDiag();
118   if (!error_diagnoser_->set_os(this))
119     return false;
120   return true;
121 }
122 
123 // Machine type detected. Can we implement all these functions correctly?
IsSupported()124 bool OsLayer::IsSupported() {
125   if (kOpenSource) {
126     // There are no explicitly supported systems in open source version.
127     return true;
128   }
129 
130   // This is the default empty implementation.
131   // SAT won't report full error information.
132   return false;
133 }
134 
AddressMode()135 int OsLayer::AddressMode() {
136   // Detect 32/64 bit binary.
137   void *pvoid = 0;
138   return sizeof(pvoid) * 8;
139 }
140 
141 // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)142 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
143   uint64 frame, shift;
144   off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
145   int fd = open(kPagemapPath, O_RDONLY);
146   // /proc/self/pagemap is available in kernel >= 2.6.25
147   if (fd < 0)
148     return 0;
149 
150   if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
151     int err = errno;
152     string errtxt = ErrorString(err);
153     logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
154               kPagemapPath, err, errtxt.c_str());
155     if (fd >= 0)
156       close(fd);
157     return 0;
158   }
159   close(fd);
160   if (!(frame & (1LL << 63)) || (frame & (1LL << 62)))
161     return 0;
162   shift = (frame >> 55) & 0x3f;
163   frame = (frame & 0x007fffffffffffffLL) << shift;
164   return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1));
165 }
166 
167 // Returns the HD device that contains this file.
FindFileDevice(string filename)168 string OsLayer::FindFileDevice(string filename) {
169   return "hdUnknown";
170 }
171 
172 // Returns a list of locations corresponding to HD devices.
FindFileDevices()173 list<string> OsLayer::FindFileDevices() {
174   // No autodetection on unknown systems.
175   list<string> locations;
176   return locations;
177 }
178 
179 
180 // Get HW core features from cpuid instruction.
GetFeatures()181 void OsLayer::GetFeatures() {
182 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
183   unsigned int eax = 1, ebx, ecx, edx;
184   cpuid(&eax, &ebx, &ecx, &edx);
185   has_clflush_ = (edx >> 19) & 1;
186   has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.
187 
188   logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
189             has_clflush_ ? "true" : "false",
190             has_vector_ ? "true" : "false");
191 #elif defined(STRESSAPPTEST_CPU_PPC)
192   // All PPC implementations have cache flush instructions.
193   has_clflush_ = true;
194 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
195   // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
196   // For now assume neon and don't run -W if you don't have it.
197   has_vector_ = true; // NEON.
198 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
199 #else
200 #warning "Unsupported CPU type: unable to determine feature set."
201 #endif
202 }
203 
204 
205 // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)206 void OsLayer::ActivateFlushPageCache(void) {
207   logprintf(9, "Log: page cache will be flushed as needed\n");
208   use_flush_page_cache_ = true;
209 }
210 
211 // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)212 bool OsLayer::FlushPageCache(void) {
213   if (!use_flush_page_cache_)
214     return true;
215 
216   // First, ask the kernel to write the cache to the disk.
217   sync();
218 
219   // Second, ask the kernel to empty the cache by writing "1" to
220   // "/proc/sys/vm/drop_caches".
221   static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
222   int dcfile = open(drop_caches_file, O_WRONLY);
223   if (dcfile < 0) {
224     int err = errno;
225     string errtxt = ErrorString(err);
226     logprintf(3, "Log: failed to open %s - err %d (%s)\n",
227               drop_caches_file, err, errtxt.c_str());
228     return false;
229   }
230 
231   ssize_t bytes_written = write(dcfile, "1", 1);
232   close(dcfile);
233 
234   if (bytes_written != 1) {
235     int err = errno;
236     string errtxt = ErrorString(err);
237     logprintf(3, "Log: failed to write %s - err %d (%s)\n",
238               drop_caches_file, err, errtxt.c_str());
239     return false;
240   }
241   return true;
242 }
243 
244 
245 // We need to flush the cacheline here.
Flush(void * vaddr)246 void OsLayer::Flush(void *vaddr) {
247   // Use the generic flush. This function is just so we can override
248   // this if we are so inclined.
249   if (has_clflush_) {
250     OsLayer::FastFlush(vaddr);
251   }
252 }
253 
254 
255 // Run C or ASM copy as appropriate..
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)256 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
257                               unsigned int size_in_bytes,
258                               AdlerChecksum *checksum) {
259   if (has_vector_) {
260     return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
261   } else {
262     return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
263   }
264 }
265 
266 
267 // Translate physical address to memory module/chip name.
268 // Assumes interleaving between two memory channels based on the XOR of
269 // all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
270 // blocks with bits distributed from each chip in that channel.
FindDimm(uint64 addr,char * buf,int len)271 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
272   if (!channels_) {
273     snprintf(buf, len, "DIMM Unknown");
274     return -1;
275   }
276 
277   // Find channel by XORing address bits in channel_hash mask.
278   uint32 low = static_cast<uint32>(addr & channel_hash_);
279   uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
280   vector<string>& channel = (*channels_)[
281       __builtin_parity(high) ^ __builtin_parity(low)];
282 
283   // Find dram chip by finding which byte within the channel
284   // by address mod channel width, then divide the channel
285   // evenly among the listed dram chips. Note, this will not work
286   // with x4 dram.
287   int chip = (addr % (channel_width_ / 8)) /
288              ((channel_width_ / 8) / channel.size());
289   string name = channel[chip];
290   snprintf(buf, len, "%s", name.c_str());
291   return 1;
292 }
293 
294 
295 // Classifies addresses according to "regions"
296 // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)297 int32 OsLayer::FindRegion(uint64 addr) {
298   static bool warned = false;
299 
300   if (regionsize_ == 0) {
301     regionsize_ = totalmemsize_ / 8;
302     if (regionsize_ < 512 * kMegabyte)
303       regionsize_ = 512 * kMegabyte;
304     regioncount_ = totalmemsize_ / regionsize_;
305     if (regioncount_ < 1) regioncount_ = 1;
306   }
307 
308   int32 region_num = addr / regionsize_;
309   if (region_num >= regioncount_) {
310     if (!warned) {
311         logprintf(0, "Log: region number %d exceeds region count %d\n",
312                   region_num, regioncount_);
313         warned = true;
314     }
315     region_num = region_num % regioncount_;
316   }
317   return region_num;
318 }
319 
320 // Report which cores are associated with a given region.
FindCoreMask(int32 region)321 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
322   sat_assert(region >= 0);
323   region %= num_nodes_;
324   if (!cpu_sets_valid_[region]) {
325     CPU_ZERO(&cpu_sets_[region]);
326     for (int i = 0; i < num_cpus_per_node_; ++i) {
327       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
328     }
329     cpu_sets_valid_[region] = true;
330     logprintf(5, "Log: Region %d mask 0x%s\n",
331                  region, FindCoreMaskFormat(region).c_str());
332   }
333   return &cpu_sets_[region];
334 }
335 
336 // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)337 string OsLayer::FindCoreMaskFormat(int32 region) {
338   cpu_set_t* mask = FindCoreMask(region);
339   string format = cpuset_format(mask);
340   if (format.size() < 8)
341     format = string(8 - format.size(), '0') + format;
342   return format;
343 }
344 
345 // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)346 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
347   time_t now = clock_->Now();
348   int ttf = now - time_initialized_;
349   if (strlen(symptom) && strlen(part)) {
350     logprintf(0, "Report Error: %s : %s : %d : %ds\n",
351               symptom, part, count, ttf);
352   } else {
353     // Log something so the error still shows up, but this won't break the
354     // parser.
355     logprintf(0, "Warning: Invalid Report Error: "
356               "%s : %s : %d : %ds\n", symptom, part, count, ttf);
357   }
358   return true;
359 }
360 
361 // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()362 int64 OsLayer::FindHugePages() {
363   char buf[65] = "0";
364 
365   // This is a kernel interface to query the numebr of hugepages
366   // available in the system.
367   static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
368   int hpfile = open(hugepages_info_file, O_RDONLY);
369 
370   ssize_t bytes_read = read(hpfile, buf, 64);
371   close(hpfile);
372 
373   if (bytes_read <= 0) {
374     logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
375                   "read did not provide data\n");
376     return 0;
377   }
378 
379   if (bytes_read == 64) {
380     logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
381                  "is surprisingly large\n");
382     return 0;
383   }
384 
385   // Add a null termintation to be string safe.
386   buf[bytes_read] = '\0';
387   // Read the page count.
388   int64 pages = strtoull(buf, NULL, 10);  // NOLINT
389 
390   return pages;
391 }
392 
FindFreeMemSize()393 int64 OsLayer::FindFreeMemSize() {
394   int64 size = 0;
395   int64 minsize = 0;
396   if (totalmemsize_ > 0)
397     return totalmemsize_;
398 
399   int64 pages = sysconf(_SC_PHYS_PAGES);
400   int64 avpages = sysconf(_SC_AVPHYS_PAGES);
401   int64 pagesize = sysconf(_SC_PAGESIZE);
402   int64 physsize = pages * pagesize;
403   int64 avphyssize = avpages * pagesize;
404 
405   // Assume 2MB hugepages.
406   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
407 
408   if ((pages == -1) || (pagesize == -1)) {
409     logprintf(0, "Process Error: sysconf could not determine memory size.\n");
410     return 0;
411   }
412 
413   // We want to leave enough stuff for things to run.
414   // If the user specified a minimum amount of memory to expect, require that.
415   // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
416   // If less than 2GB is present use 85% of what's available.
417   // These are fairly arbitrary numbers that seem to work OK.
418   //
419   // TODO(nsanders): is there a more correct way to determine target
420   // memory size?
421   if (hugepagesize > 0) {
422     if (min_hugepages_bytes_ > 0) {
423       minsize = min_hugepages_bytes_;
424     } else {
425       minsize = hugepagesize;
426     }
427   } else {
428     if (physsize < 2048LL * kMegabyte) {
429       minsize = ((pages * 85) / 100) * pagesize;
430     } else {
431       minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
432     }
433     // Make sure that at least reserve_mb_ is left for the system.
434     if (reserve_mb_ > 0) {
435       int64 totalsize = pages * pagesize;
436       int64 reserve_kb = reserve_mb_ * kMegabyte;
437       if (reserve_kb > totalsize) {
438         logprintf(0, "Procedural Error: %lld is bigger than the total memory "
439                   "available %lld\n", reserve_kb, totalsize);
440       } else if (reserve_kb > totalsize - minsize) {
441         logprintf(5, "Warning: Overriding memory to use: original %lld, "
442                   "current %lld\n", minsize, totalsize - reserve_kb);
443         minsize = totalsize - reserve_kb;
444       }
445     }
446   }
447 
448   // Use hugepage sizing if available.
449   if (hugepagesize > 0) {
450     if (hugepagesize < minsize) {
451       logprintf(0, "Procedural Error: Not enough hugepages. "
452                    "%lldMB available < %lldMB required.\n",
453                 hugepagesize / kMegabyte,
454                 minsize / kMegabyte);
455       // Require the calculated minimum amount of memory.
456       size = minsize;
457     } else {
458       // Require that we get all hugepages.
459       size = hugepagesize;
460     }
461   } else {
462     // Require the calculated minimum amount of memory.
463     size = minsize;
464   }
465 
466   logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
467                "Targeting %lld MB (%lld%%)\n",
468             physsize / kMegabyte,
469             avphyssize / kMegabyte,
470             hugepagesize / kMegabyte,
471             size / kMegabyte,
472             size * 100 / physsize);
473 
474   totalmemsize_ = size;
475   return size;
476 }
477 
478 // Allocates all memory available.
AllocateAllMem()479 int64 OsLayer::AllocateAllMem() {
480   int64 length = FindFreeMemSize();
481   bool retval = AllocateTestMem(length, 0);
482   if (retval)
483     return length;
484   else
485     return 0;
486 }
487 
488 // Allocate the target memory. This may be from malloc, hugepage pool
489 // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)490 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
491   // Try hugepages first.
492   void *buf = 0;
493 
494   sat_assert(length >= 0);
495 
496   if (paddr_base)
497     logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
498               " ignore.\n", paddr_base);
499 
500   // Determine optimal memory allocation path.
501   bool prefer_hugepages = false;
502   bool prefer_posix_shm = false;
503   bool prefer_dynamic_mapping = false;
504 
505   // Are there enough hugepages?
506   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
507   // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
508   if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
509     prefer_dynamic_mapping = true;
510     prefer_posix_shm = true;
511     logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
512     logprintf(3, "Log: You may need to run "
513                  "'sudo mount -o remount,size=100\% /dev/shm.'\n");
514   } else if (hugepagesize >= length) {
515     prefer_hugepages = true;
516     logprintf(3, "Log: Prefer using hugepage allocation.\n");
517   } else {
518     logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
519   }
520 
521 #ifdef HAVE_SYS_SHM_H
522   // Allocate hugepage mapped memory.
523   if (prefer_hugepages) {
524     do { // Allow break statement.
525       int shmid;
526       void *shmaddr;
527 
528       if ((shmid = shmget(2, length,
529               SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
530         int err = errno;
531         string errtxt = ErrorString(err);
532         logprintf(3, "Log: failed to allocate shared hugepage "
533                       "object - err %d (%s)\n",
534                   err, errtxt.c_str());
535         logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
536         break;
537       }
538 
539       shmaddr = shmat(shmid, NULL, 0);
540       if (shmaddr == reinterpret_cast<void*>(-1)) {
541         int err = errno;
542         string errtxt = ErrorString(err);
543         logprintf(0, "Log: failed to attach shared "
544                      "hugepage object - err %d (%s).\n",
545                   err, errtxt.c_str());
546         if (shmctl(shmid, IPC_RMID, NULL) < 0) {
547           int err = errno;
548           string errtxt = ErrorString(err);
549           logprintf(0, "Log: failed to remove shared "
550                        "hugepage object - err %d (%s).\n",
551                     err, errtxt.c_str());
552         }
553         break;
554       }
555       use_hugepages_ = true;
556       shmid_ = shmid;
557       buf = shmaddr;
558       logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
559                 shmid, shmaddr);
560     } while (0);
561   }
562 
563   if ((!use_hugepages_) && prefer_posix_shm) {
564     do {
565       int shm_object;
566       void *shmaddr = NULL;
567 
568       shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
569       if (shm_object < 0) {
570         int err = errno;
571         string errtxt = ErrorString(err);
572         logprintf(3, "Log: failed to allocate shared "
573                       "smallpage object - err %d (%s)\n",
574                   err, errtxt.c_str());
575         break;
576       }
577 
578       if (0 > ftruncate(shm_object, length)) {
579         int err = errno;
580         string errtxt = ErrorString(err);
581         logprintf(3, "Log: failed to ftruncate shared "
582                       "smallpage object - err %d (%s)\n",
583                   err, errtxt.c_str());
584         break;
585       }
586 
587       // 32 bit linux apps can only use ~1.4G of address space.
588       // Use dynamic mapping for allocations larger than that.
589       // Currently perf hit is ~10% for this.
590       if (prefer_dynamic_mapping) {
591         dynamic_mapped_shmem_ = true;
592       } else {
593         // Do a full mapping here otherwise.
594         shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
595                          MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
596                          shm_object, 0);
597         if (shmaddr == reinterpret_cast<void*>(-1)) {
598           int err = errno;
599           string errtxt = ErrorString(err);
600           logprintf(0, "Log: failed to map shared "
601                        "smallpage object - err %d (%s).\n",
602                     err, errtxt.c_str());
603           break;
604         }
605       }
606 
607       use_posix_shm_ = true;
608       shmid_ = shm_object;
609       buf = shmaddr;
610       char location_message[256] = "";
611       if (dynamic_mapped_shmem_) {
612         sprintf(location_message, "mapped as needed");
613       } else {
614         sprintf(location_message, "at %p", shmaddr);
615       }
616       logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
617                 shm_object, location_message);
618     } while (0);
619     shm_unlink("/stressapptest");
620   }
621 #endif  // HAVE_SYS_SHM_H
622 
623   if (!use_hugepages_ && !use_posix_shm_) {
624     // If the page size is what SAT is expecting explicitly perform mmap()
625     // allocation.
626     if (sysconf(_SC_PAGESIZE) >= 4096) {
627       void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
628                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
629       if (map_buf != MAP_FAILED) {
630         buf = map_buf;
631         mmapped_allocation_ = true;
632         logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
633       }
634     }
635     if (!mmapped_allocation_) {
636       // Use memalign to ensure that blocks are aligned enough for disk direct
637       // IO.
638       buf = static_cast<char*>(memalign(4096, length));
639       if (buf) {
640         logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
641       } else {
642         logprintf(0, "Process Error: memalign returned 0\n");
643         if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
644           logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
645                        "bit process. Please setup shared memory.\n");
646         }
647       }
648     }
649   }
650 
651   testmem_ = buf;
652   if (buf || dynamic_mapped_shmem_) {
653     testmemsize_ = length;
654   } else {
655     testmemsize_ = 0;
656   }
657 
658   return (buf != 0) || dynamic_mapped_shmem_;
659 }
660 
661 // Free the test memory.
FreeTestMem()662 void OsLayer::FreeTestMem() {
663   if (testmem_) {
664     if (use_hugepages_) {
665 #ifdef HAVE_SYS_SHM_H
666       shmdt(testmem_);
667       shmctl(shmid_, IPC_RMID, NULL);
668 #endif
669     } else if (use_posix_shm_) {
670       if (!dynamic_mapped_shmem_) {
671         munmap(testmem_, testmemsize_);
672       }
673       close(shmid_);
674     } else if (mmapped_allocation_) {
675       munmap(testmem_, testmemsize_);
676     } else {
677       free(testmem_);
678     }
679     testmem_ = 0;
680     testmemsize_ = 0;
681   }
682 }
683 
684 
685 // Prepare the target memory. It may requre mapping in, or this may be a noop.
PrepareTestMem(uint64 offset,uint64 length)686 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
687   sat_assert((offset + length) <= testmemsize_);
688   if (dynamic_mapped_shmem_) {
689     // TODO(nsanders): Check if we can support MAP_NONBLOCK,
690     // and evaluate performance hit from not using it.
691 #ifdef HAVE_MMAP64
692     void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
693                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
694                      shmid_, offset);
695 #else
696     void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
697                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
698                      shmid_, offset);
699 #endif
700     if (mapping == MAP_FAILED) {
701       string errtxt = ErrorString(errno);
702       logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
703                    "error: %s.\n",
704                 offset, length, errtxt.c_str());
705       sat_assert(0);
706     }
707     return mapping;
708   }
709 
710   return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
711 }
712 
713 // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)714 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
715   if (dynamic_mapped_shmem_) {
716     int retval = munmap(addr, length);
717     if (retval == -1) {
718       string errtxt = ErrorString(errno);
719       logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
720                    "error: %s.\n",
721                 addr, length, errtxt.c_str());
722       sat_assert(0);
723     }
724   }
725 }
726 
727 // No error polling on unknown systems.
ErrorPoll()728 int OsLayer::ErrorPoll() {
729   return 0;
730 }
731 
732 // Generally, poll for errors once per second.
ErrorWait()733 void OsLayer::ErrorWait() {
734   sat_sleep(1);
735   return;
736 }
737 
738 // Open a PCI bus-dev-func as a file and return its file descriptor.
739 // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)740 int OsLayer::PciOpen(int bus, int device, int function) {
741   char dev_file[256];
742 
743   snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
744            bus, device, function);
745 
746   int fd = open(dev_file, O_RDWR);
747   if (fd == -1) {
748     logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
749                  "function %d (errno %d).\n",
750               bus, device, function, errno);
751     return -1;
752   }
753 
754   return fd;
755 }
756 
757 
758 // Read and write functions to access PCI config.
PciRead(int fd,uint32 offset,int width)759 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
760   // Strict aliasing rules lawyers will cause data corruption
761   // on cast pointers in some gccs.
762   union {
763     uint32 l32;
764     uint16 l16;
765     uint8 l8;
766   } datacast;
767   datacast.l32 = 0;
768   uint32 size = width / 8;
769 
770   sat_assert((width == 32) || (width == 16) || (width == 8));
771   sat_assert(offset <= (256 - size));
772 
773   if (lseek(fd, offset, SEEK_SET) < 0) {
774     logprintf(0, "Process Error: Can't seek %x\n", offset);
775     return 0;
776   }
777   if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
778     logprintf(0, "Process Error: Can't read %x\n", offset);
779     return 0;
780   }
781 
782   // Extract the data.
783   switch (width) {
784     case 8:
785       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
786       return datacast.l8;
787     case 16:
788       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
789       return datacast.l16;
790     case 32:
791       return datacast.l32;
792   }
793   return 0;
794 }
795 
PciWrite(int fd,uint32 offset,uint32 value,int width)796 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
797   // Strict aliasing rules lawyers will cause data corruption
798   // on cast pointers in some gccs.
799   union {
800     uint32 l32;
801     uint16 l16;
802     uint8 l8;
803   } datacast;
804   datacast.l32 = 0;
805   uint32 size = width / 8;
806 
807   sat_assert((width == 32) || (width == 16) || (width == 8));
808   sat_assert(offset <= (256 - size));
809 
810   // Cram the data into the right alignment.
811   switch (width) {
812     case 8:
813       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
814       datacast.l8 = value;
815     case 16:
816       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
817       datacast.l16 = value;
818     case 32:
819       datacast.l32 = value;
820   }
821 
822   if (lseek(fd, offset, SEEK_SET) < 0) {
823     logprintf(0, "Process Error: Can't seek %x\n", offset);
824     return;
825   }
826   if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
827     logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
828     return;
829   }
830 
831   return;
832 }
833 
834 
835 
836 // Open dev msr.
OpenMSR(uint32 core,uint32 address)837 int OsLayer::OpenMSR(uint32 core, uint32 address) {
838   char buf[256];
839   snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
840   int fd = open(buf, O_RDWR);
841   if (fd < 0)
842     return fd;
843 
844   uint32 pos = lseek(fd, address, SEEK_SET);
845   if (pos != address) {
846     close(fd);
847     logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
848     return -1;
849   }
850 
851   return fd;
852 }
853 
ReadMSR(uint32 core,uint32 address,uint64 * data)854 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
855   int fd = OpenMSR(core, address);
856   if (fd < 0)
857     return false;
858 
859   // Read from the msr.
860   bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
861 
862   if (!res)
863     logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
864 
865   close(fd);
866 
867   return res;
868 }
869 
WriteMSR(uint32 core,uint32 address,uint64 * data)870 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
871   int fd = OpenMSR(core, address);
872   if (fd < 0)
873     return false;
874 
875   // Write to the msr
876   bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
877 
878   if (!res)
879     logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
880 
881   close(fd);
882 
883   return res;
884 }
885 
886 // Extract bits [n+len-1, n] from a 32 bit word.
887 // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)888 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
889   return (val >> n) & ((1<<len) - 1);
890 }
891 
892 // Generic CPU stress workload that would work on any CPU/Platform.
893 // Float-point array moving average calculation.
CpuStressWorkload()894 bool OsLayer::CpuStressWorkload() {
895   double float_arr[100];
896   double sum = 0;
897 #ifdef HAVE_RAND_R
898   unsigned int seed = 12345;
899 #endif
900 
901   // Initialize array with random numbers.
902   for (int i = 0; i < 100; i++) {
903 #ifdef HAVE_RAND_R
904     float_arr[i] = rand_r(&seed);
905     if (rand_r(&seed) % 2)
906       float_arr[i] *= -1.0;
907 #else
908     srand(time(NULL));
909     float_arr[i] = rand();  // NOLINT
910     if (rand() % 2)         // NOLINT
911       float_arr[i] *= -1.0;
912 #endif
913   }
914 
915   // Calculate moving average.
916   for (int i = 0; i < 100000000; i++) {
917     float_arr[i % 100] =
918       (float_arr[i % 100] + float_arr[(i + 1) % 100] +
919        float_arr[(i + 99) % 100]) / 3;
920     sum += float_arr[i % 100];
921   }
922 
923   // Artificial printf so the loops do not get optimized away.
924   if (sum == 0.0)
925     logprintf(12, "Log: I'm Feeling Lucky!\n");
926   return true;
927 }
928