1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // os.cc : os and machine specific implementation
17 // This file includes an abstracted interface
18 // for linux-distro specific and HW specific
19 // interfaces.
20 
21 #include "os.h"
22 
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <linux/types.h>
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/ioctl.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/ipc.h>
35 #ifdef HAVE_SYS_SHM_H
36 #include <sys/shm.h>
37 #endif
38 #include <unistd.h>
39 
40 #ifndef SHM_HUGETLB
41 #define SHM_HUGETLB      04000  // remove when glibc defines it
42 #endif
43 
44 #include <string>
45 #include <list>
46 
47 // This file must work with autoconf on its public version,
48 // so these includes are correct.
49 #include "sattypes.h"
50 #include "error_diag.h"
51 #include "clock.h"
52 
53 // OsLayer initialization.
OsLayer()54 OsLayer::OsLayer() {
55   testmem_ = 0;
56   testmemsize_ = 0;
57   totalmemsize_ = 0;
58   min_hugepages_bytes_ = 0;
59   reserve_mb_ = 0;
60   normal_mem_ = true;
61   use_hugepages_ = false;
62   use_posix_shm_ = false;
63   dynamic_mapped_shmem_ = false;
64   mmapped_allocation_ = false;
65   shmid_ = 0;
66   channels_ = NULL;
67 
68   time_initialized_ = 0;
69 
70   regionsize_ = 0;
71   regioncount_ = 1;
72   num_cpus_ = 0;
73   num_nodes_ = 0;
74   num_cpus_per_node_ = 0;
75   error_diagnoser_ = 0;
76   err_log_callback_ = 0;
77   error_injection_ = false;
78 
79   void *pvoid = 0;
80   address_mode_ = sizeof(pvoid) * 8;
81 
82   has_clflush_ = false;
83   has_vector_ = false;
84 
85   use_flush_page_cache_ = false;
86 
87   clock_ = NULL;
88 }
89 
90 // OsLayer cleanup.
~OsLayer()91 OsLayer::~OsLayer() {
92   if (error_diagnoser_)
93     delete error_diagnoser_;
94   if (clock_)
95     delete clock_;
96 }
97 
98 // OsLayer initialization.
Initialize()99 bool OsLayer::Initialize() {
100   if (!clock_) {
101     clock_ = new Clock();
102   }
103 
104   time_initialized_ = clock_->Now();
105   // Detect asm support.
106   GetFeatures();
107 
108   if (num_cpus_ == 0) {
109     num_nodes_ = 1;
110     num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
111     num_cpus_per_node_ = num_cpus_ / num_nodes_;
112   }
113   logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
114   sat_assert(CPU_SETSIZE >= num_cpus_);
115   cpu_sets_.resize(num_nodes_);
116   cpu_sets_valid_.resize(num_nodes_);
117   // Create error diagnoser.
118   error_diagnoser_ = new ErrorDiag();
119   if (!error_diagnoser_->set_os(this))
120     return false;
121   return true;
122 }
123 
124 // Machine type detected. Can we implement all these functions correctly?
IsSupported()125 bool OsLayer::IsSupported() {
126   if (kOpenSource) {
127     // There are no explicitly supported systems in open source version.
128     return true;
129   }
130 
131   // This is the default empty implementation.
132   // SAT won't report full error information.
133   return false;
134 }
135 
AddressMode()136 int OsLayer::AddressMode() {
137   // Detect 32/64 bit binary.
138   void *pvoid = 0;
139   return sizeof(pvoid) * 8;
140 }
141 
142 // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)143 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
144   uint64 frame, paddr, pfnmask, pagemask;
145   int pagesize = sysconf(_SC_PAGESIZE);
146   off64_t off = ((uintptr_t)vaddr) / pagesize * 8;
147   int fd = open(kPagemapPath, O_RDONLY);
148 
149   /*
150    * https://www.kernel.org/doc/Documentation/vm/pagemap.txt
151    * API change (July 2015)
152    * https://patchwork.kernel.org/patch/6787991/
153    */
154 
155   if (fd < 0)
156     return 0;
157 
158   if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
159     int err = errno;
160     string errtxt = ErrorString(err);
161     logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
162               kPagemapPath, err, errtxt.c_str());
163     if (fd >= 0)
164       close(fd);
165     return 0;
166   }
167   close(fd);
168 
169   /* Check if page is present and not swapped. */
170   if (!(frame & (1ULL << 63)) || (frame & (1ULL << 62)))
171     return 0;
172 
173   /* pfn is bits 0-54. */
174   pfnmask = ((1ULL << 55) - 1);
175   /* Pagesize had better be a power of 2. */
176   pagemask = pagesize - 1;
177 
178   paddr = ((frame & pfnmask) * pagesize) | ((uintptr_t)vaddr & pagemask);
179   return paddr;
180 }
181 
182 // Returns the HD device that contains this file.
FindFileDevice(string filename)183 string OsLayer::FindFileDevice(string filename) {
184   return "hdUnknown";
185 }
186 
187 // Returns a list of locations corresponding to HD devices.
FindFileDevices()188 list<string> OsLayer::FindFileDevices() {
189   // No autodetection on unknown systems.
190   list<string> locations;
191   return locations;
192 }
193 
194 
195 // Get HW core features from cpuid instruction.
GetFeatures()196 void OsLayer::GetFeatures() {
197 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
198   unsigned int eax = 1, ebx, ecx, edx;
199   cpuid(&eax, &ebx, &ecx, &edx);
200   has_clflush_ = (edx >> 19) & 1;
201   has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.
202 
203   logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
204             has_clflush_ ? "true" : "false",
205             has_vector_ ? "true" : "false");
206 #elif defined(STRESSAPPTEST_CPU_PPC)
207   // All PPC implementations have cache flush instructions.
208   has_clflush_ = true;
209 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
210   // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
211   // For now assume neon and don't run -W if you don't have it.
212   has_vector_ = true; // NEON.
213 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
214 #else
215 #warning "Unsupported CPU type: unable to determine feature set."
216 #endif
217 }
218 
219 
220 // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)221 void OsLayer::ActivateFlushPageCache(void) {
222   logprintf(9, "Log: page cache will be flushed as needed\n");
223   use_flush_page_cache_ = true;
224 }
225 
226 // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)227 bool OsLayer::FlushPageCache(void) {
228   if (!use_flush_page_cache_)
229     return true;
230 
231   // First, ask the kernel to write the cache to the disk.
232   sync();
233 
234   // Second, ask the kernel to empty the cache by writing "1" to
235   // "/proc/sys/vm/drop_caches".
236   static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
237   int dcfile = open(drop_caches_file, O_WRONLY);
238   if (dcfile < 0) {
239     int err = errno;
240     string errtxt = ErrorString(err);
241     logprintf(3, "Log: failed to open %s - err %d (%s)\n",
242               drop_caches_file, err, errtxt.c_str());
243     return false;
244   }
245 
246   ssize_t bytes_written = write(dcfile, "1", 1);
247   close(dcfile);
248 
249   if (bytes_written != 1) {
250     int err = errno;
251     string errtxt = ErrorString(err);
252     logprintf(3, "Log: failed to write %s - err %d (%s)\n",
253               drop_caches_file, err, errtxt.c_str());
254     return false;
255   }
256   return true;
257 }
258 
259 
260 // We need to flush the cacheline here.
Flush(void * vaddr)261 void OsLayer::Flush(void *vaddr) {
262   // Use the generic flush. This function is just so we can override
263   // this if we are so inclined.
264   if (has_clflush_) {
265     OsLayer::FastFlush(vaddr);
266   }
267 }
268 
269 
270 // Run C or ASM copy as appropriate..
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)271 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
272                               unsigned int size_in_bytes,
273                               AdlerChecksum *checksum) {
274   if (has_vector_) {
275     return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
276   } else {
277     return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
278   }
279 }
280 
281 
282 // Translate physical address to memory module/chip name.
283 // Assumes interleaving between two memory channels based on the XOR of
284 // all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
285 // blocks with bits distributed from each chip in that channel.
FindDimm(uint64 addr,char * buf,int len)286 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
287   if (!channels_) {
288     snprintf(buf, len, "DIMM Unknown");
289     return -1;
290   }
291 
292   // Find channel by XORing address bits in channel_hash mask.
293   uint32 low = static_cast<uint32>(addr & channel_hash_);
294   uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
295   vector<string>& channel = (*channels_)[
296       __builtin_parity(high) ^ __builtin_parity(low)];
297 
298   // Find dram chip by finding which byte within the channel
299   // by address mod channel width, then divide the channel
300   // evenly among the listed dram chips. Note, this will not work
301   // with x4 dram.
302   int chip = (addr % (channel_width_ / 8)) /
303              ((channel_width_ / 8) / channel.size());
304   string name = channel[chip];
305   snprintf(buf, len, "%s", name.c_str());
306   return 1;
307 }
308 
309 
310 // Classifies addresses according to "regions"
311 // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)312 int32 OsLayer::FindRegion(uint64 addr) {
313   static bool warned = false;
314 
315   if (regionsize_ == 0) {
316     regionsize_ = totalmemsize_ / 8;
317     if (regionsize_ < 512 * kMegabyte)
318       regionsize_ = 512 * kMegabyte;
319     regioncount_ = totalmemsize_ / regionsize_;
320     if (regioncount_ < 1) regioncount_ = 1;
321   }
322 
323   int32 region_num = addr / regionsize_;
324   if (region_num >= regioncount_) {
325     if (!warned) {
326         logprintf(0, "Log: region number %d exceeds region count %d\n",
327                   region_num, regioncount_);
328         warned = true;
329     }
330     region_num = region_num % regioncount_;
331   }
332   return region_num;
333 }
334 
335 // Report which cores are associated with a given region.
FindCoreMask(int32 region)336 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
337   sat_assert(region >= 0);
338   region %= num_nodes_;
339   if (!cpu_sets_valid_[region]) {
340     CPU_ZERO(&cpu_sets_[region]);
341     for (int i = 0; i < num_cpus_per_node_; ++i) {
342       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
343     }
344     cpu_sets_valid_[region] = true;
345     logprintf(5, "Log: Region %d mask 0x%s\n",
346                  region, FindCoreMaskFormat(region).c_str());
347   }
348   return &cpu_sets_[region];
349 }
350 
351 // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)352 string OsLayer::FindCoreMaskFormat(int32 region) {
353   cpu_set_t* mask = FindCoreMask(region);
354   string format = cpuset_format(mask);
355   if (format.size() < 8)
356     format = string(8 - format.size(), '0') + format;
357   return format;
358 }
359 
360 // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)361 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
362   time_t now = clock_->Now();
363   int ttf = now - time_initialized_;
364   if (strlen(symptom) && strlen(part)) {
365     logprintf(0, "Report Error: %s : %s : %d : %ds\n",
366               symptom, part, count, ttf);
367   } else {
368     // Log something so the error still shows up, but this won't break the
369     // parser.
370     logprintf(0, "Warning: Invalid Report Error: "
371               "%s : %s : %d : %ds\n", symptom, part, count, ttf);
372   }
373   return true;
374 }
375 
376 // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()377 int64 OsLayer::FindHugePages() {
378   char buf[65] = "0";
379 
380   // This is a kernel interface to query the numebr of hugepages
381   // available in the system.
382   static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
383   int hpfile = open(hugepages_info_file, O_RDONLY);
384 
385   ssize_t bytes_read = read(hpfile, buf, 64);
386   close(hpfile);
387 
388   if (bytes_read <= 0) {
389     logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
390                   "read did not provide data\n");
391     return 0;
392   }
393 
394   if (bytes_read == 64) {
395     logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
396                  "is surprisingly large\n");
397     return 0;
398   }
399 
400   // Add a null termintation to be string safe.
401   buf[bytes_read] = '\0';
402   // Read the page count.
403   int64 pages = strtoull(buf, NULL, 10);  // NOLINT
404 
405   return pages;
406 }
407 
FindFreeMemSize()408 int64 OsLayer::FindFreeMemSize() {
409   int64 size = 0;
410   int64 minsize = 0;
411   if (totalmemsize_ > 0)
412     return totalmemsize_;
413 
414   int64 pages = sysconf(_SC_PHYS_PAGES);
415   int64 avpages = sysconf(_SC_AVPHYS_PAGES);
416   int64 pagesize = sysconf(_SC_PAGESIZE);
417   int64 physsize = pages * pagesize;
418   int64 avphyssize = avpages * pagesize;
419 
420   // Assume 2MB hugepages.
421   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
422 
423   if ((pages == -1) || (pagesize == -1)) {
424     logprintf(0, "Process Error: sysconf could not determine memory size.\n");
425     return 0;
426   }
427 
428   // We want to leave enough stuff for things to run.
429   // If the user specified a minimum amount of memory to expect, require that.
430   // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
431   // If less than 2GB is present use 85% of what's available.
432   // These are fairly arbitrary numbers that seem to work OK.
433   //
434   // TODO(nsanders): is there a more correct way to determine target
435   // memory size?
436   if (hugepagesize > 0) {
437     if (min_hugepages_bytes_ > 0) {
438       minsize = min_hugepages_bytes_;
439     } else {
440       minsize = hugepagesize;
441     }
442   } else {
443     if (physsize < 2048LL * kMegabyte) {
444       minsize = ((pages * 85) / 100) * pagesize;
445     } else {
446       minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
447     }
448     // Make sure that at least reserve_mb_ is left for the system.
449     if (reserve_mb_ > 0) {
450       int64 totalsize = pages * pagesize;
451       int64 reserve_kb = reserve_mb_ * kMegabyte;
452       if (reserve_kb > totalsize) {
453         logprintf(0, "Procedural Error: %lld is bigger than the total memory "
454                   "available %lld\n", reserve_kb, totalsize);
455       } else if (reserve_kb > totalsize - minsize) {
456         logprintf(5, "Warning: Overriding memory to use: original %lld, "
457                   "current %lld\n", minsize, totalsize - reserve_kb);
458         minsize = totalsize - reserve_kb;
459       }
460     }
461   }
462 
463   // Use hugepage sizing if available.
464   if (hugepagesize > 0) {
465     if (hugepagesize < minsize) {
466       logprintf(0, "Procedural Error: Not enough hugepages. "
467                    "%lldMB available < %lldMB required.\n",
468                 hugepagesize / kMegabyte,
469                 minsize / kMegabyte);
470       // Require the calculated minimum amount of memory.
471       size = minsize;
472     } else {
473       // Require that we get all hugepages.
474       size = hugepagesize;
475     }
476   } else {
477     // Require the calculated minimum amount of memory.
478     size = minsize;
479   }
480 
481   logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
482                "Targeting %lld MB (%lld%%)\n",
483             physsize / kMegabyte,
484             avphyssize / kMegabyte,
485             hugepagesize / kMegabyte,
486             size / kMegabyte,
487             size * 100 / physsize);
488 
489   totalmemsize_ = size;
490   return size;
491 }
492 
493 // Allocates all memory available.
AllocateAllMem()494 int64 OsLayer::AllocateAllMem() {
495   int64 length = FindFreeMemSize();
496   bool retval = AllocateTestMem(length, 0);
497   if (retval)
498     return length;
499   else
500     return 0;
501 }
502 
503 // Allocate the target memory. This may be from malloc, hugepage pool
504 // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)505 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
506   // Try hugepages first.
507   void *buf = 0;
508 
509   sat_assert(length >= 0);
510 
511   if (paddr_base)
512     logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
513               " ignore.\n", paddr_base);
514 
515   // Determine optimal memory allocation path.
516   bool prefer_hugepages = false;
517   bool prefer_posix_shm = false;
518   bool prefer_dynamic_mapping = false;
519 
520   // Are there enough hugepages?
521   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
522   // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
523   if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
524     prefer_dynamic_mapping = true;
525     prefer_posix_shm = true;
526     logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
527     logprintf(3, "Log: You may need to run "
528                  "'sudo mount -o remount,size=100\% /dev/shm.'\n");
529   } else if (hugepagesize >= length) {
530     prefer_hugepages = true;
531     logprintf(3, "Log: Prefer using hugepage allocation.\n");
532   } else {
533     logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
534   }
535 
536 #ifdef HAVE_SYS_SHM_H
537   // Allocate hugepage mapped memory.
538   if (prefer_hugepages) {
539     do { // Allow break statement.
540       int shmid;
541       void *shmaddr;
542 
543       if ((shmid = shmget(2, length,
544               SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
545         int err = errno;
546         string errtxt = ErrorString(err);
547         logprintf(3, "Log: failed to allocate shared hugepage "
548                       "object - err %d (%s)\n",
549                   err, errtxt.c_str());
550         logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
551         break;
552       }
553 
554       shmaddr = shmat(shmid, NULL, 0);
555       if (shmaddr == reinterpret_cast<void*>(-1)) {
556         int err = errno;
557         string errtxt = ErrorString(err);
558         logprintf(0, "Log: failed to attach shared "
559                      "hugepage object - err %d (%s).\n",
560                   err, errtxt.c_str());
561         if (shmctl(shmid, IPC_RMID, NULL) < 0) {
562           int err = errno;
563           string errtxt = ErrorString(err);
564           logprintf(0, "Log: failed to remove shared "
565                        "hugepage object - err %d (%s).\n",
566                     err, errtxt.c_str());
567         }
568         break;
569       }
570       use_hugepages_ = true;
571       shmid_ = shmid;
572       buf = shmaddr;
573       logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
574                 shmid, shmaddr);
575     } while (0);
576   }
577 
578   if ((!use_hugepages_) && prefer_posix_shm) {
579     do {
580       int shm_object;
581       void *shmaddr = NULL;
582 
583       shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
584       if (shm_object < 0) {
585         int err = errno;
586         string errtxt = ErrorString(err);
587         logprintf(3, "Log: failed to allocate shared "
588                       "smallpage object - err %d (%s)\n",
589                   err, errtxt.c_str());
590         break;
591       }
592 
593       if (0 > ftruncate(shm_object, length)) {
594         int err = errno;
595         string errtxt = ErrorString(err);
596         logprintf(3, "Log: failed to ftruncate shared "
597                       "smallpage object - err %d (%s)\n",
598                   err, errtxt.c_str());
599         break;
600       }
601 
602       // 32 bit linux apps can only use ~1.4G of address space.
603       // Use dynamic mapping for allocations larger than that.
604       // Currently perf hit is ~10% for this.
605       if (prefer_dynamic_mapping) {
606         dynamic_mapped_shmem_ = true;
607       } else {
608         // Do a full mapping here otherwise.
609         shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
610                          MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
611                          shm_object, 0);
612         if (shmaddr == reinterpret_cast<void*>(-1)) {
613           int err = errno;
614           string errtxt = ErrorString(err);
615           logprintf(0, "Log: failed to map shared "
616                        "smallpage object - err %d (%s).\n",
617                     err, errtxt.c_str());
618           break;
619         }
620       }
621 
622       use_posix_shm_ = true;
623       shmid_ = shm_object;
624       buf = shmaddr;
625       char location_message[256] = "";
626       if (dynamic_mapped_shmem_) {
627         sprintf(location_message, "mapped as needed");
628       } else {
629         sprintf(location_message, "at %p", shmaddr);
630       }
631       logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
632                 shm_object, location_message);
633     } while (0);
634     shm_unlink("/stressapptest");
635   }
636 #endif  // HAVE_SYS_SHM_H
637 
638   if (!use_hugepages_ && !use_posix_shm_) {
639     // If the page size is what SAT is expecting explicitly perform mmap()
640     // allocation.
641     if (sysconf(_SC_PAGESIZE) >= 4096) {
642       void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
643                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
644       if (map_buf != MAP_FAILED) {
645         buf = map_buf;
646         mmapped_allocation_ = true;
647         logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
648       }
649     }
650     if (!mmapped_allocation_) {
651       // Use memalign to ensure that blocks are aligned enough for disk direct
652       // IO.
653       buf = static_cast<char*>(memalign(4096, length));
654       if (buf) {
655         logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
656       } else {
657         logprintf(0, "Process Error: memalign returned 0\n");
658         if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
659           logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
660                        "bit process. Please setup shared memory.\n");
661         }
662       }
663     }
664   }
665 
666   testmem_ = buf;
667   if (buf || dynamic_mapped_shmem_) {
668     testmemsize_ = length;
669   } else {
670     testmemsize_ = 0;
671   }
672 
673   return (buf != 0) || dynamic_mapped_shmem_;
674 }
675 
676 // Free the test memory.
FreeTestMem()677 void OsLayer::FreeTestMem() {
678   if (testmem_) {
679     if (use_hugepages_) {
680 #ifdef HAVE_SYS_SHM_H
681       shmdt(testmem_);
682       shmctl(shmid_, IPC_RMID, NULL);
683 #endif
684     } else if (use_posix_shm_) {
685       if (!dynamic_mapped_shmem_) {
686         munmap(testmem_, testmemsize_);
687       }
688       close(shmid_);
689     } else if (mmapped_allocation_) {
690       munmap(testmem_, testmemsize_);
691     } else {
692       free(testmem_);
693     }
694     testmem_ = 0;
695     testmemsize_ = 0;
696   }
697 }
698 
699 
700 // Prepare the target memory. It may requre mapping in, or this may be a noop.
PrepareTestMem(uint64 offset,uint64 length)701 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
702   sat_assert((offset + length) <= testmemsize_);
703   if (dynamic_mapped_shmem_) {
704     // TODO(nsanders): Check if we can support MAP_NONBLOCK,
705     // and evaluate performance hit from not using it.
706 #ifdef HAVE_MMAP64
707     void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
708                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
709                      shmid_, offset);
710 #else
711     void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
712                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
713                      shmid_, offset);
714 #endif
715     if (mapping == MAP_FAILED) {
716       string errtxt = ErrorString(errno);
717       logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
718                    "error: %s.\n",
719                 offset, length, errtxt.c_str());
720       sat_assert(0);
721     }
722     return mapping;
723   }
724 
725   return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
726 }
727 
728 // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)729 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
730   if (dynamic_mapped_shmem_) {
731     int retval = munmap(addr, length);
732     if (retval == -1) {
733       string errtxt = ErrorString(errno);
734       logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
735                    "error: %s.\n",
736                 addr, length, errtxt.c_str());
737       sat_assert(0);
738     }
739   }
740 }
741 
742 // No error polling on unknown systems.
ErrorPoll()743 int OsLayer::ErrorPoll() {
744   return 0;
745 }
746 
747 // Generally, poll for errors once per second.
ErrorWait()748 void OsLayer::ErrorWait() {
749   sat_sleep(1);
750   return;
751 }
752 
753 // Open a PCI bus-dev-func as a file and return its file descriptor.
754 // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)755 int OsLayer::PciOpen(int bus, int device, int function) {
756   char dev_file[256];
757 
758   snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
759            bus, device, function);
760 
761   int fd = open(dev_file, O_RDWR);
762   if (fd == -1) {
763     logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
764                  "function %d (errno %d).\n",
765               bus, device, function, errno);
766     return -1;
767   }
768 
769   return fd;
770 }
771 
772 
773 // Read and write functions to access PCI config.
PciRead(int fd,uint32 offset,int width)774 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
775   // Strict aliasing rules lawyers will cause data corruption
776   // on cast pointers in some gccs.
777   union {
778     uint32 l32;
779     uint16 l16;
780     uint8 l8;
781   } datacast;
782   datacast.l32 = 0;
783   uint32 size = width / 8;
784 
785   sat_assert((width == 32) || (width == 16) || (width == 8));
786   sat_assert(offset <= (256 - size));
787 
788   if (lseek(fd, offset, SEEK_SET) < 0) {
789     logprintf(0, "Process Error: Can't seek %x\n", offset);
790     return 0;
791   }
792   if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
793     logprintf(0, "Process Error: Can't read %x\n", offset);
794     return 0;
795   }
796 
797   // Extract the data.
798   switch (width) {
799     case 8:
800       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
801       return datacast.l8;
802     case 16:
803       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
804       return datacast.l16;
805     case 32:
806       return datacast.l32;
807   }
808   return 0;
809 }
810 
PciWrite(int fd,uint32 offset,uint32 value,int width)811 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
812   // Strict aliasing rules lawyers will cause data corruption
813   // on cast pointers in some gccs.
814   union {
815     uint32 l32;
816     uint16 l16;
817     uint8 l8;
818   } datacast;
819   datacast.l32 = 0;
820   uint32 size = width / 8;
821 
822   sat_assert((width == 32) || (width == 16) || (width == 8));
823   sat_assert(offset <= (256 - size));
824 
825   // Cram the data into the right alignment.
826   switch (width) {
827     case 8:
828       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
829       datacast.l8 = value;
830     case 16:
831       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
832       datacast.l16 = value;
833     case 32:
834       datacast.l32 = value;
835   }
836 
837   if (lseek(fd, offset, SEEK_SET) < 0) {
838     logprintf(0, "Process Error: Can't seek %x\n", offset);
839     return;
840   }
841   if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
842     logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
843     return;
844   }
845 
846   return;
847 }
848 
849 
850 
851 // Open dev msr.
OpenMSR(uint32 core,uint32 address)852 int OsLayer::OpenMSR(uint32 core, uint32 address) {
853   char buf[256];
854   snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
855   int fd = open(buf, O_RDWR);
856   if (fd < 0)
857     return fd;
858 
859   uint32 pos = lseek(fd, address, SEEK_SET);
860   if (pos != address) {
861     close(fd);
862     logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
863     return -1;
864   }
865 
866   return fd;
867 }
868 
ReadMSR(uint32 core,uint32 address,uint64 * data)869 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
870   int fd = OpenMSR(core, address);
871   if (fd < 0)
872     return false;
873 
874   // Read from the msr.
875   bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
876 
877   if (!res)
878     logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
879 
880   close(fd);
881 
882   return res;
883 }
884 
WriteMSR(uint32 core,uint32 address,uint64 * data)885 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
886   int fd = OpenMSR(core, address);
887   if (fd < 0)
888     return false;
889 
890   // Write to the msr
891   bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
892 
893   if (!res)
894     logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
895 
896   close(fd);
897 
898   return res;
899 }
900 
901 // Extract bits [n+len-1, n] from a 32 bit word.
902 // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)903 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
904   return (val >> n) & ((1<<len) - 1);
905 }
906 
907 // Generic CPU stress workload that would work on any CPU/Platform.
908 // Float-point array moving average calculation.
CpuStressWorkload()909 bool OsLayer::CpuStressWorkload() {
910   double float_arr[100];
911   double sum = 0;
912 #ifdef HAVE_RAND_R
913   unsigned int seed = 12345;
914 #endif
915 
916   // Initialize array with random numbers.
917   for (int i = 0; i < 100; i++) {
918 #ifdef HAVE_RAND_R
919     float_arr[i] = rand_r(&seed);
920     if (rand_r(&seed) % 2)
921       float_arr[i] *= -1.0;
922 #else
923     srand(time(NULL));
924     float_arr[i] = rand();  // NOLINT
925     if (rand() % 2)         // NOLINT
926       float_arr[i] *= -1.0;
927 #endif
928   }
929 
930   // Calculate moving average.
931   for (int i = 0; i < 100000000; i++) {
932     float_arr[i % 100] =
933       (float_arr[i % 100] + float_arr[(i + 1) % 100] +
934        float_arr[(i + 99) % 100]) / 3;
935     sum += float_arr[i % 100];
936   }
937 
938   // Artificial printf so the loops do not get optimized away.
939   if (sum == 0.0)
940     logprintf(12, "Log: I'm Feeling Lucky!\n");
941   return true;
942 }
943