1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef STRESSAPPTEST_OS_H_  // NOLINT
17 #define STRESSAPPTEST_OS_H_
18 
19 #include <dirent.h>
20 #include <unistd.h>
21 #include <sys/syscall.h>
22 
23 #include <string>
24 #include <list>
25 #include <map>
26 #include <vector>
27 
28 // This file must work with autoconf on its public version,
29 // so these includes are correct.
30 #include "adler32memcpy.h"  // NOLINT
31 #include "sattypes.h"       // NOLINT
32 #include "clock.h"          // NOLINT
33 
// Path of the calling process's pagemap file under /proc.
// NOTE(review): presumably consumed by the virtual-to-physical translation
// code in the implementation file -- confirm against os.cc.
const char kPagemapPath[] = "/proc/self/pagemap";
35 
36 struct PCIDevice {
37   int32 domain;
38   uint16 bus;
39   uint8 dev;
40   uint8 func;
41   uint16 vendor_id;
42   uint16 device_id;
43   uint64 base_addr[6];
44   uint64 size[6];
45 };
46 
47 typedef vector<PCIDevice*> PCIDevices;
48 
// Forward declarations of types that OsLayer refers to by pointer.
class ErrorDiag;

class Clock;
52 
53 // This class implements OS/Platform specific funtions.
54 class OsLayer {
55  public:
56   OsLayer();
57   virtual ~OsLayer();
58 
59   // Set the minimum amount of hugepages that should be available for testing.
60   // Must be set before Initialize().
SetMinimumHugepagesSize(int64 min_bytes)61   void SetMinimumHugepagesSize(int64 min_bytes) {
62     min_hugepages_bytes_ = min_bytes;
63   }
64 
65   // Set the minium amount of memory that should not be allocated. This only
66   // has any affect if hugepages are not used.
67   // Must be set before Initialize().
SetReserveSize(int64 reserve_mb)68   void SetReserveSize(int64 reserve_mb) {
69     reserve_mb_ = reserve_mb;
70   }
71 
72   // Set parameters needed to translate physical address to memory module.
SetDramMappingParams(uintptr_t channel_hash,int channel_width,vector<vector<string>> * channels)73   void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
74                             vector< vector<string> > *channels) {
75     channel_hash_ = channel_hash;
76     channel_width_ = channel_width;
77     channels_ = channels;
78   }
79 
80   // Initializes data strctures and open files.
81   // Returns false on error.
82   virtual bool Initialize();
83 
84   // Virtual to physical. This implementation is optional for
85   // subclasses to implement.
86   // Takes a pointer, and returns the corresponding bus address.
87   virtual uint64 VirtualToPhysical(void *vaddr);
88 
89   // Prints failed dimm. This implementation is optional for
90   // subclasses to implement.
91   // Takes a bus address and string, and prints the DIMM name
92   // into the string. Returns the DIMM number that corresponds to the
93   // address given, or -1 if unable to identify the DIMM number.
94   // Note that subclass implementations of FindDimm() MUST fill
95   // buf with at LEAST one non-whitespace character (provided len > 0).
96   virtual int FindDimm(uint64 addr, char *buf, int len);
97 
98   // Classifies addresses according to "regions"
99   // This may mean different things on different platforms.
100   virtual int32 FindRegion(uint64 paddr);
101   // Find cpu cores associated with a region. Either NUMA or arbitrary.
102   virtual cpu_set_t *FindCoreMask(int32 region);
103   // Return cpu cores associated with a region in a hex string.
104   virtual string FindCoreMaskFormat(int32 region);
105 
106   // Returns the HD device that contains this file.
107   virtual string FindFileDevice(string filename);
108 
109   // Returns a list of paths coresponding to HD devices found on this machine.
110   virtual list<string> FindFileDevices();
111 
112   // Polls for errors. This implementation is optional.
113   // This will poll once for errors and return zero iff no errors were found.
114   virtual int ErrorPoll();
115 
116   // Delay an appropriate amount of time between polling.
117   virtual void ErrorWait();
118 
119   // Report errors. This implementation is mandatory.
120   // This will output a machine readable line regarding the error.
121   virtual bool ErrorReport(const char *part, const char *symptom, int count);
122 
123   // Flushes page cache. Used to circumvent the page cache when doing disk
124   // I/O.  This will be a NOP until ActivateFlushPageCache() is called, which
125   // is typically done when opening a file with O_DIRECT fails.
126   // Returns false on error, true on success or NOP.
127   // Subclasses may implement this in machine specific ways..
128   virtual bool FlushPageCache(void);
129   // Enable FlushPageCache() to actually do the flush instead of being a NOP.
130   virtual void ActivateFlushPageCache(void);
131 
132   // Flushes cacheline. Used to distinguish read or write errors.
133   // Subclasses may implement this in machine specific ways..
134   // Takes a pointer, and flushed the cacheline containing that pointer.
135   virtual void Flush(void *vaddr);
136 
137   // Fast flush, for use in performance critical code.
138   // This is bound at compile time, and will not pick up
139   // any runtime machine configuration info.
FastFlush(void * vaddr)140   inline static void FastFlush(void *vaddr) {
141 #ifdef STRESSAPPTEST_CPU_PPC
142     asm volatile("dcbf 0,%0; sync" : : "r" (vaddr));
143 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
144     // Put mfence before and after clflush to make sure:
145     // 1. The write before the clflush is committed to memory bus;
146     // 2. The read after the clflush is hitting the memory bus.
147     //
148     // From Intel manual:
149     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
150     // to be ordered by any other fencing, serializing or other CLFLUSH
151     // instruction. For example, software can use an MFENCE instruction to
152     // insure that previous stores are included in the write-back.
153     asm volatile("mfence");
154     asm volatile("clflush (%0)" : : "r" (vaddr));
155     asm volatile("mfence");
156 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
157     // ARMv7a cachelines are 8 words (32 bytes).
158     syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
159 #elif defined(STRESSAPPTEST_CPU_AARCH64)
160     asm volatile("dc cvau, %0" : : "r" (vaddr));
161     asm volatile("dsb ish");
162     asm volatile("ic ivau, %0" : : "r" (vaddr));
163     asm volatile("dsb ish");
164     asm volatile("isb");
165 #else
166   #warning "Unsupported CPU type: Unable to force cache flushes."
167 #endif
168   }
169 
170   // Fast flush, for use in performance critical code.
171   // This is bound at compile time, and will not pick up
172   // any runtime machine configuration info.  Takes a NULL-terminated
173   // array of addresses to flush.
FastFlushList(void ** vaddrs)174   inline static void FastFlushList(void **vaddrs) {
175 #ifdef STRESSAPPTEST_CPU_PPC
176     while (*vaddrs) {
177       asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
178     }
179     asm volatile("sync");
180 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
181     // Put mfence before and after clflush to make sure:
182     // 1. The write before the clflush is committed to memory bus;
183     // 2. The read after the clflush is hitting the memory bus.
184     //
185     // From Intel manual:
186     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
187     // to be ordered by any other fencing, serializing or other CLFLUSH
188     // instruction. For example, software can use an MFENCE instruction to
189     // insure that previous stores are included in the write-back.
190     asm volatile("mfence");
191     while (*vaddrs) {
192       asm volatile("clflush (%0)" : : "r" (*vaddrs++));
193     }
194     asm volatile("mfence");
195 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
196     while (*vaddrs) {
197       FastFlush(*vaddrs++);
198     }
199 #else
200     #warning "Unsupported CPU type: Unable to force cache flushes."
201 #endif
202   }
203 
204   // Fast flush hint, for use in performance critical code.
205   // This is bound at compile time, and will not pick up
206   // any runtime machine configuration info.  Note that this
207   // will not guarantee that a flush happens, but will at least
208   // hint that it should.  This is useful for speeding up
209   // parallel march algorithms.
FastFlushHint(void * vaddr)210   inline static void FastFlushHint(void *vaddr) {
211 #ifdef STRESSAPPTEST_CPU_PPC
212     asm volatile("dcbf 0,%0" : : "r" (vaddr));
213 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
214     // From Intel manual:
215     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
216     // to be ordered by any other fencing, serializing or other CLFLUSH
217     // instruction. For example, software can use an MFENCE instruction to
218     // insure that previous stores are included in the write-back.
219     asm volatile("clflush (%0)" : : "r" (vaddr));
220 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
221     FastFlush(vaddr);
222 #else
223     #warning "Unsupported CPU type: Unable to force cache flushes."
224 #endif
225   }
226 
227   // Fast flush, for use in performance critical code.
228   // This is bound at compile time, and will not pick up
229   // any runtime machine configuration info.  Sync's any
230   // transactions for ordering FastFlushHints.
FastFlushSync()231   inline static void FastFlushSync() {
232 #ifdef STRESSAPPTEST_CPU_PPC
233     asm volatile("sync");
234 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
235     // Put mfence before and after clflush to make sure:
236     // 1. The write before the clflush is committed to memory bus;
237     // 2. The read after the clflush is hitting the memory bus.
238     //
239     // From Intel manual:
240     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
241     // to be ordered by any other fencing, serializing or other CLFLUSH
242     // instruction. For example, software can use an MFENCE instruction to
243     // insure that previous stores are included in the write-back.
244     asm volatile("mfence");
245 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
246     // This is a NOP, FastFlushHint() always does a full flush, so there's
247     // nothing to do for FastFlushSync().
248 #else
249   #warning "Unsupported CPU type: Unable to force cache flushes."
250 #endif
251   }
252 
253   // Get time in cpu timer ticks. Useful for matching MCEs with software
254   // actions.
GetTimestamp(void)255   inline static uint64 GetTimestamp(void) {
256     uint64 tsc;
257 #ifdef STRESSAPPTEST_CPU_PPC
258     uint32 tbl, tbu, temp;
259     __asm __volatile(
260       "1:\n"
261       "mftbu  %2\n"
262       "mftb   %0\n"
263       "mftbu  %1\n"
264       "cmpw   %2,%1\n"
265       "bne    1b\n"
266       : "=r"(tbl), "=r"(tbu), "=r"(temp)
267       :
268       : "cc");
269 
270     tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl);
271 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
272     datacast_t data;
273     __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
274     tsc = data.l64;
275 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
276     #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
277     tsc = 0;
278 #elif defined(STRESSAPPTEST_CPU_AARCH64)
279     __asm __volatile("mrs %0, CNTVCT_EL0" : "=r" (tsc) : : );
280 #else
281     #warning "Unsupported CPU type: your timer may not function correctly"
282     tsc = 0;
283 #endif
284     return (tsc);
285   }
286 
287   // Find the free memory on the machine.
288   virtual int64 FindFreeMemSize();
289 
290   // Allocates test memory of length bytes.
291   // Subclasses must implement this.
292   // Call PepareTestMem to get a pointer.
293   virtual int64 AllocateAllMem();  // Returns length.
294   // Returns success.
295   virtual bool AllocateTestMem(int64 length, uint64 paddr_base);
296   virtual void FreeTestMem();
297 
298   // Prepares the memory for use. You must call this
299   // before using test memory, and after you are done.
300   virtual void *PrepareTestMem(uint64 offset, uint64 length);
301   virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length);
302 
303   // Machine type detected. Can we implement all these functions correctly?
304   // Returns true if machine type is detected and implemented.
305   virtual bool IsSupported();
306 
307   // Returns 32 for 32-bit, 64 for 64-bit.
308   virtual int AddressMode();
309   // Update OsLayer state regarding cpu support for various features.
310   virtual void GetFeatures();
311 
312   // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file.
313   virtual int PciOpen(int bus, int device, int function);
314   virtual void PciWrite(int fd, uint32 offset, uint32 value, int width);
315   virtual uint32 PciRead(int fd, uint32 offset, int width);
316 
317   // Read MSRs
318   virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data);
319   virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data);
320 
321   // Extract bits [n+len-1, n] from a 32 bit word.
322   // so GetBitField(0x0f00, 8, 4) == 0xf.
323   virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len);
324 
325   // Platform and CPU specific CPU-stressing function.
326   // Returns true on success, false otherwise.
327   virtual bool CpuStressWorkload();
328 
329   // Causes false errors for unittesting.
330   // Setting to "true" causes errors to be injected.
set_error_injection(bool errors)331   void set_error_injection(bool errors) { error_injection_ = errors; }
error_injection()332   bool error_injection() const { return error_injection_; }
333 
334   // Is SAT using normal malloc'd memory, or exotic mmap'd memory.
normal_mem()335   bool normal_mem() const { return normal_mem_; }
336 
337   // Get numa config, if available..
num_nodes()338   int num_nodes() const { return num_nodes_; }
num_cpus()339   int num_cpus() const { return num_cpus_; }
340 
341   // Handle to platform-specific error diagnoser.
342   ErrorDiag *error_diagnoser_;
343 
344   // Disambiguate between different "warm" memcopies.
345   virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
346                                unsigned int size_in_bytes,
347                                AdlerChecksum *checksum);
348 
349   // Store a callback to use to print
350   // app-specific info about the last error location.
351   // This call back is called with a physical address, and the app can fill in
352   // the most recent transaction that occurred at that address.
353   typedef bool (*ErrCallback)(uint64 paddr, string *buf);
set_err_log_callback(ErrCallback err_log_callback)354   void set_err_log_callback(
355     ErrCallback err_log_callback) {
356     err_log_callback_ = err_log_callback;
357   }
get_err_log_callback()358   ErrCallback get_err_log_callback() { return err_log_callback_; }
359 
360   // Set a clock object that can be overridden for use with unit tests.
SetClock(Clock * clock)361   void SetClock(Clock *clock) {
362     if (clock_) {
363       delete clock_;
364     }
365     clock_ = clock;
366     time_initialized_ = clock_->Now();
367   }
368 
369  protected:
370   void *testmem_;                // Location of test memory.
371   uint64 testmemsize_;           // Size of test memory.
372   int64 totalmemsize_;           // Size of available memory.
373   int64 min_hugepages_bytes_;    // Minimum hugepages size.
374   int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
375   bool  error_injection_;        // Do error injection?
376   bool  normal_mem_;             // Memory DMA capable?
377   bool  use_hugepages_;          // Use hugepage shmem?
378   bool  use_posix_shm_;          // Use 4k page shmem?
379   bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
380   bool  mmapped_allocation_;     // Was memory allocated using mmap()?
381   int   shmid_;                  // Handle to shmem
382   vector< vector<string> > *channels_;  // Memory module names per channel.
383   uint64 channel_hash_;          // Mask of address bits XORed for channel.
384   int channel_width_;            // Channel width in bits.
385 
386   int64 regionsize_;             // Size of memory "regions"
387   int   regioncount_;            // Number of memory "regions"
388   int   num_cpus_;               // Number of cpus in the system.
389   int   num_nodes_;              // Number of nodes in the system.
390   int   num_cpus_per_node_;      // Number of cpus per node in the system.
391   int   address_mode_;           // Are we running 32 or 64 bit?
392   bool  has_vector_;             // Do we have sse2/neon instructions?
393   bool  has_clflush_;            // Do we have clflush instructions?
394   bool  use_flush_page_cache_;   // Do we need to flush the page cache?
395 
396 
397   time_t time_initialized_;      // Start time of test.
398 
399   vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
400   vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.
401 
402   // Get file descriptor for dev msr.
403   virtual int OpenMSR(uint32 core, uint32 address);
404 
405   // Look up how many hugepages there are.
406   virtual int64 FindHugePages();
407 
408   // Link to find last transaction at an error location.
409   ErrCallback err_log_callback_;
410 
411   // Object to wrap the time function.
412   Clock *clock_;
413 
414  private:
415   DISALLOW_COPY_AND_ASSIGN(OsLayer);
416 };
417 
418 // Selects and returns the proper OS and hardware interface.  Does not call
419 // OsLayer::Initialize() on the new object.
420 OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);
421 
422 #endif  // STRESSAPPTEST_OS_H_ NOLINT
423