1 // Copyright 2006 Google Inc. All Rights Reserved.
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // sat.cc : a stress test for stressful testing
16 
17 // stressapptest (or SAT, from Stressful Application Test) is a test
18 // designed to stress the system, as well as provide a comprehensive
19 // memory interface test.
20 
21 // stressapptest can be run using memory only, or using many system components.
22 
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/stat.h>
#include <sys/times.h>

// #define __USE_GNU
// #define __USE_LARGEFILE64
#include <fcntl.h>

#include <list>
#include <new>
#include <string>

// This file must work with autoconf on its public version,
// so these includes are correct.
#include "disk_blocks.h"
#include "logger.h"
#include "os.h"
#include "sat.h"
#include "sattypes.h"
#include "worker.h"
50 
// Version string reported by stressapptest. PACKAGE_VERSION, when defined,
// overrides the built-in default.
#ifdef PACKAGE_VERSION
static const char* kVersion = PACKAGE_VERSION;
#else
static const char* kVersion = "1.0.0";
#endif
57 
58 // Global stressapptest reference, for use by signal handler.
59 // This makes Sat objects not safe for multiple instances.
60 namespace {
61   Sat *g_sat = NULL;
62 
63   // Signal handler for catching break or kill.
64   //
65   // This must be installed after g_sat is assigned and while there is a single
66   // thread.
67   //
68   // This must be uninstalled while there is only a single thread, and of course
69   // before g_sat is cleared or deleted.
SatHandleBreak(int signal)70   void SatHandleBreak(int signal) {
71     g_sat->Break();
72   }
73 }
74 
75 // Opens the logfile for writing if necessary
InitializeLogfile()76 bool Sat::InitializeLogfile() {
77   // Open logfile.
78   if (use_logfile_) {
79     logfile_ = open(logfilename_,
80 #if defined(O_DSYNC)
81                     O_DSYNC |
82 #elif defined(O_SYNC)
83                     O_SYNC |
84 #elif defined(O_FSYNC)
85                     O_FSYNC |
86 #endif
87                     O_WRONLY | O_CREAT,
88                     S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
89     if (logfile_ < 0) {
90       printf("Fatal Error: cannot open file %s for logging\n",
91              logfilename_);
92       bad_status();
93       return false;
94     }
95     // We seek to the end once instead of opening in append mode because no
96     // other processes should be writing to it while this one exists.
97     if (lseek(logfile_, 0, SEEK_END) == -1) {
98       printf("Fatal Error: cannot seek to end of logfile (%s)\n",
99              logfilename_);
100       bad_status();
101       return false;
102     }
103     Logger::GlobalLogger()->SetLogFd(logfile_);
104   }
105   return true;
106 }
107 
108 // Check that the environment is known and safe to run on.
109 // Return 1 if good, 0 if unsuppported.
CheckEnvironment()110 bool Sat::CheckEnvironment() {
111   // Check that this is not a debug build. Debug builds lack
112   // enough performance to stress the system.
113 #if !defined NDEBUG
114   if (run_on_anything_) {
115     logprintf(1, "Log: Running DEBUG version of SAT, "
116                  "with significantly reduced coverage.\n");
117   } else {
118     logprintf(0, "Process Error: Running DEBUG version of SAT, "
119                  "with significantly reduced coverage.\n");
120     logprintf(0, "Log: Command line option '-A' bypasses this error.\n");
121     bad_status();
122     return false;
123   }
124 #elif !defined CHECKOPTS
125   #error Build system regression - COPTS disregarded.
126 #endif
127 
128   // Check if the cpu frequency test is enabled and able to run.
129   if (cpu_freq_test_) {
130     if (!CpuFreqThread::CanRun()) {
131       logprintf(0, "Process Error: This platform does not support this "
132                 "test.\n");
133       bad_status();
134       return false;
135     } else if (cpu_freq_threshold_ <= 0) {
136       logprintf(0, "Process Error: The cpu frequency test requires "
137                 "--cpu_freq_threshold set to a value > 0\n");
138       bad_status();
139       return false;
140     } else if (cpu_freq_round_ < 0) {
141       logprintf(0, "Process Error: The --cpu_freq_round option must be greater"
142                 " than or equal to zero. A value of zero means no rounding.\n");
143       bad_status();
144       return false;
145     }
146   }
147 
148   // Use all CPUs if nothing is specified.
149   if (memory_threads_ == -1) {
150     memory_threads_ = os_->num_cpus();
151     logprintf(7, "Log: Defaulting to %d copy threads\n", memory_threads_);
152   }
153 
154   // Use all memory if no size is specified.
155   if (size_mb_ == 0)
156     size_mb_ = os_->FindFreeMemSize() / kMegabyte;
157   size_ = static_cast<int64>(size_mb_) * kMegabyte;
158 
159   // Autodetect file locations.
160   if (findfiles_ && (file_threads_ == 0)) {
161     // Get a space separated sting of disk locations.
162     list<string> locations = os_->FindFileDevices();
163 
164     // Extract each one.
165     while (!locations.empty()) {
166       // Copy and remove the disk name.
167       string disk = locations.back();
168       locations.pop_back();
169 
170       logprintf(12, "Log: disk at %s\n", disk.c_str());
171       file_threads_++;
172       filename_.push_back(disk + "/sat_disk.a");
173       file_threads_++;
174       filename_.push_back(disk + "/sat_disk.b");
175     }
176   }
177 
178   // We'd better have some memory by this point.
179   if (size_ < 1) {
180     logprintf(0, "Process Error: No memory found to test.\n");
181     bad_status();
182     return false;
183   }
184 
185   if (tag_mode_ && ((file_threads_ > 0) ||
186                     (disk_threads_ > 0) ||
187                     (net_threads_ > 0))) {
188     logprintf(0, "Process Error: Memory tag mode incompatible "
189                  "with disk/network DMA.\n");
190     bad_status();
191     return false;
192   }
193 
194   // If platform is 32 bit Xeon, floor memory size to multiple of 4.
195   if (address_mode_ == 32) {
196     size_mb_ = (size_mb_ / 4) * 4;
197     size_ = size_mb_ * kMegabyte;
198     logprintf(1, "Log: Flooring memory allocation to multiple of 4: %lldMB\n",
199               size_mb_);
200   }
201 
202   // Check if this system is on the whitelist for supported systems.
203   if (!os_->IsSupported()) {
204     if (run_on_anything_) {
205       logprintf(1, "Log: Unsupported system. Running with reduced coverage.\n");
206       // This is ok, continue on.
207     } else {
208       logprintf(0, "Process Error: Unsupported system, "
209                    "no error reporting available\n");
210       logprintf(0, "Log: Command line option '-A' bypasses this error.\n");
211       bad_status();
212       return false;
213     }
214   }
215 
216   return true;
217 }
218 
219 // Allocates memory to run the test on
AllocateMemory()220 bool Sat::AllocateMemory() {
221   // Allocate our test memory.
222   bool result = os_->AllocateTestMem(size_, paddr_base_);
223   if (!result) {
224     logprintf(0, "Process Error: failed to allocate memory\n");
225     bad_status();
226     return false;
227   }
228   return true;
229 }
230 
231 // Sets up access to data patterns
InitializePatterns()232 bool Sat::InitializePatterns() {
233   // Initialize pattern data.
234   patternlist_ = new PatternList();
235   if (!patternlist_) {
236     logprintf(0, "Process Error: failed to allocate patterns\n");
237     bad_status();
238     return false;
239   }
240   if (!patternlist_->Initialize()) {
241     logprintf(0, "Process Error: failed to initialize patternlist\n");
242     bad_status();
243     return false;
244   }
245   return true;
246 }
247 
248 // Get any valid page, no tag specified.
GetValid(struct page_entry * pe)249 bool Sat::GetValid(struct page_entry *pe) {
250   return GetValid(pe, kDontCareTag);
251 }
252 
253 
254 // Fetch and return empty and full pages into the empty and full pools.
GetValid(struct page_entry * pe,int32 tag)255 bool Sat::GetValid(struct page_entry *pe, int32 tag) {
256   bool result = false;
257   // Get valid page depending on implementation.
258   if (pe_q_implementation_ == SAT_FINELOCK)
259     result = finelock_q_->GetValid(pe, tag);
260   else if (pe_q_implementation_ == SAT_ONELOCK)
261     result = valid_->PopRandom(pe);
262 
263   if (result) {
264     pe->addr = os_->PrepareTestMem(pe->offset, page_length_);  // Map it.
265 
266     // Tag this access and current pattern.
267     pe->ts = os_->GetTimestamp();
268     pe->lastpattern = pe->pattern;
269 
270     return (pe->addr != 0);     // Return success or failure.
271   }
272   return false;
273 }
274 
PutValid(struct page_entry * pe)275 bool Sat::PutValid(struct page_entry *pe) {
276   if (pe->addr != 0)
277     os_->ReleaseTestMem(pe->addr, pe->offset, page_length_);  // Unmap the page.
278   pe->addr = 0;
279 
280   // Put valid page depending on implementation.
281   if (pe_q_implementation_ == SAT_FINELOCK)
282     return finelock_q_->PutValid(pe);
283   else if (pe_q_implementation_ == SAT_ONELOCK)
284     return valid_->Push(pe);
285   else
286     return false;
287 }
288 
289 // Get an empty page with any tag.
GetEmpty(struct page_entry * pe)290 bool Sat::GetEmpty(struct page_entry *pe) {
291   return GetEmpty(pe, kDontCareTag);
292 }
293 
GetEmpty(struct page_entry * pe,int32 tag)294 bool Sat::GetEmpty(struct page_entry *pe, int32 tag) {
295   bool result = false;
296   // Get empty page depending on implementation.
297   if (pe_q_implementation_ == SAT_FINELOCK)
298     result = finelock_q_->GetEmpty(pe, tag);
299   else if (pe_q_implementation_ == SAT_ONELOCK)
300     result = empty_->PopRandom(pe);
301 
302   if (result) {
303     pe->addr = os_->PrepareTestMem(pe->offset, page_length_);  // Map it.
304     return (pe->addr != 0);     // Return success or failure.
305   }
306   return false;
307 }
308 
PutEmpty(struct page_entry * pe)309 bool Sat::PutEmpty(struct page_entry *pe) {
310   if (pe->addr != 0)
311     os_->ReleaseTestMem(pe->addr, pe->offset, page_length_);  // Unmap the page.
312   pe->addr = 0;
313 
314   // Put empty page depending on implementation.
315   if (pe_q_implementation_ == SAT_FINELOCK)
316     return finelock_q_->PutEmpty(pe);
317   else if (pe_q_implementation_ == SAT_ONELOCK)
318     return empty_->Push(pe);
319   else
320     return false;
321 }
322 
323 // Set up the bitmap of physical pages in case we want to see which pages were
324 // accessed under this run of SAT.
AddrMapInit()325 void Sat::AddrMapInit() {
326   if (!do_page_map_)
327     return;
328   // Find about how much physical mem is in the system.
329   // TODO(nsanders): Find some way to get the max
330   // and min phys addr in the system.
331   uint64 maxsize = os_->FindFreeMemSize() * 4;
332   sat_assert(maxsize != 0);
333 
334   // Make a bitmask of this many pages. Assume that the memory is relatively
335   // zero based. This is true on x86, typically.
336   // This is one bit per page.
337   uint64 arraysize = maxsize / 4096 / 8;
338   unsigned char *bitmap = new unsigned char[arraysize];
339   sat_assert(bitmap);
340 
341   // Mark every page as 0, not seen.
342   memset(bitmap, 0, arraysize);
343 
344   page_bitmap_size_ = maxsize;
345   page_bitmap_ = bitmap;
346 }
347 
348 // Add the 4k pages in this block to the array of pages SAT has seen.
AddrMapUpdate(struct page_entry * pe)349 void Sat::AddrMapUpdate(struct page_entry *pe) {
350   if (!do_page_map_)
351     return;
352 
353   // Go through 4k page blocks.
354   uint64 arraysize = page_bitmap_size_ / 4096 / 8;
355 
356   char *base = reinterpret_cast<char*>(pe->addr);
357   for (int i = 0; i < page_length_; i += 4096) {
358     uint64 paddr = os_->VirtualToPhysical(base + i);
359 
360     uint32 offset = paddr / 4096 / 8;
361     unsigned char mask = 1 << ((paddr / 4096) % 8);
362 
363     if (offset >= arraysize) {
364       logprintf(0, "Process Error: Physical address %#llx is "
365                    "greater than expected %#llx.\n",
366                 paddr, page_bitmap_size_);
367       sat_assert(0);
368     }
369     page_bitmap_[offset] |= mask;
370   }
371 }
372 
373 // Print out the physical memory ranges that SAT has accessed.
AddrMapPrint()374 void Sat::AddrMapPrint() {
375   if (!do_page_map_)
376     return;
377 
378   uint64 pages = page_bitmap_size_ / 4096;
379 
380   uint64 last_page = 0;
381   bool valid_range = false;
382 
383   logprintf(4, "Log: Printing tested physical ranges.\n");
384 
385   for (uint64 i = 0; i < pages; i ++) {
386     int offset = i / 8;
387     unsigned char mask = 1 << (i % 8);
388 
389     bool touched = page_bitmap_[offset] & mask;
390     if (touched && !valid_range) {
391       valid_range = true;
392       last_page = i * 4096;
393     } else if (!touched && valid_range) {
394       valid_range = false;
395       logprintf(4, "Log: %#016llx - %#016llx\n", last_page, (i * 4096) - 1);
396     }
397   }
398   logprintf(4, "Log: Done printing physical ranges.\n");
399 }
400 
401 // Initializes page lists and fills pages with data patterns.
InitializePages()402 bool Sat::InitializePages() {
403   int result = 1;
404   // Calculate needed page totals.
405   int64 neededpages = memory_threads_ +
406     invert_threads_ +
407     check_threads_ +
408     net_threads_ +
409     file_threads_;
410 
411   // Empty-valid page ratio is adjusted depending on queue implementation.
412   // since fine-grain-locked queue keeps both valid and empty entries in the
413   // same queue and randomly traverse to find pages, the empty-valid ratio
414   // should be more even.
415   if (pe_q_implementation_ == SAT_FINELOCK)
416     freepages_ = pages_ / 5 * 2;  // Mark roughly 2/5 of all pages as Empty.
417   else
418     freepages_ = (pages_ / 100) + (2 * neededpages);
419 
420   if (freepages_ < neededpages) {
421     logprintf(0, "Process Error: freepages < neededpages.\n");
422     logprintf(1, "Stats: Total: %lld, Needed: %lld, Marked free: %lld\n",
423               static_cast<int64>(pages_),
424               static_cast<int64>(neededpages),
425               static_cast<int64>(freepages_));
426     bad_status();
427     return false;
428   }
429 
430   if (freepages_ >  pages_/2) {
431     logprintf(0, "Process Error: not enough pages for IO\n");
432     logprintf(1, "Stats: Total: %lld, Needed: %lld, Available: %lld\n",
433               static_cast<int64>(pages_),
434               static_cast<int64>(freepages_),
435               static_cast<int64>(pages_/2));
436     bad_status();
437     return false;
438   }
439   logprintf(12, "Log: Allocating pages, Total: %lld Free: %lld\n",
440             pages_,
441             freepages_);
442 
443   // Initialize page locations.
444   for (int64 i = 0; i < pages_; i++) {
445     struct page_entry pe;
446     init_pe(&pe);
447     pe.offset = i * page_length_;
448     result &= PutEmpty(&pe);
449   }
450 
451   if (!result) {
452     logprintf(0, "Process Error: while initializing empty_ list\n");
453     bad_status();
454     return false;
455   }
456 
457   // Fill valid pages with test patterns.
458   // Use fill threads to do this.
459   WorkerStatus fill_status;
460   WorkerVector fill_vector;
461 
462   logprintf(12, "Starting Fill threads: %d threads, %d pages\n",
463             fill_threads_, pages_);
464   // Initialize the fill threads.
465   for (int i = 0; i < fill_threads_; i++) {
466     FillThread *thread = new FillThread();
467     thread->InitThread(i, this, os_, patternlist_, &fill_status);
468     if (i != fill_threads_ - 1) {
469         logprintf(12, "Starting Fill Threads %d: %d pages\n",
470                   i, pages_ / fill_threads_);
471         thread->SetFillPages(pages_ / fill_threads_);
472       // The last thread finishes up all the leftover pages.
473     } else {
474       logprintf(12, "Starting Fill Threads %d: %d pages\n",
475                 i, pages_ - pages_ / fill_threads_ * i);
476         thread->SetFillPages(pages_ - pages_ / fill_threads_ * i);
477     }
478     fill_vector.push_back(thread);
479   }
480 
481   // Spawn the fill threads.
482   fill_status.Initialize();
483   for (WorkerVector::const_iterator it = fill_vector.begin();
484        it != fill_vector.end(); ++it)
485     (*it)->SpawnThread();
486 
487   // Reap the finished fill threads.
488   for (WorkerVector::const_iterator it = fill_vector.begin();
489        it != fill_vector.end(); ++it) {
490     (*it)->JoinThread();
491     if ((*it)->GetStatus() != 1) {
492       logprintf(0, "Thread %d failed with status %d at %.2f seconds\n",
493                 (*it)->ThreadID(), (*it)->GetStatus(),
494                 (*it)->GetRunDurationUSec() * 1.0/1000000);
495       bad_status();
496       return false;
497     }
498     delete (*it);
499   }
500   fill_vector.clear();
501   fill_status.Destroy();
502   logprintf(12, "Log: Done filling pages.\n");
503   logprintf(12, "Log: Allocating pages.\n");
504 
505   AddrMapInit();
506 
507   // Initialize page locations.
508   for (int64 i = 0; i < pages_; i++) {
509     struct page_entry pe;
510     // Only get valid pages with uninitialized tags here.
511     if (GetValid(&pe, kInvalidTag)) {
512       int64 paddr = os_->VirtualToPhysical(pe.addr);
513       int32 region = os_->FindRegion(paddr);
514       region_[region]++;
515       pe.paddr = paddr;
516       pe.tag = 1 << region;
517       region_mask_ |= pe.tag;
518 
519       // Generate a physical region map
520       AddrMapUpdate(&pe);
521 
522       // Note: this does not allocate free pages among all regions
523       // fairly. However, with large enough (thousands) random number
524       // of pages being marked free in each region, the free pages
525       // count in each region end up pretty balanced.
526       if (i < freepages_) {
527         result &= PutEmpty(&pe);
528       } else {
529         result &= PutValid(&pe);
530       }
531     } else {
532       logprintf(0, "Log: didn't tag all pages. %d - %d = %d\n",
533                 pages_, i, pages_ - i);
534       return false;
535     }
536   }
537   logprintf(12, "Log: Done allocating pages.\n");
538 
539   AddrMapPrint();
540 
541   for (int i = 0; i < 32; i++) {
542     if (region_mask_ & (1 << i)) {
543       region_count_++;
544       logprintf(12, "Log: Region %d: %d.\n", i, region_[i]);
545     }
546   }
547   logprintf(5, "Log: Region mask: 0x%x\n", region_mask_);
548 
549   return true;
550 }
551 
552 // Print SAT version info.
PrintVersion()553 bool Sat::PrintVersion() {
554   logprintf(1, "Stats: SAT revision %s, %d bit binary\n",
555             kVersion, address_mode_);
556   logprintf(5, "Log: %s from %s\n", Timestamp(), BuildChangelist());
557 
558   return true;
559 }
560 
561 
562 // Initializes the resources that SAT needs to run.
563 // This needs to be called before Run(), and after ParseArgs().
564 // Returns true on success, false on error, and will exit() on help message.
Initialize()565 bool Sat::Initialize() {
566   g_sat = this;
567 
568   // Initializes sync'd log file to ensure output is saved.
569   if (!InitializeLogfile())
570     return false;
571   Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_);
572   Logger::GlobalLogger()->StartThread();
573 
574   logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str());
575   PrintVersion();
576 
577   std::map<std::string, std::string> options;
578 
579   GoogleOsOptions(&options);
580 
581   // Initialize OS/Hardware interface.
582   os_ = OsLayerFactory(options);
583   if (!os_) {
584     bad_status();
585     return false;
586   }
587 
588   if (min_hugepages_mbytes_ > 0)
589     os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte);
590 
591   if (reserve_mb_ > 0)
592     os_->SetReserveSize(reserve_mb_);
593 
594   if (channels_.size() > 0) {
595     logprintf(6, "Log: Decoding memory: %dx%d bit channels,"
596         "%d modules per channel (x%d), decoding hash 0x%x\n",
597         channels_.size(), channel_width_, channels_[0].size(),
598         channel_width_/channels_[0].size(), channel_hash_);
599     os_->SetDramMappingParams(channel_hash_, channel_width_, &channels_);
600   }
601 
602   if (!os_->Initialize()) {
603     logprintf(0, "Process Error: Failed to initialize OS layer\n");
604     bad_status();
605     delete os_;
606     return false;
607   }
608 
609   // Checks that OS/Build/Platform is supported.
610   if (!CheckEnvironment())
611     return false;
612 
613   if (error_injection_)
614     os_->set_error_injection(true);
615 
616   // Run SAT in monitor only mode, do not continue to allocate resources.
617   if (monitor_mode_) {
618     logprintf(5, "Log: Running in monitor-only mode. "
619                  "Will not allocate any memory nor run any stress test. "
620                  "Only polling ECC errors.\n");
621     return true;
622   }
623 
624   // Allocate the memory to test.
625   if (!AllocateMemory())
626     return false;
627 
628   logprintf(5, "Stats: Starting SAT, %dM, %d seconds\n",
629             static_cast<int>(size_/kMegabyte),
630             runtime_seconds_);
631 
632   if (!InitializePatterns())
633     return false;
634 
635   // Initialize memory allocation.
636   pages_ = size_ / page_length_;
637 
638   // Allocate page queue depending on queue implementation switch.
639   if (pe_q_implementation_ == SAT_FINELOCK) {
640       finelock_q_ = new FineLockPEQueue(pages_, page_length_);
641       if (finelock_q_ == NULL)
642         return false;
643       finelock_q_->set_os(os_);
644       os_->set_err_log_callback(finelock_q_->get_err_log_callback());
645   } else if (pe_q_implementation_ == SAT_ONELOCK) {
646       empty_ = new PageEntryQueue(pages_);
647       valid_ = new PageEntryQueue(pages_);
648       if ((empty_ == NULL) || (valid_ == NULL))
649         return false;
650   }
651 
652   if (!InitializePages()) {
653     logprintf(0, "Process Error: Initialize Pages failed\n");
654     return false;
655   }
656 
657   return true;
658 }
659 
660 // Constructor and destructor.
Sat()661 Sat::Sat() {
662   // Set defaults, command line might override these.
663   runtime_seconds_ = 20;
664   page_length_ = kSatPageSize;
665   disk_pages_ = kSatDiskPage;
666   pages_ = 0;
667   size_mb_ = 0;
668   size_ = size_mb_ * kMegabyte;
669   reserve_mb_ = 0;
670   min_hugepages_mbytes_ = 0;
671   freepages_ = 0;
672   paddr_base_ = 0;
673   channel_hash_ = kCacheLineSize;
674   channel_width_ = 64;
675 
676   user_break_ = false;
677   verbosity_ = 8;
678   Logger::GlobalLogger()->SetVerbosity(verbosity_);
679   print_delay_ = 10;
680   strict_ = 1;
681   warm_ = 0;
682   run_on_anything_ = 0;
683   use_logfile_ = false;
684   logfile_ = 0;
685   log_timestamps_ = true;
686   // Detect 32/64 bit binary.
687   void *pvoid = 0;
688   address_mode_ = sizeof(pvoid) * 8;
689   error_injection_ = false;
690   crazy_error_injection_ = false;
691   max_errorcount_ = 0;  // Zero means no early exit.
692   stop_on_error_ = false;
693   error_poll_ = true;
694   findfiles_ = false;
695 
696   do_page_map_ = false;
697   page_bitmap_ = 0;
698   page_bitmap_size_ = 0;
699 
700   // Cache coherency data initialization.
701   cc_test_ = false;         // Flag to trigger cc threads.
702   cc_cacheline_count_ = 2;  // Two datastructures of cache line size.
703   cc_cacheline_size_ = 0;   // Size of a cacheline (0 for auto-detect).
704   cc_inc_count_ = 1000;     // Number of times to increment the shared variable.
705   cc_cacheline_data_ = 0;   // Cache Line size datastructure.
706 
707   // Cpu frequency data initialization.
708   cpu_freq_test_ = false;   // Flag to trigger cpu frequency thread.
709   cpu_freq_threshold_ = 0;  // Threshold, in MHz, at which a cpu fails.
710   cpu_freq_round_ = 10;     // Round the computed frequency to this value.
711 
712   sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL));
713   file_threads_ = 0;
714   net_threads_ = 0;
715   listen_threads_ = 0;
716   // Default to autodetect number of cpus, and run that many threads.
717   memory_threads_ = -1;
718   invert_threads_ = 0;
719   fill_threads_ = 8;
720   check_threads_ = 0;
721   cpu_stress_threads_ = 0;
722   disk_threads_ = 0;
723   total_threads_ = 0;
724 
725   region_mask_ = 0;
726   region_count_ = 0;
727   for (int i = 0; i < 32; i++) {
728     region_[i] = 0;
729   }
730   region_mode_ = 0;
731 
732   errorcount_ = 0;
733   statuscount_ = 0;
734 
735   valid_ = 0;
736   empty_ = 0;
737   finelock_q_ = 0;
738   // Default to use fine-grain lock for better performance.
739   pe_q_implementation_ = SAT_FINELOCK;
740 
741   os_ = 0;
742   patternlist_ = 0;
743   logfilename_[0] = 0;
744 
745   read_block_size_ = 512;
746   write_block_size_ = -1;
747   segment_size_ = -1;
748   cache_size_ = -1;
749   blocks_per_segment_ = -1;
750   read_threshold_ = -1;
751   write_threshold_ = -1;
752   non_destructive_ = 1;
753   monitor_mode_ = 0;
754   tag_mode_ = 0;
755   random_threads_ = 0;
756 
757   pause_delay_ = 600;
758   pause_duration_ = 15;
759 }
760 
761 // Destructor.
~Sat()762 Sat::~Sat() {
763   // We need to have called Cleanup() at this point.
764   // We should probably enforce this.
765 }
766 
767 
// Helpers for a command line parsing loop. Each macro expects `argc`, `argv`
// and the loop index `i` to be in scope and executes `continue` on a match,
// so it has to be expanded directly in the body of the argument loop.

// Flag option: when argv[i] equals `argument`, store `value` in `variable`.
#define ARG_KVALUE(argument, variable, value)         \
  if (strcmp(argv[i], argument) == 0) {               \
    variable = value;                                 \
    continue;                                         \
  }

// Integer option: when argv[i] equals `argument`, consume the following
// argument and parse it with strtoull (base auto-detected). If there is no
// following argument, `variable` is left untouched.
#define ARG_IVALUE(argument, variable)                \
  if (strcmp(argv[i], argument) == 0) {               \
    if (++i < argc)                                   \
      variable = strtoull(argv[i], NULL, 0);          \
    continue;                                         \
  }

// String option: when argv[i] equals `argument`, consume the following
// argument and copy it into the char array `variable`, truncating to fit.
// If there is no following argument, `variable` is left untouched.
#define ARG_SVALUE(argument, variable)                     \
  if (strcmp(argv[i], argument) == 0) {                    \
    if (++i < argc)                                        \
      snprintf(variable, sizeof(variable), "%s", argv[i]); \
    continue;                                              \
  }
789 
790 // Configures SAT from command line arguments.
791 // This will call exit() given a request for
792 // self-documentation or unexpected args.
ParseArgs(int argc,char ** argv)793 bool Sat::ParseArgs(int argc, char **argv) {
794   int i;
795   uint64 filesize = page_length_ * disk_pages_;
796 
797   // Parse each argument.
798   for (i = 1; i < argc; i++) {
    // Switch to fall back to coarse-grain-lock queue. (for benchmarking)
800     ARG_KVALUE("--coarse_grain_lock", pe_q_implementation_, SAT_ONELOCK);
801 
802     // Set number of megabyte to use.
803     ARG_IVALUE("-M", size_mb_);
804 
805     // Specify the amount of megabytes to be reserved for system.
806     ARG_IVALUE("--reserve_memory", reserve_mb_);
807 
808     // Set minimum megabytes of hugepages to require.
809     ARG_IVALUE("-H", min_hugepages_mbytes_);
810 
811     // Set number of seconds to run.
812     ARG_IVALUE("-s", runtime_seconds_);
813 
814     // Set number of memory copy threads.
815     ARG_IVALUE("-m", memory_threads_);
816 
817     // Set number of memory invert threads.
818     ARG_IVALUE("-i", invert_threads_);
819 
820     // Set number of check-only threads.
821     ARG_IVALUE("-c", check_threads_);
822 
    // Set number of times to increment the shared variable.
824     ARG_IVALUE("--cc_inc_count", cc_inc_count_);
825 
826     // Set number of cache line size datastructures
827     ARG_IVALUE("--cc_line_count", cc_cacheline_count_);
828 
829     // Override the detected or assumed cache line size.
830     ARG_IVALUE("--cc_line_size", cc_cacheline_size_);
831 
832     // Flag set when cache coherency tests need to be run
833     ARG_KVALUE("--cc_test", cc_test_, true);
834 
835     // Set when the cpu_frequency test needs to be run
836     ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true);
837 
838     // Set the threshold in MHz at which the cpu frequency test will fail.
839     ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_);
840 
841     // Set the rounding value for the cpu frequency test. The default is to
842     // round to the nearest 10s value.
843     ARG_IVALUE("--cpu_freq_round", cpu_freq_round_);
844 
845     // Set number of CPU stress threads.
846     ARG_IVALUE("-C", cpu_stress_threads_);
847 
848     // Set logfile name.
849     ARG_SVALUE("-l", logfilename_);
850 
851     // Verbosity level.
852     ARG_IVALUE("-v", verbosity_);
853 
854     // Chatty printout level.
855     ARG_IVALUE("--printsec", print_delay_);
856 
857     // Turn off timestamps logging.
858     ARG_KVALUE("--no_timestamps", log_timestamps_, false);
859 
860     // Set maximum number of errors to collect. Stop running after this many.
861     ARG_IVALUE("--max_errors", max_errorcount_);
862 
863     // Set pattern block size.
864     ARG_IVALUE("-p", page_length_);
865 
    // Override the file size (defaults to page_length_ * disk_pages_).
867     ARG_IVALUE("--filesize", filesize);
868 
869     // NUMA options.
870     ARG_KVALUE("--local_numa", region_mode_, kLocalNuma);
871     ARG_KVALUE("--remote_numa", region_mode_, kRemoteNuma);
872 
873     // Autodetect tempfile locations.
874     ARG_KVALUE("--findfiles", findfiles_, 1);
875 
876     // Inject errors to force miscompare code paths
877     ARG_KVALUE("--force_errors", error_injection_, true);
878     ARG_KVALUE("--force_errors_like_crazy", crazy_error_injection_, true);
879     if (crazy_error_injection_)
880       error_injection_ = true;
881 
    // Stop immediately on any error, for debugging HW problems.
883     ARG_KVALUE("--stop_on_errors", stop_on_error_, 1);
884 
885     // Don't use internal error polling, allow external detection.
886     ARG_KVALUE("--no_errors", error_poll_, 0);
887 
888     // Never check data as you go.
889     ARG_KVALUE("-F", strict_, 0);
890 
891     // Warm the cpu as you go.
892     ARG_KVALUE("-W", warm_, 1);
893 
    // Allow running on unknown systems with base unimplemented OsLayer
895     ARG_KVALUE("-A", run_on_anything_, 1);
896 
897     // Size of read blocks for disk test.
898     ARG_IVALUE("--read-block-size", read_block_size_);
899 
900     // Size of write blocks for disk test.
901     ARG_IVALUE("--write-block-size", write_block_size_);
902 
903     // Size of segment for disk test.
904     ARG_IVALUE("--segment-size", segment_size_);
905 
906     // Size of disk cache size for disk test.
907     ARG_IVALUE("--cache-size", cache_size_);
908 
909     // Number of blocks to test per segment.
910     ARG_IVALUE("--blocks-per-segment", blocks_per_segment_);
911 
912     // Maximum time a block read should take before warning.
913     ARG_IVALUE("--read-threshold", read_threshold_);
914 
915     // Maximum time a block write should take before warning.
916     ARG_IVALUE("--write-threshold", write_threshold_);
917 
    // Clear the non-destructive flag for the disk test (it is set by default).
919     ARG_KVALUE("--destructive", non_destructive_, 0);
920 
921     // Run SAT in monitor mode. No test load at all.
922     ARG_KVALUE("--monitor_mode", monitor_mode_, true);
923 
924     // Run SAT in address mode. Tag all cachelines by virt addr.
925     ARG_KVALUE("--tag_mode", tag_mode_, true);
926 
927     // Dump range map of tested pages..
928     ARG_KVALUE("--do_page_map", do_page_map_, true);
929 
930     // Specify the physical address base to test.
931     ARG_IVALUE("--paddr_base", paddr_base_);
932 
933     // Specify the frequency for power spikes.
934     ARG_IVALUE("--pause_delay", pause_delay_);
935 
936     // Specify the duration of each pause (for power spikes).
937     ARG_IVALUE("--pause_duration", pause_duration_);
938 
939     // Disk device names
940     if (!strcmp(argv[i], "-d")) {
941       i++;
942       if (i < argc) {
943         disk_threads_++;
944         diskfilename_.push_back(string(argv[i]));
945         blocktables_.push_back(new DiskBlockTable());
946       }
947       continue;
948     }
949 
950     // Set number of disk random threads for each disk write thread.
951     ARG_IVALUE("--random-threads", random_threads_);
952 
953     // Set a tempfile to use in a file thread.
954     if (!strcmp(argv[i], "-f")) {
955       i++;
956       if (i < argc) {
957         file_threads_++;
958         filename_.push_back(string(argv[i]));
959       }
960       continue;
961     }
962 
963     // Set a hostname to use in a network thread.
964     if (!strcmp(argv[i], "-n")) {
965       i++;
966       if (i < argc) {
967         net_threads_++;
968         ipaddrs_.push_back(string(argv[i]));
969       }
970       continue;
971     }
972 
973     // Run threads that listen for incoming SAT net connections.
974     ARG_KVALUE("--listen", listen_threads_, 1);
975 
976     if (CheckGoogleSpecificArgs(argc, argv, &i)) {
977       continue;
978     }
979 
980     ARG_IVALUE("--channel_hash", channel_hash_);
981     ARG_IVALUE("--channel_width", channel_width_);
982 
983     if (!strcmp(argv[i], "--memory_channel")) {
984       i++;
985       if (i < argc) {
986         char *channel = argv[i];
987         channels_.push_back(vector<string>());
988         while (char* next = strchr(channel, ',')) {
989           channels_.back().push_back(string(channel, next - channel));
990           channel = next + 1;
991         }
992         channels_.back().push_back(string(channel));
993       }
994       continue;
995     }
996 
997     // Default:
998     PrintVersion();
999     PrintHelp();
1000     if (strcmp(argv[i], "-h") && strcmp(argv[i], "--help")) {
1001       printf("\n Unknown argument %s\n", argv[i]);
1002       bad_status();
1003       exit(1);
1004     }
1005     // Forget it, we printed the help, just bail.
1006     // We don't want to print test status, or any log parser stuff.
1007     exit(0);
1008   }
1009 
1010   Logger::GlobalLogger()->SetVerbosity(verbosity_);
1011 
1012   // Update relevant data members with parsed input.
1013   // Translate MB into bytes.
1014   size_ = static_cast<int64>(size_mb_) * kMegabyte;
1015 
1016   // Set logfile flag.
1017   if (strcmp(logfilename_, ""))
1018     use_logfile_ = true;
1019   // Checks valid page length.
1020   if (page_length_ &&
1021       !(page_length_ & (page_length_ - 1)) &&
1022       (page_length_ > 1023)) {
1023     // Prints if we have changed from default.
1024     if (page_length_ != kSatPageSize)
1025       logprintf(12, "Log: Updating page size to %d\n", page_length_);
1026   } else {
1027     // Revert to default page length.
1028     logprintf(6, "Process Error: "
1029               "Invalid page size %d\n", page_length_);
1030     page_length_ = kSatPageSize;
1031     return false;
1032   }
1033 
1034   // Set disk_pages_ if filesize or page size changed.
1035   if (filesize != static_cast<uint64>(page_length_) *
1036                   static_cast<uint64>(disk_pages_)) {
1037     disk_pages_ = filesize / page_length_;
1038     if (disk_pages_ == 0)
1039       disk_pages_ = 1;
1040   }
1041 
1042   // Validate memory channel parameters if supplied
1043   if (channels_.size()) {
1044     if (channels_.size() == 1) {
1045       channel_hash_ = 0;
1046       logprintf(7, "Log: "
1047           "Only one memory channel...deactivating interleave decoding.\n");
1048     } else if (channels_.size() > 2) {
1049       logprintf(6, "Process Error: "
1050           "Triple-channel mode not yet supported... sorry.\n");
1051       bad_status();
1052       return false;
1053     }
1054     for (uint i = 0; i < channels_.size(); i++)
1055       if (channels_[i].size() != channels_[0].size()) {
1056         logprintf(6, "Process Error: "
1057             "Channels 0 and %d have a different count of dram modules.\n", i);
1058         bad_status();
1059         return false;
1060       }
1061     if (channels_[0].size() & (channels_[0].size() - 1)) {
1062       logprintf(6, "Process Error: "
1063           "Amount of modules per memory channel is not a power of 2.\n");
1064       bad_status();
1065       return false;
1066     }
1067     if (channel_width_ < 16
1068         || channel_width_ & (channel_width_ - 1)) {
1069       logprintf(6, "Process Error: "
1070           "Channel width %d is invalid.\n", channel_width_);
1071       bad_status();
1072       return false;
1073     }
1074     if (channel_width_ / channels_[0].size() < 8) {
1075       logprintf(6, "Process Error: Chip width x%d must be x8 or greater.\n",
1076           channel_width_ / channels_[0].size());
1077       bad_status();
1078       return false;
1079     }
1080   }
1081 
1082 
1083   // Print each argument.
1084   for (int i = 0; i < argc; i++) {
1085     if (i)
1086       cmdline_ += " ";
1087     cmdline_ += argv[i];
1088   }
1089 
1090   return true;
1091 }
1092 
PrintHelp()1093 void Sat::PrintHelp() {
1094   printf("Usage: ./sat(32|64) [options]\n"
1095          " -M mbytes        megabytes of ram to test\n"
1096          " --reserve-memory If not using hugepages, the amount of memory to "
1097          " reserve for the system\n"
1098          " -H mbytes        minimum megabytes of hugepages to require\n"
1099          " -s seconds       number of seconds to run\n"
1100          " -m threads       number of memory copy threads to run\n"
1101          " -i threads       number of memory invert threads to run\n"
1102          " -C threads       number of memory CPU stress threads to run\n"
1103          " --findfiles      find locations to do disk IO automatically\n"
1104          " -d device        add a direct write disk thread with block "
1105          "device (or file) 'device'\n"
1106          " -f filename      add a disk thread with "
1107          "tempfile 'filename'\n"
1108          " -l logfile       log output to file 'logfile'\n"
1109          " --no_timestamps  do not prefix timestamps to log messages\n"
1110          " --max_errors n   exit early after finding 'n' errors\n"
1111          " -v level         verbosity (0-20), default is 8\n"
1112          " --printsec secs  How often to print 'seconds remaining'\n"
1113          " -W               Use more CPU-stressful memory copy\n"
1114          " -A               run in degraded mode on incompatible systems\n"
1115          " -p pagesize      size in bytes of memory chunks\n"
1116          " --filesize size  size of disk IO tempfiles\n"
1117          " -n ipaddr        add a network thread connecting to "
1118          "system at 'ipaddr'\n"
1119          " --listen         run a thread to listen for and respond "
1120          "to network threads.\n"
1121          " --no_errors      run without checking for ECC or other errors\n"
1122          " --force_errors   inject false errors to test error handling\n"
1123          " --force_errors_like_crazy   inject a lot of false errors "
1124          "to test error handling\n"
1125          " -F               don't result check each transaction\n"
1126          " --stop_on_errors  Stop after finding the first error.\n"
1127          " --read-block-size     size of block for reading (-d)\n"
1128          " --write-block-size    size of block for writing (-d). If not "
1129          "defined, the size of block for writing will be defined as the "
1130          "size of block for reading\n"
1131          " --segment-size   size of segments to split disk into (-d)\n"
1132          " --cache-size     size of disk cache (-d)\n"
1133          " --blocks-per-segment  number of blocks to read/write per "
1134          "segment per iteration (-d)\n"
1135          " --read-threshold      maximum time (in us) a block read should "
1136          "take (-d)\n"
1137          " --write-threshold     maximum time (in us) a block write "
1138          "should take (-d)\n"
1139          " --random-threads      number of random threads for each disk "
1140          "write thread (-d)\n"
1141          " --destructive    write/wipe disk partition (-d)\n"
1142          " --monitor_mode   only do ECC error polling, no stress load.\n"
1143          " --cc_test        do the cache coherency testing\n"
1144          " --cc_inc_count   number of times to increment the "
1145          "cacheline's member\n"
1146          " --cc_line_count  number of cache line sized datastructures "
1147          "to allocate for the cache coherency threads to operate\n"
1148          " --cc_line_size   override the auto-detected cache line size\n"
1149          " --cpu_freq_test  enable the cpu frequency test (requires the "
1150          "--cpu_freq_threshold argument to be set)\n"
1151          " --cpu_freq_threshold  fail the cpu frequency test if the frequency "
1152          "goes below this value (specified in MHz)\n"
1153          " --cpu_freq_round round the computed frequency to this value, if set"
1154          " to zero, only round to the nearest MHz\n"
1155          " --paddr_base     allocate memory starting from this address\n"
1156          " --pause_delay    delay (in seconds) between power spikes\n"
1157          " --pause_duration duration (in seconds) of each pause\n"
1158          " --local_numa     choose memory regions associated with "
1159          "each CPU to be tested by that CPU\n"
1160          " --remote_numa    choose memory regions not associated with "
1161          "each CPU to be tested by that CPU\n"
1162          " --channel_hash   mask of address bits XORed to determine channel. "
1163          "Mask 0x40 interleaves cachelines between channels\n"
1164          " --channel_width bits     width in bits of each memory channel\n"
1165          " --memory_channel u1,u2   defines a comma-separated list of names "
1166          "for dram packages in a memory channel. Use multiple times to "
1167          "define multiple channels.\n");
1168 }
1169 
CheckGoogleSpecificArgs(int argc,char ** argv,int * i)1170 bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) {
1171   // Do nothing, no google-specific argument on public stressapptest
1172   return false;
1173 }
1174 
GoogleOsOptions(std::map<std::string,std::string> * options)1175 void Sat::GoogleOsOptions(std::map<std::string, std::string> *options) {
1176   // Do nothing, no OS-specific argument on public stressapptest
1177 }
1178 
1179 // Launch the SAT task threads. Returns 0 on error.
InitializeThreads()1180 void Sat::InitializeThreads() {
1181   // Memory copy threads.
1182   AcquireWorkerLock();
1183 
1184   logprintf(12, "Log: Starting worker threads\n");
1185   WorkerVector *memory_vector = new WorkerVector();
1186 
1187   // Error polling thread.
1188   // This may detect ECC corrected errors, disk problems, or
1189   // any other errors normally hidden from userspace.
1190   WorkerVector *error_vector = new WorkerVector();
1191   if (error_poll_) {
1192     ErrorPollThread *thread = new ErrorPollThread();
1193     thread->InitThread(total_threads_++, this, os_, patternlist_,
1194                        &continuous_status_);
1195 
1196     error_vector->insert(error_vector->end(), thread);
1197   } else {
1198     logprintf(5, "Log: Skipping error poll thread due to --no_errors flag\n");
1199   }
1200   workers_map_.insert(make_pair(kErrorType, error_vector));
1201 
1202   // Only start error poll threads for monitor-mode SAT,
1203   // skip all other types of worker threads.
1204   if (monitor_mode_) {
1205     ReleaseWorkerLock();
1206     return;
1207   }
1208 
1209   for (int i = 0; i < memory_threads_; i++) {
1210     CopyThread *thread = new CopyThread();
1211     thread->InitThread(total_threads_++, this, os_, patternlist_,
1212                        &power_spike_status_);
1213 
1214     if ((region_count_ > 1) && (region_mode_)) {
1215       int32 region = region_find(i % region_count_);
1216       cpu_set_t *cpuset = os_->FindCoreMask(region);
1217       sat_assert(cpuset);
1218       if (region_mode_ == kLocalNuma) {
1219         // Choose regions associated with this CPU.
1220         thread->set_cpu_mask(cpuset);
1221         thread->set_tag(1 << region);
1222       } else if (region_mode_ == kRemoteNuma) {
1223         // Choose regions not associated with this CPU..
1224         thread->set_cpu_mask(cpuset);
1225         thread->set_tag(region_mask_ & ~(1 << region));
1226       }
1227     } else {
1228       cpu_set_t available_cpus;
1229       thread->AvailableCpus(&available_cpus);
1230       int cores = cpuset_count(&available_cpus);
1231       // Don't restrict thread location if we have more than one
1232       // thread per core. Not so good for performance.
1233       if (cpu_stress_threads_ + memory_threads_ <= cores) {
1234         // Place a thread on alternating cores first.
1235         // This assures interleaved core use with no overlap.
1236         int nthcore = i;
1237         int nthbit = (((2 * nthcore) % cores) +
1238                       (((2 * nthcore) / cores) % 2)) % cores;
1239         cpu_set_t all_cores;
1240         cpuset_set_ab(&all_cores, 0, cores);
1241         if (!cpuset_isequal(&available_cpus, &all_cores)) {
1242           // We are assuming the bits are contiguous.
1243           // Complain if this is not so.
1244           logprintf(0, "Log: cores = %s, expected %s\n",
1245                     cpuset_format(&available_cpus).c_str(),
1246                     cpuset_format(&all_cores).c_str());
1247         }
1248 
1249         // Set thread affinity.
1250         thread->set_cpu_mask_to_cpu(nthbit);
1251       }
1252     }
1253     memory_vector->insert(memory_vector->end(), thread);
1254   }
1255   workers_map_.insert(make_pair(kMemoryType, memory_vector));
1256 
1257   // File IO threads.
1258   WorkerVector *fileio_vector = new WorkerVector();
1259   for (int i = 0; i < file_threads_; i++) {
1260     FileThread *thread = new FileThread();
1261     thread->InitThread(total_threads_++, this, os_, patternlist_,
1262                        &power_spike_status_);
1263     thread->SetFile(filename_[i].c_str());
1264     // Set disk threads high priority. They don't take much processor time,
1265     // but blocking them will delay disk IO.
1266     thread->SetPriority(WorkerThread::High);
1267 
1268     fileio_vector->insert(fileio_vector->end(), thread);
1269   }
1270   workers_map_.insert(make_pair(kFileIOType, fileio_vector));
1271 
1272   // Net IO threads.
1273   WorkerVector *netio_vector = new WorkerVector();
1274   WorkerVector *netslave_vector = new WorkerVector();
1275   if (listen_threads_ > 0) {
1276     // Create a network slave thread. This listens for connections.
1277     NetworkListenThread *thread = new NetworkListenThread();
1278     thread->InitThread(total_threads_++, this, os_, patternlist_,
1279                        &continuous_status_);
1280 
1281     netslave_vector->insert(netslave_vector->end(), thread);
1282   }
1283   for (int i = 0; i < net_threads_; i++) {
1284     NetworkThread *thread = new NetworkThread();
1285     thread->InitThread(total_threads_++, this, os_, patternlist_,
1286                        &continuous_status_);
1287     thread->SetIP(ipaddrs_[i].c_str());
1288 
1289     netio_vector->insert(netio_vector->end(), thread);
1290   }
1291   workers_map_.insert(make_pair(kNetIOType, netio_vector));
1292   workers_map_.insert(make_pair(kNetSlaveType, netslave_vector));
1293 
1294   // Result check threads.
1295   WorkerVector *check_vector = new WorkerVector();
1296   for (int i = 0; i < check_threads_; i++) {
1297     CheckThread *thread = new CheckThread();
1298     thread->InitThread(total_threads_++, this, os_, patternlist_,
1299                        &continuous_status_);
1300 
1301     check_vector->insert(check_vector->end(), thread);
1302   }
1303   workers_map_.insert(make_pair(kCheckType, check_vector));
1304 
1305   // Memory invert threads.
1306   logprintf(12, "Log: Starting invert threads\n");
1307   WorkerVector *invert_vector = new WorkerVector();
1308   for (int i = 0; i < invert_threads_; i++) {
1309     InvertThread *thread = new InvertThread();
1310     thread->InitThread(total_threads_++, this, os_, patternlist_,
1311                        &continuous_status_);
1312 
1313     invert_vector->insert(invert_vector->end(), thread);
1314   }
1315   workers_map_.insert(make_pair(kInvertType, invert_vector));
1316 
1317   // Disk stress threads.
1318   WorkerVector *disk_vector = new WorkerVector();
1319   WorkerVector *random_vector = new WorkerVector();
1320   logprintf(12, "Log: Starting disk stress threads\n");
1321   for (int i = 0; i < disk_threads_; i++) {
1322     // Creating write threads
1323     DiskThread *thread = new DiskThread(blocktables_[i]);
1324     thread->InitThread(total_threads_++, this, os_, patternlist_,
1325                        &power_spike_status_);
1326     thread->SetDevice(diskfilename_[i].c_str());
1327     if (thread->SetParameters(read_block_size_, write_block_size_,
1328                               segment_size_, cache_size_,
1329                               blocks_per_segment_,
1330                               read_threshold_, write_threshold_,
1331                               non_destructive_)) {
1332       disk_vector->insert(disk_vector->end(), thread);
1333     } else {
1334       logprintf(12, "Log: DiskThread::SetParameters() failed\n");
1335       delete thread;
1336     }
1337 
1338     for (int j = 0; j < random_threads_; j++) {
1339       // Creating random threads
1340       RandomDiskThread *rthread = new RandomDiskThread(blocktables_[i]);
1341       rthread->InitThread(total_threads_++, this, os_, patternlist_,
1342                           &power_spike_status_);
1343       rthread->SetDevice(diskfilename_[i].c_str());
1344       if (rthread->SetParameters(read_block_size_, write_block_size_,
1345                                  segment_size_, cache_size_,
1346                                  blocks_per_segment_,
1347                                  read_threshold_, write_threshold_,
1348                                  non_destructive_)) {
1349         random_vector->insert(random_vector->end(), rthread);
1350       } else {
1351       logprintf(12, "Log: RandomDiskThread::SetParameters() failed\n");
1352         delete rthread;
1353       }
1354     }
1355   }
1356 
1357   workers_map_.insert(make_pair(kDiskType, disk_vector));
1358   workers_map_.insert(make_pair(kRandomDiskType, random_vector));
1359 
1360   // CPU stress threads.
1361   WorkerVector *cpu_vector = new WorkerVector();
1362   logprintf(12, "Log: Starting cpu stress threads\n");
1363   for (int i = 0; i < cpu_stress_threads_; i++) {
1364     CpuStressThread *thread = new CpuStressThread();
1365     thread->InitThread(total_threads_++, this, os_, patternlist_,
1366                        &continuous_status_);
1367 
1368     // Don't restrict thread location if we have more than one
1369     // thread per core. Not so good for performance.
1370     cpu_set_t available_cpus;
1371     thread->AvailableCpus(&available_cpus);
1372     int cores = cpuset_count(&available_cpus);
1373     if (cpu_stress_threads_ + memory_threads_ <= cores) {
1374       // Place a thread on alternating cores first.
1375       // Go in reverse order for CPU stress threads. This assures interleaved
1376       // core use with no overlap.
1377       int nthcore = (cores - 1) - i;
1378       int nthbit = (((2 * nthcore) % cores) +
1379                     (((2 * nthcore) / cores) % 2)) % cores;
1380       cpu_set_t all_cores;
1381       cpuset_set_ab(&all_cores, 0, cores);
1382       if (!cpuset_isequal(&available_cpus, &all_cores)) {
1383         logprintf(0, "Log: cores = %s, expected %s\n",
1384                   cpuset_format(&available_cpus).c_str(),
1385                   cpuset_format(&all_cores).c_str());
1386       }
1387 
1388       // Set thread affinity.
1389       thread->set_cpu_mask_to_cpu(nthbit);
1390     }
1391 
1392 
1393     cpu_vector->insert(cpu_vector->end(), thread);
1394   }
1395   workers_map_.insert(make_pair(kCPUType, cpu_vector));
1396 
1397   // CPU Cache Coherency Threads - one for each core available.
1398   if (cc_test_) {
1399     WorkerVector *cc_vector = new WorkerVector();
1400     logprintf(12, "Log: Starting cpu cache coherency threads\n");
1401 
1402     // Allocate the shared datastructure to be worked on by the threads.
1403     cc_cacheline_data_ = reinterpret_cast<cc_cacheline_data*>(
1404         malloc(sizeof(cc_cacheline_data) * cc_cacheline_count_));
1405     sat_assert(cc_cacheline_data_ != NULL);
1406 
1407     // Initialize the strucutre.
1408     memset(cc_cacheline_data_, 0,
1409            sizeof(cc_cacheline_data) * cc_cacheline_count_);
1410 
1411     int num_cpus = CpuCount();
1412     char *num;
1413     // Calculate the number of cache lines needed just to give each core
1414     // its own counter.
1415     int line_size = cc_cacheline_size_;
1416     if (line_size <= 0) {
1417       line_size = CacheLineSize();
1418       if (line_size < kCacheLineSize)
1419         line_size = kCacheLineSize;
1420       logprintf(12, "Log: Using %d as cache line size\n", line_size);
1421     }
1422     // The number of cache lines needed to hold an array of num_cpus.
1423     // "num" must be the same type as cc_cacheline_data[X].num or the memory
1424     // size calculations will fail.
1425     int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size;
1426     // Allocate all the nums once so that we get a single chunk
1427     // of contiguous memory.
1428 #ifdef HAVE_POSIX_MEMALIGN
1429     int err_result = posix_memalign(
1430         reinterpret_cast<void**>(&num),
1431         line_size, line_size * needed_lines * cc_cacheline_count_);
1432 #else
1433     num = reinterpret_cast<int*>(memalign(
1434         line_size, line_size * needed_lines * cc_cacheline_count_));
1435     int err_result = (num == 0);
1436 #endif
1437     sat_assert(err_result == 0);
1438 
1439     int cline;
1440     for (cline = 0; cline < cc_cacheline_count_; cline++) {
1441       memset(num, 0, sizeof(*num) * num_cpus);
1442       cc_cacheline_data_[cline].num = num;
1443       num += (line_size * needed_lines) / sizeof(*num);
1444     }
1445 
1446     int tnum;
1447     for (tnum = 0; tnum < num_cpus; tnum++) {
1448       CpuCacheCoherencyThread *thread =
1449           new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_,
1450                                       tnum, num_cpus, cc_inc_count_);
1451       thread->InitThread(total_threads_++, this, os_, patternlist_,
1452                          &continuous_status_);
1453       // Pin the thread to a particular core.
1454       thread->set_cpu_mask_to_cpu(tnum);
1455 
1456       // Insert the thread into the vector.
1457       cc_vector->insert(cc_vector->end(), thread);
1458     }
1459     workers_map_.insert(make_pair(kCCType, cc_vector));
1460   }
1461 
1462   if (cpu_freq_test_) {
1463     // Create the frequency test thread.
1464     logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n",
1465               cpu_freq_threshold_);
1466     CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_,
1467                                               cpu_freq_round_);
1468     // This thread should be paused when other threads are paused.
1469     thread->InitThread(total_threads_++, this, os_, NULL,
1470                        &power_spike_status_);
1471 
1472     WorkerVector *cpu_freq_vector = new WorkerVector();
1473     cpu_freq_vector->insert(cpu_freq_vector->end(), thread);
1474     workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector));
1475   }
1476 
1477   ReleaseWorkerLock();
1478 }
1479 
1480 // Return the number of cpus actually present in the machine.
CpuCount()1481 int Sat::CpuCount() {
1482   return sysconf(_SC_NPROCESSORS_CONF);
1483 }
1484 
1485 // Return the worst case (largest) cache line size of the various levels of
1486 // cache actually prsent in the machine.
CacheLineSize()1487 int Sat::CacheLineSize() {
1488   int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
1489   int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
1490   if (linesize > max_linesize) max_linesize = linesize;
1491   linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
1492   if (linesize > max_linesize) max_linesize = linesize;
1493   linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE);
1494   if (linesize > max_linesize) max_linesize = linesize;
1495   return max_linesize;
1496 }
1497 
1498 // Notify and reap worker threads.
JoinThreads()1499 void Sat::JoinThreads() {
1500   logprintf(12, "Log: Joining worker threads\n");
1501   power_spike_status_.StopWorkers();
1502   continuous_status_.StopWorkers();
1503 
1504   AcquireWorkerLock();
1505   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1506        map_it != workers_map_.end(); ++map_it) {
1507     for (WorkerVector::const_iterator it = map_it->second->begin();
1508          it != map_it->second->end(); ++it) {
1509       logprintf(12, "Log: Joining thread %d\n", (*it)->ThreadID());
1510       (*it)->JoinThread();
1511     }
1512   }
1513   ReleaseWorkerLock();
1514 
1515   QueueStats();
1516 
1517   // Finish up result checking.
1518   // Spawn 4 check threads to minimize check time.
1519   logprintf(12, "Log: Finished countdown, begin to result check\n");
1520   WorkerStatus reap_check_status;
1521   WorkerVector reap_check_vector;
1522 
1523   // No need for check threads for monitor mode.
1524   if (!monitor_mode_) {
1525     // Initialize the check threads.
1526     for (int i = 0; i < fill_threads_; i++) {
1527       CheckThread *thread = new CheckThread();
1528       thread->InitThread(total_threads_++, this, os_, patternlist_,
1529                          &reap_check_status);
1530       logprintf(12, "Log: Finished countdown, begin to result check\n");
1531       reap_check_vector.push_back(thread);
1532     }
1533   }
1534 
1535   reap_check_status.Initialize();
1536   // Check threads should be marked to stop ASAP.
1537   reap_check_status.StopWorkers();
1538 
1539   // Spawn the check threads.
1540   for (WorkerVector::const_iterator it = reap_check_vector.begin();
1541        it != reap_check_vector.end(); ++it) {
1542     logprintf(12, "Log: Spawning thread %d\n", (*it)->ThreadID());
1543     (*it)->SpawnThread();
1544   }
1545 
1546   // Join the check threads.
1547   for (WorkerVector::const_iterator it = reap_check_vector.begin();
1548        it != reap_check_vector.end(); ++it) {
1549     logprintf(12, "Log: Joining thread %d\n", (*it)->ThreadID());
1550     (*it)->JoinThread();
1551   }
1552 
1553   // Reap all children. Stopped threads should have already ended.
1554   // Result checking threads will end when they have finished
1555   // result checking.
1556   logprintf(12, "Log: Join all outstanding threads\n");
1557 
1558   // Find all errors.
1559   errorcount_ = GetTotalErrorCount();
1560 
1561   AcquireWorkerLock();
1562   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1563        map_it != workers_map_.end(); ++map_it) {
1564     for (WorkerVector::const_iterator it = map_it->second->begin();
1565          it != map_it->second->end(); ++it) {
1566       logprintf(12, "Log: Reaping thread status %d\n", (*it)->ThreadID());
1567       if ((*it)->GetStatus() != 1) {
1568         logprintf(0, "Process Error: Thread %d failed with status %d at "
1569                   "%.2f seconds\n",
1570                   (*it)->ThreadID(), (*it)->GetStatus(),
1571                   (*it)->GetRunDurationUSec()*1.0/1000000);
1572         bad_status();
1573       }
1574       int priority = 12;
1575       if ((*it)->GetErrorCount())
1576         priority = 5;
1577       logprintf(priority, "Log: Thread %d found %lld hardware incidents\n",
1578                 (*it)->ThreadID(), (*it)->GetErrorCount());
1579     }
1580   }
1581   ReleaseWorkerLock();
1582 
1583 
1584   // Add in any errors from check threads.
1585   for (WorkerVector::const_iterator it = reap_check_vector.begin();
1586        it != reap_check_vector.end(); ++it) {
1587     logprintf(12, "Log: Reaping thread status %d\n", (*it)->ThreadID());
1588     if ((*it)->GetStatus() != 1) {
1589       logprintf(0, "Process Error: Thread %d failed with status %d at "
1590                 "%.2f seconds\n",
1591                 (*it)->ThreadID(), (*it)->GetStatus(),
1592                 (*it)->GetRunDurationUSec()*1.0/1000000);
1593       bad_status();
1594     }
1595     errorcount_ += (*it)->GetErrorCount();
1596     int priority = 12;
1597     if ((*it)->GetErrorCount())
1598       priority = 5;
1599     logprintf(priority, "Log: Thread %d found %lld hardware incidents\n",
1600               (*it)->ThreadID(), (*it)->GetErrorCount());
1601     delete (*it);
1602   }
1603   reap_check_vector.clear();
1604   reap_check_status.Destroy();
1605 }
1606 
// Print queuing information by delegating to the QueueAnalysis() method of
// the finelock_q_ member.
void Sat::QueueStats() {
  finelock_q_->QueueAnalysis();
}
1611 
AnalysisAllStats()1612 void Sat::AnalysisAllStats() {
1613   float max_runtime_sec = 0.;
1614   float total_data = 0.;
1615   float total_bandwidth = 0.;
1616   float thread_runtime_sec = 0.;
1617 
1618   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1619        map_it != workers_map_.end(); ++map_it) {
1620     for (WorkerVector::const_iterator it = map_it->second->begin();
1621          it != map_it->second->end(); ++it) {
1622       thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000.;
1623       total_data += (*it)->GetMemoryCopiedData();
1624       total_data += (*it)->GetDeviceCopiedData();
1625       if (thread_runtime_sec > max_runtime_sec) {
1626         max_runtime_sec = thread_runtime_sec;
1627       }
1628     }
1629   }
1630 
1631   total_bandwidth = total_data / max_runtime_sec;
1632 
1633   logprintf(0, "Stats: Completed: %.2fM in %.2fs %.2fMB/s, "
1634             "with %d hardware incidents, %d errors\n",
1635             total_data,
1636             max_runtime_sec,
1637             total_bandwidth,
1638             errorcount_,
1639             statuscount_);
1640 }
1641 
MemoryStats()1642 void Sat::MemoryStats() {
1643   float memcopy_data = 0.;
1644   float memcopy_bandwidth = 0.;
1645   WorkerMap::const_iterator mem_it = workers_map_.find(
1646       static_cast<int>(kMemoryType));
1647   WorkerMap::const_iterator file_it = workers_map_.find(
1648       static_cast<int>(kFileIOType));
1649   sat_assert(mem_it != workers_map_.end());
1650   sat_assert(file_it != workers_map_.end());
1651   for (WorkerVector::const_iterator it = mem_it->second->begin();
1652        it != mem_it->second->end(); ++it) {
1653     memcopy_data += (*it)->GetMemoryCopiedData();
1654     memcopy_bandwidth += (*it)->GetMemoryBandwidth();
1655   }
1656   for (WorkerVector::const_iterator it = file_it->second->begin();
1657        it != file_it->second->end(); ++it) {
1658     memcopy_data += (*it)->GetMemoryCopiedData();
1659     memcopy_bandwidth += (*it)->GetMemoryBandwidth();
1660   }
1661   GoogleMemoryStats(&memcopy_data, &memcopy_bandwidth);
1662   logprintf(4, "Stats: Memory Copy: %.2fM at %.2fMB/s\n",
1663             memcopy_data,
1664             memcopy_bandwidth);
1665 }
1666 
GoogleMemoryStats(float * memcopy_data,float * memcopy_bandwidth)1667 void Sat::GoogleMemoryStats(float *memcopy_data,
1668                             float *memcopy_bandwidth) {
1669   // Do nothing, should be implemented by subclasses.
1670 }
1671 
FileStats()1672 void Sat::FileStats() {
1673   float file_data = 0.;
1674   float file_bandwidth = 0.;
1675   WorkerMap::const_iterator file_it = workers_map_.find(
1676       static_cast<int>(kFileIOType));
1677   sat_assert(file_it != workers_map_.end());
1678   for (WorkerVector::const_iterator it = file_it->second->begin();
1679        it != file_it->second->end(); ++it) {
1680     file_data += (*it)->GetDeviceCopiedData();
1681     file_bandwidth += (*it)->GetDeviceBandwidth();
1682   }
1683   logprintf(4, "Stats: File Copy: %.2fM at %.2fMB/s\n",
1684             file_data,
1685             file_bandwidth);
1686 }
1687 
CheckStats()1688 void Sat::CheckStats() {
1689   float check_data = 0.;
1690   float check_bandwidth = 0.;
1691   WorkerMap::const_iterator check_it = workers_map_.find(
1692       static_cast<int>(kCheckType));
1693   sat_assert(check_it != workers_map_.end());
1694   for (WorkerVector::const_iterator it = check_it->second->begin();
1695        it != check_it->second->end(); ++it) {
1696     check_data += (*it)->GetMemoryCopiedData();
1697     check_bandwidth += (*it)->GetMemoryBandwidth();
1698   }
1699   logprintf(4, "Stats: Data Check: %.2fM at %.2fMB/s\n",
1700             check_data,
1701             check_bandwidth);
1702 }
1703 
NetStats()1704 void Sat::NetStats() {
1705   float net_data = 0.;
1706   float net_bandwidth = 0.;
1707   WorkerMap::const_iterator netio_it = workers_map_.find(
1708       static_cast<int>(kNetIOType));
1709   WorkerMap::const_iterator netslave_it = workers_map_.find(
1710       static_cast<int>(kNetSlaveType));
1711   sat_assert(netio_it != workers_map_.end());
1712   sat_assert(netslave_it != workers_map_.end());
1713   for (WorkerVector::const_iterator it = netio_it->second->begin();
1714        it != netio_it->second->end(); ++it) {
1715     net_data += (*it)->GetDeviceCopiedData();
1716     net_bandwidth += (*it)->GetDeviceBandwidth();
1717   }
1718   for (WorkerVector::const_iterator it = netslave_it->second->begin();
1719        it != netslave_it->second->end(); ++it) {
1720     net_data += (*it)->GetDeviceCopiedData();
1721     net_bandwidth += (*it)->GetDeviceBandwidth();
1722   }
1723   logprintf(4, "Stats: Net Copy: %.2fM at %.2fMB/s\n",
1724             net_data,
1725             net_bandwidth);
1726 }
1727 
InvertStats()1728 void Sat::InvertStats() {
1729   float invert_data = 0.;
1730   float invert_bandwidth = 0.;
1731   WorkerMap::const_iterator invert_it = workers_map_.find(
1732       static_cast<int>(kInvertType));
1733   sat_assert(invert_it != workers_map_.end());
1734   for (WorkerVector::const_iterator it = invert_it->second->begin();
1735        it != invert_it->second->end(); ++it) {
1736     invert_data += (*it)->GetMemoryCopiedData();
1737     invert_bandwidth += (*it)->GetMemoryBandwidth();
1738   }
1739   logprintf(4, "Stats: Invert Data: %.2fM at %.2fMB/s\n",
1740             invert_data,
1741             invert_bandwidth);
1742 }
1743 
DiskStats()1744 void Sat::DiskStats() {
1745   float disk_data = 0.;
1746   float disk_bandwidth = 0.;
1747   WorkerMap::const_iterator disk_it = workers_map_.find(
1748       static_cast<int>(kDiskType));
1749   WorkerMap::const_iterator random_it = workers_map_.find(
1750       static_cast<int>(kRandomDiskType));
1751   sat_assert(disk_it != workers_map_.end());
1752   sat_assert(random_it != workers_map_.end());
1753   for (WorkerVector::const_iterator it = disk_it->second->begin();
1754        it != disk_it->second->end(); ++it) {
1755     disk_data += (*it)->GetDeviceCopiedData();
1756     disk_bandwidth += (*it)->GetDeviceBandwidth();
1757   }
1758   for (WorkerVector::const_iterator it = random_it->second->begin();
1759        it != random_it->second->end(); ++it) {
1760     disk_data += (*it)->GetDeviceCopiedData();
1761     disk_bandwidth += (*it)->GetDeviceBandwidth();
1762   }
1763 
1764   logprintf(4, "Stats: Disk: %.2fM at %.2fMB/s\n",
1765             disk_data,
1766             disk_bandwidth);
1767 }
1768 
// Process worker thread data for bandwidth information and error results.
//
// Emits the overall summary first, followed by one report per worker
// category.  The reports are produced in the order listed below, which is
// therefore also the order in which they appear in the log.
// Further reports can be added by subclassing Sat.
void Sat::RunAnalysis() {
  AnalysisAllStats();
  MemoryStats();
  FileStats();
  NetStats();
  CheckStats();
  InvertStats();
  DiskStats();
}
1780 
1781 // Get total error count, summing across all threads..
GetTotalErrorCount()1782 int64 Sat::GetTotalErrorCount() {
1783   int64 errors = 0;
1784 
1785   AcquireWorkerLock();
1786   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1787        map_it != workers_map_.end(); ++map_it) {
1788     for (WorkerVector::const_iterator it = map_it->second->begin();
1789          it != map_it->second->end(); ++it) {
1790       errors += (*it)->GetErrorCount();
1791     }
1792   }
1793   ReleaseWorkerLock();
1794   return errors;
1795 }
1796 
1797 
SpawnThreads()1798 void Sat::SpawnThreads() {
1799   logprintf(12, "Log: Initializing WorkerStatus objects\n");
1800   power_spike_status_.Initialize();
1801   continuous_status_.Initialize();
1802   logprintf(12, "Log: Spawning worker threads\n");
1803   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1804        map_it != workers_map_.end(); ++map_it) {
1805     for (WorkerVector::const_iterator it = map_it->second->begin();
1806          it != map_it->second->end(); ++it) {
1807       logprintf(12, "Log: Spawning thread %d\n", (*it)->ThreadID());
1808       (*it)->SpawnThread();
1809     }
1810   }
1811 }
1812 
1813 // Delete used worker thread objects.
DeleteThreads()1814 void Sat::DeleteThreads() {
1815   logprintf(12, "Log: Deleting worker threads\n");
1816   for (WorkerMap::const_iterator map_it = workers_map_.begin();
1817        map_it != workers_map_.end(); ++map_it) {
1818     for (WorkerVector::const_iterator it = map_it->second->begin();
1819          it != map_it->second->end(); ++it) {
1820       logprintf(12, "Log: Deleting thread %d\n", (*it)->ThreadID());
1821       delete (*it);
1822     }
1823     delete map_it->second;
1824   }
1825   workers_map_.clear();
1826   logprintf(12, "Log: Destroying WorkerStatus objects\n");
1827   power_spike_status_.Destroy();
1828   continuous_status_.Destroy();
1829 }
1830 
namespace {
// Calculates the next time an action in Sat::Run() should occur, based on a
// schedule derived from a start point and a regular frequency.
//
// Using frequencies instead of intervals with their accompanying drift allows
// users to better predict when the actions will occur throughout a run.
//
// The result is the first point of the form start + k * frequency (k >= 1)
// that lies strictly after 'now', for now >= start and frequency > 0.
//
// Arguments:
//   frequency: seconds.  A value of 0 has no schedule to align to; in that
//              case 'now' is returned so the action is simply due
//              immediately, instead of dividing by zero.
//   start: unixtime
//   now: unixtime
//
// Returns: unixtime
inline time_t NextOccurance(time_t frequency, time_t start, time_t now) {
  if (frequency == 0) {
    // Guard the division below; callers pass user-configurable delays.
    return now;
  }
  // Number of whole periods that have already elapsed since 'start'.
  const time_t elapsed_periods = (now - start) / frequency;
  return start + frequency + (elapsed_periods * frequency);
}
}  // namespace
1848 
1849 // Run the actual test.
Run()1850 bool Sat::Run() {
1851   // Install signal handlers to gracefully exit in the middle of a run.
1852   //
1853   // Why go through this whole rigmarole?  It's the only standards-compliant
1854   // (C++ and POSIX) way to handle signals in a multithreaded program.
1855   // Specifically:
1856   //
1857   // 1) (C++) The value of a variable not of type "volatile sig_atomic_t" is
1858   //    unspecified upon entering a signal handler and, if modified by the
1859   //    handler, is unspecified after leaving the handler.
1860   //
1861   // 2) (POSIX) After the value of a variable is changed in one thread, another
1862   //    thread is only guaranteed to see the new value after both threads have
1863   //    acquired or released the same mutex or rwlock, synchronized to the
1864   //    same barrier, or similar.
1865   //
1866   // #1 prevents the use of #2 in a signal handler, so the signal handler must
1867   // be called in the same thread that reads the "volatile sig_atomic_t"
1868   // variable it sets.  We enforce that by blocking the signals in question in
1869   // the worker threads, forcing them to be handled by this thread.
1870   logprintf(12, "Log: Installing signal handlers\n");
1871   sigset_t new_blocked_signals;
1872   sigemptyset(&new_blocked_signals);
1873   sigaddset(&new_blocked_signals, SIGINT);
1874   sigaddset(&new_blocked_signals, SIGTERM);
1875   sigset_t prev_blocked_signals;
1876   pthread_sigmask(SIG_BLOCK, &new_blocked_signals, &prev_blocked_signals);
1877   sighandler_t prev_sigint_handler = signal(SIGINT, SatHandleBreak);
1878   sighandler_t prev_sigterm_handler = signal(SIGTERM, SatHandleBreak);
1879 
1880   // Kick off all the worker threads.
1881   logprintf(12, "Log: Launching worker threads\n");
1882   InitializeThreads();
1883   SpawnThreads();
1884   pthread_sigmask(SIG_SETMASK, &prev_blocked_signals, NULL);
1885 
1886   logprintf(12, "Log: Starting countdown with %d seconds\n", runtime_seconds_);
1887 
1888   // In seconds.
1889   static const time_t kSleepFrequency = 5;
1890   // All of these are in seconds.  You probably want them to be >=
1891   // kSleepFrequency and multiples of kSleepFrequency, but neither is necessary.
1892   static const time_t kInjectionFrequency = 10;
1893   // print_delay_ determines "seconds remaining" chatty update.
1894 
1895   const time_t start = time(NULL);
1896   const time_t end = start + runtime_seconds_;
1897   time_t now = start;
1898   time_t next_print = start + print_delay_;
1899   time_t next_pause = start + pause_delay_;
1900   time_t next_resume = 0;
1901   time_t next_injection;
1902   if (crazy_error_injection_) {
1903     next_injection = start + kInjectionFrequency;
1904   } else {
1905     next_injection = 0;
1906   }
1907 
1908   while (now < end) {
1909     // This is an int because it's for logprintf().
1910     const int seconds_remaining = end - now;
1911 
1912     if (user_break_) {
1913       // Handle early exit.
1914       logprintf(0, "Log: User exiting early (%d seconds remaining)\n",
1915                 seconds_remaining);
1916       break;
1917     }
1918 
1919     // If we have an error limit, check it here and see if we should exit.
1920     if (max_errorcount_ != 0) {
1921       uint64 errors = GetTotalErrorCount();
1922       if (errors > max_errorcount_) {
1923         logprintf(0, "Log: Exiting early (%d seconds remaining) "
1924                      "due to excessive failures (%lld)\n",
1925                   seconds_remaining,
1926                   errors);
1927         break;
1928       }
1929     }
1930 
1931     if (now >= next_print) {
1932       // Print a count down message.
1933       logprintf(5, "Log: Seconds remaining: %d\n", seconds_remaining);
1934       next_print = NextOccurance(print_delay_, start, now);
1935     }
1936 
1937     if (next_injection && now >= next_injection) {
1938       // Inject an error.
1939       logprintf(4, "Log: Injecting error (%d seconds remaining)\n",
1940                 seconds_remaining);
1941       struct page_entry src;
1942       GetValid(&src);
1943       src.pattern = patternlist_->GetPattern(0);
1944       PutValid(&src);
1945       next_injection = NextOccurance(kInjectionFrequency, start, now);
1946     }
1947 
1948     if (next_pause && now >= next_pause) {
1949       // Tell worker threads to pause in preparation for a power spike.
1950       logprintf(4, "Log: Pausing worker threads in preparation for power spike "
1951                 "(%d seconds remaining)\n", seconds_remaining);
1952       power_spike_status_.PauseWorkers();
1953       logprintf(12, "Log: Worker threads paused\n");
1954       next_pause = 0;
1955       next_resume = now + pause_duration_;
1956     }
1957 
1958     if (next_resume && now >= next_resume) {
1959       // Tell worker threads to resume in order to cause a power spike.
1960       logprintf(4, "Log: Resuming worker threads to cause a power spike (%d "
1961                 "seconds remaining)\n", seconds_remaining);
1962       power_spike_status_.ResumeWorkers();
1963       logprintf(12, "Log: Worker threads resumed\n");
1964       next_pause = NextOccurance(pause_delay_, start, now);
1965       next_resume = 0;
1966     }
1967 
1968     sat_sleep(NextOccurance(kSleepFrequency, start, now) - now);
1969     now = time(NULL);
1970   }
1971 
1972   JoinThreads();
1973 
1974   logprintf(0, "Stats: Found %lld hardware incidents\n", errorcount_);
1975 
1976   if (!monitor_mode_)
1977     RunAnalysis();
1978 
1979   DeleteThreads();
1980 
1981   logprintf(12, "Log: Uninstalling signal handlers\n");
1982   signal(SIGINT, prev_sigint_handler);
1983   signal(SIGTERM, prev_sigterm_handler);
1984 
1985   return true;
1986 }
1987 
1988 // Clean up all resources.
Cleanup()1989 bool Sat::Cleanup() {
1990   g_sat = NULL;
1991   Logger::GlobalLogger()->StopThread();
1992   Logger::GlobalLogger()->SetStdoutOnly();
1993   if (logfile_) {
1994     close(logfile_);
1995     logfile_ = 0;
1996   }
1997   if (patternlist_) {
1998     patternlist_->Destroy();
1999     delete patternlist_;
2000     patternlist_ = 0;
2001   }
2002   if (os_) {
2003     os_->FreeTestMem();
2004     delete os_;
2005     os_ = 0;
2006   }
2007   if (empty_) {
2008     delete empty_;
2009     empty_ = 0;
2010   }
2011   if (valid_) {
2012     delete valid_;
2013     valid_ = 0;
2014   }
2015   if (finelock_q_) {
2016     delete finelock_q_;
2017     finelock_q_ = 0;
2018   }
2019   if (page_bitmap_) {
2020     delete[] page_bitmap_;
2021   }
2022 
2023   for (size_t i = 0; i < blocktables_.size(); i++) {
2024     delete blocktables_[i];
2025   }
2026 
2027   if (cc_cacheline_data_) {
2028     // The num integer arrays for all the cacheline structures are
2029     // allocated as a single chunk. The pointers in the cacheline struct
2030     // are populated accordingly. Hence calling free on the first
2031     // cacheline's num's address is going to free the entire array.
2032     // TODO(aganti): Refactor this to have a class for the cacheline
2033     // structure (currently defined in worker.h) and clean this up
2034     // in the destructor of that class.
2035     if (cc_cacheline_data_[0].num) {
2036       free(cc_cacheline_data_[0].num);
2037     }
2038     free(cc_cacheline_data_);
2039   }
2040 
2041   sat_assert(0 == pthread_mutex_destroy(&worker_lock_));
2042 
2043   return true;
2044 }
2045 
2046 
2047 // Pretty print really obvious results.
PrintResults()2048 bool Sat::PrintResults() {
2049   bool result = true;
2050 
2051   logprintf(4, "\n");
2052   if (statuscount_) {
2053     logprintf(4, "Status: FAIL - test encountered procedural errors\n");
2054     result = false;
2055   } else if (errorcount_) {
2056     logprintf(4, "Status: FAIL - test discovered HW problems\n");
2057     result = false;
2058   } else {
2059     logprintf(4, "Status: PASS - please verify no corrected errors\n");
2060   }
2061   logprintf(4, "\n");
2062 
2063   return result;
2064 }
2065 
2066 // Helper functions.
AcquireWorkerLock()2067 void Sat::AcquireWorkerLock() {
2068   sat_assert(0 == pthread_mutex_lock(&worker_lock_));
2069 }
ReleaseWorkerLock()2070 void Sat::ReleaseWorkerLock() {
2071   sat_assert(0 == pthread_mutex_unlock(&worker_lock_));
2072 }
2073 
logprintf(int priority,const char * format,...)2074 void logprintf(int priority, const char *format, ...) {
2075   va_list args;
2076   va_start(args, format);
2077   Logger::GlobalLogger()->VLogF(priority, format, args);
2078   va_end(args);
2079 }
2080 
// Stop the logging thread and verify any pending data is written to the log.
//
// Thin free-function wrapper around the global logger's StopThread(), for
// callers that do not hold a Sat object.
void logstop() {
  Logger::GlobalLogger()->StopThread();
}
2085 
2086