1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "benchmark/benchmark.h"
16 #include "internal_macros.h"
17 
18 #ifndef BENCHMARK_OS_WINDOWS
19 #include <sys/time.h>
20 #include <sys/resource.h>
21 #include <unistd.h>
22 #endif
23 
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <atomic>
#include <cmath>
#include <condition_variable>
#include <iostream>
#include <limits>
#include <memory>
#include <thread>
#include <utility>
33 
34 #include "check.h"
35 #include "commandlineflags.h"
36 #include "log.h"
37 #include "mutex.h"
38 #include "re.h"
39 #include "stat.h"
40 #include "string_util.h"
41 #include "sysinfo.h"
42 #include "walltime.h"
43 
44 DEFINE_bool(benchmark_list_tests, false,
45             "Print a list of benchmarks. This option overrides all other "
46             "options.");
47 
48 DEFINE_string(benchmark_filter, ".",
49               "A regular expression that specifies the set of benchmarks "
50               "to execute.  If this flag is empty, no benchmarks are run.  "
51               "If this flag is the string \"all\", all benchmarks linked "
52               "into the process are run.");
53 
54 DEFINE_double(benchmark_min_time, 0.5,
55               "Minimum number of seconds we should run benchmark before "
56               "results are considered significant.  For cpu-time based "
57               "tests, this is the lower bound on the total cpu time "
58               "used by all threads that make up the test.  For real-time "
59               "based tests, this is the lower bound on the elapsed time "
60               "of the benchmark execution, regardless of number of "
61               "threads.");
62 
63 DEFINE_int32(benchmark_repetitions, 1,
64              "The number of runs of each benchmark. If greater than 1, the "
65              "mean and standard deviation of the runs will be reported.");
66 
67 DEFINE_string(benchmark_format, "tabular",
68               "The format to use for console output. Valid values are "
69               "'tabular', 'json', or 'csv'.");
70 
71 DEFINE_bool(color_print, true, "Enables colorized logging.");
72 
73 DEFINE_int32(v, 0, "The level of verbose logging to output");
74 
75 
76 namespace benchmark {
77 
78 namespace internal {
79 
// Intentionally empty: takes a pointer and does nothing with it.
void UseCharPointer(const volatile char*) {
}
81 
// NOTE: This is a dummy "mutex" type used to denote the actual mutex
// returned by GetBenchmarkLock(). This is only used to placate the thread
// safety warnings by giving the return of GetBenchmarkLock() a name.
// BenchmarkLockVar is the named instance that the RETURN_CAPABILITY(...)
// annotation on GetBenchmarkLock() refers to; it holds no state.
struct CAPABILITY("mutex") BenchmarkLockType {};
BenchmarkLockType BenchmarkLockVar;
87 
88 } // end namespace internal
89 
RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)90 inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)
91 GetBenchmarkLock()
92 {
93   static Mutex lock;
94   return lock;
95 }
96 
97 namespace {
98 
// Returns true when the magnitude of `n` is below the machine epsilon of
// double, i.e. when `n` should be treated as "not set" / zero.
bool IsZero(double n) {
    // std::fabs is always a floating-point operation. std::abs can silently
    // resolve to the integer overload (truncating e.g. 0.5 to 0) when only
    // <cstdlib> is visible.
    return std::fabs(n) < std::numeric_limits<double>::epsilon();
}
102 
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;
// Cap on the iteration count RunBenchmark() will request for a single run.
static const size_t kMaxIterations = 1000000000;

// Set to true by RunBenchmark() while a benchmark is executing and back to
// false afterwards; State::PauseTiming/ResumeTiming/SetLabel CHECK it.
bool running_benchmark = false;
108 
109 // Global variable so that a benchmark can cause a little extra printing
GetReportLabel()110 std::string* GetReportLabel() {
111     static std::string label GUARDED_BY(GetBenchmarkLock());
112     return &label;
113 }
114 
115 // TODO(ericwf): support MallocCounter.
116 //static benchmark::MallocCounter *benchmark_mc;
117 
// Byte and item counters summed over the threads of one benchmark run.
// Both counters start at zero.
struct ThreadStats {
    ThreadStats()
        : bytes_processed(0),
          items_processed(0) {
    }

    int64_t bytes_processed;
    int64_t items_processed;
};
123 
124 // Timer management class
125 class TimerManager {
126  public:
TimerManager(int num_threads,Notification * done)127   TimerManager(int num_threads, Notification* done)
128       : num_threads_(num_threads),
129         done_(done),
130         running_(false),
131         real_time_used_(0),
132         cpu_time_used_(0),
133         num_finalized_(0),
134         phase_number_(0),
135         entered_(0) {
136   }
137 
138   // Called by each thread
StartTimer()139   void StartTimer() EXCLUDES(lock_) {
140     bool last_thread = false;
141     {
142       MutexLock ml(lock_);
143       last_thread = Barrier(ml);
144       if (last_thread) {
145         CHECK(!running_) << "Called StartTimer when timer is already running";
146         running_ = true;
147         start_real_time_ = walltime::Now();
148         start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage();
149        }
150      }
151      if (last_thread) {
152        phase_condition_.notify_all();
153      }
154   }
155 
156   // Called by each thread
StopTimer()157   void StopTimer() EXCLUDES(lock_) {
158     bool last_thread = false;
159     {
160       MutexLock ml(lock_);
161       last_thread = Barrier(ml);
162       if (last_thread) {
163         CHECK(running_) << "Called StopTimer when timer is already stopped";
164         InternalStop();
165       }
166     }
167     if (last_thread) {
168       phase_condition_.notify_all();
169     }
170   }
171 
172   // Called by each thread
Finalize()173   void Finalize() EXCLUDES(lock_) {
174     MutexLock l(lock_);
175     num_finalized_++;
176     if (num_finalized_ == num_threads_) {
177       CHECK(!running_) <<
178         "The timer should be stopped before the timer is finalized";
179       done_->Notify();
180     }
181   }
182 
183   // REQUIRES: timer is not running
real_time_used()184   double real_time_used() EXCLUDES(lock_) {
185     MutexLock l(lock_);
186     CHECK(!running_);
187     return real_time_used_;
188   }
189 
190   // REQUIRES: timer is not running
cpu_time_used()191   double cpu_time_used() EXCLUDES(lock_) {
192     MutexLock l(lock_);
193     CHECK(!running_);
194     return cpu_time_used_;
195   }
196 
197  private:
198   Mutex lock_;
199   Condition phase_condition_;
200   int num_threads_;
201   Notification* done_;
202 
203   bool running_;                // Is the timer running
204   double start_real_time_;      // If running_
205   double start_cpu_time_;       // If running_
206 
207   // Accumulated time so far (does not contain current slice if running_)
208   double real_time_used_;
209   double cpu_time_used_;
210 
211   // How many threads have called Finalize()
212   int num_finalized_;
213 
214   // State for barrier management
215   int phase_number_;
216   int entered_;         // Number of threads that have entered this barrier
217 
InternalStop()218   void InternalStop() REQUIRES(lock_) {
219     CHECK(running_);
220     running_ = false;
221     real_time_used_ += walltime::Now() - start_real_time_;
222     cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage())
223                        - start_cpu_time_);
224   }
225 
226   // Enter the barrier and wait until all other threads have also
227   // entered the barrier.  Returns iff this is the last thread to
228   // enter the barrier.
Barrier(MutexLock & ml)229   bool Barrier(MutexLock& ml) REQUIRES(lock_) {
230     CHECK_LT(entered_, num_threads_);
231     entered_++;
232     if (entered_ < num_threads_) {
233       // Wait for all threads to enter
234       int phase_number_cp = phase_number_;
235       auto cb = [this, phase_number_cp]() {
236         return this->phase_number_ > phase_number_cp;
237       };
238       phase_condition_.wait(ml.native_handle(), cb);
239       return false;  // I was not the last one
240     } else {
241       // Last thread has reached the barrier
242       phase_number_++;
243       entered_ = 0;
244       return true;
245     }
246   }
247 };
248 
249 // TimerManager for current run.
250 static std::unique_ptr<TimerManager> timer_manager = nullptr;
251 
252 } // end namespace
253 
254 namespace internal {
255 
256 // Information kept per benchmark we may want to run
257 struct Benchmark::Instance {
258   std::string    name;
259   Benchmark*     benchmark;
260   bool           has_arg1;
261   int            arg1;
262   bool           has_arg2;
263   int            arg2;
264   bool           use_real_time;
265   double         min_time;
266   int            threads;    // Number of concurrent threads to use
267   bool           multithreaded;  // Is benchmark multi-threaded?
268 };
269 
270 // Class for managing registered benchmarks.  Note that each registered
271 // benchmark identifies a family of related benchmarks to run.
272 class BenchmarkFamilies {
273  public:
274   static BenchmarkFamilies* GetInstance();
275 
276   // Registers a benchmark family and returns the index assigned to it.
277   size_t AddBenchmark(std::unique_ptr<Benchmark> family);
278 
279   // Extract the list of benchmark instances that match the specified
280   // regular expression.
281   bool FindBenchmarks(const std::string& re,
282                       std::vector<Benchmark::Instance>* benchmarks);
283  private:
BenchmarkFamilies()284   BenchmarkFamilies() {}
285 
286   std::vector<std::unique_ptr<Benchmark>> families_;
287   Mutex mutex_;
288 };
289 
290 
// Implementation object behind Benchmark: holds the family name and the
// argument sets, thread counts and timing options configured for it.
class BenchmarkImp {
public:
  explicit BenchmarkImp(const char* name);
  ~BenchmarkImp();

  // Argument configuration. A family uses either one-argument or
  // two-argument sets; the definitions CHECK that the two are not mixed.
  void Arg(int x);
  void Range(int start, int limit);
  void DenseRange(int start, int limit);
  void ArgPair(int x, int y);
  void RangePair(int lo1, int hi1, int lo2, int hi2);

  // Timing configuration.
  void MinTime(double t);
  void UseRealTime();

  // Thread-count configuration.
  void Threads(int t);
  void ThreadRange(int min_threads, int max_threads);
  void ThreadPerCpu();

  void SetName(const char* name);

  // Appends lo, the powers of `mult` strictly between lo and hi, and hi
  // (when different from lo) to `*dst`.
  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);

private:
  friend class BenchmarkFamilies;

  std::string name_;
  int arg_count_;  // -1 until arguments are configured, then 1 or 2
  std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
  double min_time_;
  bool use_real_time_;
  std::vector<int> thread_counts_;

  // Copy assignment is not supported.
  BenchmarkImp& operator=(BenchmarkImp const&) = delete;
};
322 
GetInstance()323 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
324   static BenchmarkFamilies instance;
325   return &instance;
326 }
327 
328 
AddBenchmark(std::unique_ptr<Benchmark> family)329 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
330   MutexLock l(mutex_);
331   size_t index = families_.size();
332   families_.push_back(std::move(family));
333   return index;
334 }
335 
FindBenchmarks(const std::string & spec,std::vector<Benchmark::Instance> * benchmarks)336 bool BenchmarkFamilies::FindBenchmarks(
337     const std::string& spec,
338     std::vector<Benchmark::Instance>* benchmarks) {
339   // Make regular expression out of command-line flag
340   std::string error_msg;
341   Regex re;
342   if (!re.Init(spec, &error_msg)) {
343     std::cerr << "Could not compile benchmark re: " << error_msg << std::endl;
344     return false;
345   }
346 
347   // Special list of thread counts to use when none are specified
348   std::vector<int> one_thread;
349   one_thread.push_back(1);
350 
351   MutexLock l(mutex_);
352   for (std::unique_ptr<Benchmark>& bench_family : families_) {
353     // Family was deleted or benchmark doesn't match
354     if (!bench_family) continue;
355     BenchmarkImp* family = bench_family->imp_;
356 
357     if (family->arg_count_ == -1) {
358       family->arg_count_ = 0;
359       family->args_.emplace_back(-1, -1);
360     }
361     for (auto const& args : family->args_) {
362       const std::vector<int>* thread_counts =
363         (family->thread_counts_.empty()
364          ? &one_thread
365          : &family->thread_counts_);
366       for (int num_threads : *thread_counts) {
367 
368         Benchmark::Instance instance;
369         instance.name = family->name_;
370         instance.benchmark = bench_family.get();
371         instance.has_arg1 = family->arg_count_ >= 1;
372         instance.arg1 = args.first;
373         instance.has_arg2 = family->arg_count_ == 2;
374         instance.arg2 = args.second;
375         instance.min_time = family->min_time_;
376         instance.use_real_time = family->use_real_time_;
377         instance.threads = num_threads;
378         instance.multithreaded = !(family->thread_counts_.empty());
379 
380         // Add arguments to instance name
381         if (family->arg_count_ >= 1) {
382           AppendHumanReadable(instance.arg1, &instance.name);
383         }
384         if (family->arg_count_ >= 2) {
385           AppendHumanReadable(instance.arg2, &instance.name);
386         }
387         if (!IsZero(family->min_time_)) {
388           instance.name +=  StringPrintF("/min_time:%0.3f",  family->min_time_);
389         }
390         if (family->use_real_time_) {
391           instance.name +=  "/real_time";
392         }
393 
394         // Add the number of threads used to the name
395         if (!family->thread_counts_.empty()) {
396           instance.name += StringPrintF("/threads:%d", instance.threads);
397         }
398 
399         if (re.Match(instance.name)) {
400           benchmarks->push_back(instance);
401         }
402       }
403     }
404   }
405   return true;
406 }
407 
BenchmarkImp(const char * name)408 BenchmarkImp::BenchmarkImp(const char* name)
409     : name_(name), arg_count_(-1),
410       min_time_(0.0), use_real_time_(false) {
411 }
412 
~BenchmarkImp()413 BenchmarkImp::~BenchmarkImp() {
414 }
415 
Arg(int x)416 void BenchmarkImp::Arg(int x) {
417   CHECK(arg_count_ == -1 || arg_count_ == 1);
418   arg_count_ = 1;
419   args_.emplace_back(x, -1);
420 }
421 
Range(int start,int limit)422 void BenchmarkImp::Range(int start, int limit) {
423   CHECK(arg_count_ == -1 || arg_count_ == 1);
424   arg_count_ = 1;
425   std::vector<int> arglist;
426   AddRange(&arglist, start, limit, kRangeMultiplier);
427 
428   for (int i : arglist) {
429     args_.emplace_back(i, -1);
430   }
431 }
432 
DenseRange(int start,int limit)433 void BenchmarkImp::DenseRange(int start, int limit) {
434   CHECK(arg_count_ == -1 || arg_count_ == 1);
435   arg_count_ = 1;
436   CHECK_GE(start, 0);
437   CHECK_LE(start, limit);
438   for (int arg = start; arg <= limit; arg++) {
439     args_.emplace_back(arg, -1);
440   }
441 }
442 
ArgPair(int x,int y)443 void BenchmarkImp::ArgPair(int x, int y) {
444   CHECK(arg_count_ == -1 || arg_count_ == 2);
445   arg_count_ = 2;
446   args_.emplace_back(x, y);
447 }
448 
RangePair(int lo1,int hi1,int lo2,int hi2)449 void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) {
450   CHECK(arg_count_ == -1 || arg_count_ == 2);
451   arg_count_ = 2;
452   std::vector<int> arglist1, arglist2;
453   AddRange(&arglist1, lo1, hi1, kRangeMultiplier);
454   AddRange(&arglist2, lo2, hi2, kRangeMultiplier);
455 
456   for (int i : arglist1) {
457     for (int j : arglist2) {
458       args_.emplace_back(i, j);
459     }
460   }
461 }
462 
MinTime(double t)463 void BenchmarkImp::MinTime(double t) {
464   CHECK(t > 0.0);
465   min_time_ = t;
466 }
467 
UseRealTime()468 void BenchmarkImp::UseRealTime() {
469   use_real_time_ = true;
470 }
471 
Threads(int t)472 void BenchmarkImp::Threads(int t) {
473   CHECK_GT(t, 0);
474   thread_counts_.push_back(t);
475 }
476 
ThreadRange(int min_threads,int max_threads)477 void BenchmarkImp::ThreadRange(int min_threads, int max_threads) {
478   CHECK_GT(min_threads, 0);
479   CHECK_GE(max_threads, min_threads);
480 
481   AddRange(&thread_counts_, min_threads, max_threads, 2);
482 }
483 
ThreadPerCpu()484 void BenchmarkImp::ThreadPerCpu() {
485   static int num_cpus = NumCPUs();
486   thread_counts_.push_back(num_cpus);
487 }
488 
SetName(const char * name)489 void BenchmarkImp::SetName(const char* name) {
490   name_ = name;
491 }
492 
AddRange(std::vector<int> * dst,int lo,int hi,int mult)493 void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
494   CHECK_GE(lo, 0);
495   CHECK_GE(hi, lo);
496 
497   // Add "lo"
498   dst->push_back(lo);
499 
500   static const int kint32max = std::numeric_limits<int32_t>::max();
501 
502   // Now space out the benchmarks in multiples of "mult"
503   for (int32_t i = 1; i < kint32max/mult; i *= mult) {
504     if (i >= hi) break;
505     if (i > lo) {
506       dst->push_back(i);
507     }
508   }
509   // Add "hi" (if different from "lo")
510   if (hi != lo) {
511     dst->push_back(hi);
512   }
513 }
514 
Benchmark(const char * name)515 Benchmark::Benchmark(const char* name)
516     : imp_(new BenchmarkImp(name))
517 {
518 }
519 
~Benchmark()520 Benchmark::~Benchmark()  {
521   delete imp_;
522 }
523 
Benchmark(Benchmark const & other)524 Benchmark::Benchmark(Benchmark const& other)
525   : imp_(new BenchmarkImp(*other.imp_))
526 {
527 }
528 
Arg(int x)529 Benchmark* Benchmark::Arg(int x) {
530   imp_->Arg(x);
531   return this;
532 }
533 
Range(int start,int limit)534 Benchmark* Benchmark::Range(int start, int limit) {
535   imp_->Range(start, limit);
536   return this;
537 }
538 
DenseRange(int start,int limit)539 Benchmark* Benchmark::DenseRange(int start, int limit) {
540   imp_->DenseRange(start, limit);
541   return this;
542 }
543 
ArgPair(int x,int y)544 Benchmark* Benchmark::ArgPair(int x, int y) {
545   imp_->ArgPair(x, y);
546   return this;
547 }
548 
RangePair(int lo1,int hi1,int lo2,int hi2)549 Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
550   imp_->RangePair(lo1, hi1, lo2, hi2);
551   return this;
552 }
553 
Apply(void (* custom_arguments)(Benchmark * benchmark))554 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
555   custom_arguments(this);
556   return this;
557 }
558 
MinTime(double t)559 Benchmark* Benchmark::MinTime(double t) {
560   imp_->MinTime(t);
561   return this;
562 }
563 
UseRealTime()564 Benchmark* Benchmark::UseRealTime() {
565   imp_->UseRealTime();
566   return this;
567 }
568 
Threads(int t)569 Benchmark* Benchmark::Threads(int t) {
570   imp_->Threads(t);
571   return this;
572 }
573 
ThreadRange(int min_threads,int max_threads)574 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
575   imp_->ThreadRange(min_threads, max_threads);
576   return this;
577 }
578 
ThreadPerCpu()579 Benchmark* Benchmark::ThreadPerCpu() {
580   imp_->ThreadPerCpu();
581   return this;
582 }
583 
SetName(const char * name)584 void Benchmark::SetName(const char* name) {
585   imp_->SetName(name);
586 }
587 
Run(State & st)588 void FunctionBenchmark::Run(State& st) {
589   func_(st);
590 }
591 
592 } // end namespace internal
593 
594 namespace {
595 
596 
597 // Execute one thread of benchmark b for the specified number of iterations.
598 // Adds the stats collected for the thread into *total.
RunInThread(const benchmark::internal::Benchmark::Instance * b,size_t iters,int thread_id,ThreadStats * total)599 void RunInThread(const benchmark::internal::Benchmark::Instance* b,
600                  size_t iters, int thread_id,
601                  ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
602   State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
603   b->benchmark->Run(st);
604   CHECK(st.iterations() == st.max_iterations) <<
605     "Benchmark returned before State::KeepRunning() returned false!";
606   {
607     MutexLock l(GetBenchmarkLock());
608     total->bytes_processed += st.bytes_processed();
609     total->items_processed += st.items_processed();
610   }
611 
612   timer_manager->Finalize();
613 }
614 
RunBenchmark(const benchmark::internal::Benchmark::Instance & b,BenchmarkReporter * br)615 void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
616                   BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) {
617   size_t iters = 1;
618 
619   std::vector<BenchmarkReporter::Run> reports;
620 
621   std::vector<std::thread> pool;
622   if (b.multithreaded)
623     pool.resize(b.threads);
624 
625   for (int i = 0; i < FLAGS_benchmark_repetitions; i++) {
626     std::string mem;
627     for (;;) {
628       // Try benchmark
629       VLOG(2) << "Running " << b.name << " for " << iters << "\n";
630 
631       {
632         MutexLock l(GetBenchmarkLock());
633         GetReportLabel()->clear();
634       }
635 
636       Notification done;
637       timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done));
638 
639       ThreadStats total;
640       running_benchmark = true;
641       if (b.multithreaded) {
642         // If this is out first iteration of the while(true) loop then the
643         // threads haven't been started and can't be joined. Otherwise we need
644         // to join the thread before replacing them.
645         for (std::thread& thread : pool) {
646           if (thread.joinable())
647             thread.join();
648         }
649         for (std::size_t ti = 0; ti < pool.size(); ++ti) {
650             pool[ti] = std::thread(&RunInThread, &b, iters, ti, &total);
651         }
652       } else {
653         // Run directly in this thread
654         RunInThread(&b, iters, 0, &total);
655       }
656       done.WaitForNotification();
657       running_benchmark = false;
658 
659       const double cpu_accumulated_time = timer_manager->cpu_time_used();
660       const double real_accumulated_time = timer_manager->real_time_used();
661       timer_manager.reset();
662 
663       VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
664               << real_accumulated_time << "\n";
665 
666       // Base decisions off of real time if requested by this benchmark.
667       double seconds = cpu_accumulated_time;
668       if (b.use_real_time) {
669           seconds = real_accumulated_time;
670       }
671 
672       std::string label;
673       {
674         MutexLock l(GetBenchmarkLock());
675         label = *GetReportLabel();
676       }
677 
678       const double min_time = !IsZero(b.min_time) ? b.min_time
679                                                   : FLAGS_benchmark_min_time;
680 
681       // If this was the first run, was elapsed time or cpu time large enough?
682       // If this is not the first run, go with the current value of iter.
683       if ((i > 0) ||
684           (iters >= kMaxIterations) ||
685           (seconds >= min_time) ||
686           (real_accumulated_time >= 5*min_time)) {
687         double bytes_per_second = 0;
688         if (total.bytes_processed > 0 && seconds > 0.0) {
689           bytes_per_second = (total.bytes_processed / seconds);
690         }
691         double items_per_second = 0;
692         if (total.items_processed > 0 && seconds > 0.0) {
693           items_per_second = (total.items_processed / seconds);
694         }
695 
696         // Create report about this benchmark run.
697         BenchmarkReporter::Run report;
698         report.benchmark_name = b.name;
699         report.report_label = label;
700         // Report the total iterations across all threads.
701         report.iterations = static_cast<int64_t>(iters) * b.threads;
702         report.real_accumulated_time = real_accumulated_time;
703         report.cpu_accumulated_time = cpu_accumulated_time;
704         report.bytes_per_second = bytes_per_second;
705         report.items_per_second = items_per_second;
706         reports.push_back(report);
707         break;
708       }
709 
710       // See how much iterations should be increased by
711       // Note: Avoid division by zero with max(seconds, 1ns).
712       double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
713       // If our last run was at least 10% of FLAGS_benchmark_min_time then we
714       // use the multiplier directly. Otherwise we use at most 10 times
715       // expansion.
716       // NOTE: When the last run was at least 10% of the min time the max
717       // expansion should be 14x.
718       bool is_significant = (seconds / min_time) > 0.1;
719       multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
720       if (multiplier <= 1.0) multiplier = 2.0;
721       double next_iters = std::max(multiplier * iters, iters + 1.0);
722       if (next_iters > kMaxIterations) {
723         next_iters = kMaxIterations;
724       }
725       VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
726       iters = static_cast<int>(next_iters + 0.5);
727     }
728   }
729   br->ReportRuns(reports);
730   if (b.multithreaded) {
731     for (std::thread& thread : pool)
732       thread.join();
733   }
734 }
735 
736 }  // namespace
737 
State(size_t max_iters,bool has_x,int x,bool has_y,int y,int thread_i,int n_threads)738 State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
739              int thread_i, int n_threads)
740     : started_(false), total_iterations_(0),
741       has_range_x_(has_x), range_x_(x),
742       has_range_y_(has_y), range_y_(y),
743       bytes_processed_(0), items_processed_(0),
744       thread_index(thread_i),
745       threads(n_threads),
746       max_iterations(max_iters)
747 {
748     CHECK(max_iterations != 0) << "At least one iteration must be run";
749     CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
750 }
751 
PauseTiming()752 void State::PauseTiming() {
753   // Add in time accumulated so far
754   CHECK(running_benchmark);
755   timer_manager->StopTimer();
756 }
757 
ResumeTiming()758 void State::ResumeTiming() {
759   CHECK(running_benchmark);
760   timer_manager->StartTimer();
761 }
762 
SetLabel(const char * label)763 void State::SetLabel(const char* label) {
764   CHECK(running_benchmark);
765   MutexLock l(GetBenchmarkLock());
766   *GetReportLabel() = label;
767 }
768 
769 namespace internal {
770 namespace {
771 
PrintBenchmarkList()772 void PrintBenchmarkList() {
773   std::vector<Benchmark::Instance> benchmarks;
774   auto families = BenchmarkFamilies::GetInstance();
775   if (!families->FindBenchmarks(".", &benchmarks)) return;
776 
777   for (const internal::Benchmark::Instance& benchmark : benchmarks) {
778     std::cout <<  benchmark.name << "\n";
779   }
780 }
781 
RunMatchingBenchmarks(const std::string & spec,BenchmarkReporter * reporter)782 void RunMatchingBenchmarks(const std::string& spec,
783                            BenchmarkReporter* reporter) {
784   CHECK(reporter != nullptr);
785   if (spec.empty()) return;
786 
787   std::vector<Benchmark::Instance> benchmarks;
788   auto families = BenchmarkFamilies::GetInstance();
789   if (!families->FindBenchmarks(spec, &benchmarks)) return;
790 
791   // Determine the width of the name field using a minimum width of 10.
792   size_t name_field_width = 10;
793   for (const Benchmark::Instance& benchmark : benchmarks) {
794     name_field_width =
795         std::max<size_t>(name_field_width, benchmark.name.size());
796   }
797   if (FLAGS_benchmark_repetitions > 1)
798     name_field_width += std::strlen("_stddev");
799 
800   // Print header here
801   BenchmarkReporter::Context context;
802   context.num_cpus = NumCPUs();
803   context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
804 
805   context.cpu_scaling_enabled = CpuScalingEnabled();
806   context.name_field_width = name_field_width;
807 
808   if (reporter->ReportContext(context)) {
809     for (const auto& benchmark : benchmarks) {
810       RunBenchmark(benchmark, reporter);
811     }
812   }
813 }
814 
GetDefaultReporter()815 std::unique_ptr<BenchmarkReporter> GetDefaultReporter() {
816   typedef std::unique_ptr<BenchmarkReporter> PtrType;
817   if (FLAGS_benchmark_format == "tabular") {
818     return PtrType(new ConsoleReporter);
819   } else if (FLAGS_benchmark_format == "json") {
820     return PtrType(new JSONReporter);
821   } else if (FLAGS_benchmark_format == "csv") {
822     return PtrType(new CSVReporter);
823   } else {
824     std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n";
825     std::exit(1);
826   }
827 }
828 
829 } // end namespace
830 } // end namespace internal
831 
RunSpecifiedBenchmarks()832 void RunSpecifiedBenchmarks() {
833   RunSpecifiedBenchmarks(nullptr);
834 }
835 
RunSpecifiedBenchmarks(BenchmarkReporter * reporter)836 void RunSpecifiedBenchmarks(BenchmarkReporter* reporter) {
837   if (FLAGS_benchmark_list_tests) {
838     internal::PrintBenchmarkList();
839     return;
840   }
841   std::string spec = FLAGS_benchmark_filter;
842   if (spec.empty() || spec == "all")
843     spec = ".";  // Regexp that matches all benchmarks
844 
845   std::unique_ptr<BenchmarkReporter> default_reporter;
846   if (!reporter) {
847     default_reporter = internal::GetDefaultReporter();
848     reporter = default_reporter.get();
849   }
850   internal::RunMatchingBenchmarks(spec, reporter);
851   reporter->Finalize();
852 }
853 
854 namespace internal {
855 
// Prints the list of supported command-line flags to stdout and exits the
// process with status 0.
void PrintUsageAndExit() {
  static const char* const kUsage =
      "benchmark"
      " [--benchmark_list_tests={true|false}]\n"
      "          [--benchmark_filter=<regex>]\n"
      "          [--benchmark_min_time=<min_time>]\n"
      "          [--benchmark_repetitions=<num_repetitions>]\n"
      "          [--benchmark_format=<tabular|json|csv>]\n"
      "          [--color_print={true|false}]\n"
      "          [--v=<verbosity>]\n";
  fprintf(stdout, "%s", kUsage);
  exit(0);
}
868 
ParseCommandLineFlags(int * argc,char ** argv)869 void ParseCommandLineFlags(int* argc, char** argv) {
870   using namespace benchmark;
871   for (int i = 1; i < *argc; ++i) {
872     if (
873         ParseBoolFlag(argv[i], "benchmark_list_tests",
874                       &FLAGS_benchmark_list_tests) ||
875         ParseStringFlag(argv[i], "benchmark_filter",
876                         &FLAGS_benchmark_filter) ||
877         ParseDoubleFlag(argv[i], "benchmark_min_time",
878                         &FLAGS_benchmark_min_time) ||
879         ParseInt32Flag(argv[i], "benchmark_repetitions",
880                        &FLAGS_benchmark_repetitions) ||
881         ParseStringFlag(argv[i], "benchmark_format",
882                         &FLAGS_benchmark_format) ||
883         ParseBoolFlag(argv[i], "color_print",
884                        &FLAGS_color_print) ||
885         ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
886       for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
887 
888       --(*argc);
889       --i;
890     } else if (IsFlag(argv[i], "help")) {
891       PrintUsageAndExit();
892     }
893   }
894   if (FLAGS_benchmark_format != "tabular" &&
895       FLAGS_benchmark_format != "json" &&
896       FLAGS_benchmark_format != "csv") {
897     PrintUsageAndExit();
898   }
899 }
900 
RegisterBenchmarkInternal(Benchmark * bench)901 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
902     std::unique_ptr<Benchmark> bench_ptr(bench);
903     BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
904     families->AddBenchmark(std::move(bench_ptr));
905     return bench;
906 }
907 
908 } // end namespace internal
909 
Initialize(int * argc,char ** argv)910 void Initialize(int* argc, char** argv) {
911   internal::ParseCommandLineFlags(argc, argv);
912   internal::SetLogLevel(FLAGS_v);
913   // TODO remove this. It prints some output the first time it is called.
914   // We don't want to have this ouput printed during benchmarking.
915   MyCPUUsage();
916   // The first call to walltime::Now initialized it. Call it once to
917   // prevent the initialization from happening in a benchmark.
918   walltime::Now();
919 }
920 
921 } // end namespace benchmark
922