1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "benchmark_register.h"
16 
17 #ifndef BENCHMARK_OS_WINDOWS
18 #ifndef BENCHMARK_OS_FUCHSIA
19 #include <sys/resource.h>
20 #endif
21 #include <sys/time.h>
22 #include <unistd.h>
23 #endif
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <condition_variable>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <cstring>
31 #include <fstream>
32 #include <iostream>
33 #include <memory>
34 #include <numeric>
35 #include <sstream>
36 #include <thread>
37 
38 #ifndef __STDC_FORMAT_MACROS
39 #define __STDC_FORMAT_MACROS
40 #endif
41 #include <inttypes.h>
42 
43 #include "benchmark/benchmark.h"
44 #include "benchmark_api_internal.h"
45 #include "check.h"
46 #include "commandlineflags.h"
47 #include "complexity.h"
48 #include "internal_macros.h"
49 #include "log.h"
50 #include "mutex.h"
51 #include "re.h"
52 #include "statistics.h"
53 #include "string_util.h"
54 #include "timers.h"
55 
56 namespace benchmark {
57 
58 namespace {
59 // For non-dense Range, intermediate values are powers of kRangeMultiplier.
60 static const int kRangeMultiplier = 8;
61 // The size of a benchmark family determines is the number of inputs to repeat
62 // the benchmark on. If this is "large" then warn the user during configuration.
63 static const size_t kMaxFamilySize = 100;
64 }  // end namespace
65 
66 namespace internal {
67 
68 //=============================================================================//
69 //                         BenchmarkFamilies
70 //=============================================================================//
71 
72 // Class for managing registered benchmarks.  Note that each registered
73 // benchmark identifies a family of related benchmarks to run.
74 class BenchmarkFamilies {
75  public:
76   static BenchmarkFamilies* GetInstance();
77 
78   // Registers a benchmark family and returns the index assigned to it.
79   size_t AddBenchmark(std::unique_ptr<Benchmark> family);
80 
81   // Clear all registered benchmark families.
82   void ClearBenchmarks();
83 
84   // Extract the list of benchmark instances that match the specified
85   // regular expression.
86   bool FindBenchmarks(std::string re,
87                       std::vector<BenchmarkInstance>* benchmarks,
88                       std::ostream* Err);
89 
90  private:
BenchmarkFamilies()91   BenchmarkFamilies() {}
92 
93   std::vector<std::unique_ptr<Benchmark>> families_;
94   Mutex mutex_;
95 };
96 
GetInstance()97 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
98   static BenchmarkFamilies instance;
99   return &instance;
100 }
101 
AddBenchmark(std::unique_ptr<Benchmark> family)102 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
103   MutexLock l(mutex_);
104   size_t index = families_.size();
105   families_.push_back(std::move(family));
106   return index;
107 }
108 
ClearBenchmarks()109 void BenchmarkFamilies::ClearBenchmarks() {
110   MutexLock l(mutex_);
111   families_.clear();
112   families_.shrink_to_fit();
113 }
114 
FindBenchmarks(std::string spec,std::vector<BenchmarkInstance> * benchmarks,std::ostream * ErrStream)115 bool BenchmarkFamilies::FindBenchmarks(
116     std::string spec, std::vector<BenchmarkInstance>* benchmarks,
117     std::ostream* ErrStream) {
118   CHECK(ErrStream);
119   auto& Err = *ErrStream;
120   // Make regular expression out of command-line flag
121   std::string error_msg;
122   Regex re;
123   bool isNegativeFilter = false;
124   if (spec[0] == '-') {
125     spec.replace(0, 1, "");
126     isNegativeFilter = true;
127   }
128   if (!re.Init(spec, &error_msg)) {
129     Err << "Could not compile benchmark re: " << error_msg << std::endl;
130     return false;
131   }
132 
133   // Special list of thread counts to use when none are specified
134   const std::vector<int> one_thread = {1};
135 
136   MutexLock l(mutex_);
137   for (std::unique_ptr<Benchmark>& family : families_) {
138     // Family was deleted or benchmark doesn't match
139     if (!family) continue;
140 
141     if (family->ArgsCnt() == -1) {
142       family->Args({});
143     }
144     const std::vector<int>* thread_counts =
145         (family->thread_counts_.empty()
146              ? &one_thread
147              : &static_cast<const std::vector<int>&>(family->thread_counts_));
148     const size_t family_size = family->args_.size() * thread_counts->size();
149     // The benchmark will be run at least 'family_size' different inputs.
150     // If 'family_size' is very large warn the user.
151     if (family_size > kMaxFamilySize) {
152       Err << "The number of inputs is very large. " << family->name_
153           << " will be repeated at least " << family_size << " times.\n";
154     }
155     // reserve in the special case the regex ".", since we know the final
156     // family size.
157     if (spec == ".") benchmarks->reserve(family_size);
158 
159     for (auto const& args : family->args_) {
160       for (int num_threads : *thread_counts) {
161         BenchmarkInstance instance;
162         instance.name.function_name = family->name_;
163         instance.benchmark = family.get();
164         instance.aggregation_report_mode = family->aggregation_report_mode_;
165         instance.arg = args;
166         instance.time_unit = family->time_unit_;
167         instance.range_multiplier = family->range_multiplier_;
168         instance.min_time = family->min_time_;
169         instance.iterations = family->iterations_;
170         instance.repetitions = family->repetitions_;
171         instance.measure_process_cpu_time = family->measure_process_cpu_time_;
172         instance.use_real_time = family->use_real_time_;
173         instance.use_manual_time = family->use_manual_time_;
174         instance.complexity = family->complexity_;
175         instance.complexity_lambda = family->complexity_lambda_;
176         instance.statistics = &family->statistics_;
177         instance.threads = num_threads;
178 
179         // Add arguments to instance name
180         size_t arg_i = 0;
181         for (auto const& arg : args) {
182           if (!instance.name.args.empty()) {
183             instance.name.args += '/';
184           }
185 
186           if (arg_i < family->arg_names_.size()) {
187             const auto& arg_name = family->arg_names_[arg_i];
188             if (!arg_name.empty()) {
189               instance.name.args += StrFormat("%s:", arg_name.c_str());
190             }
191           }
192 
193           instance.name.args += StrFormat("%" PRId64, arg);
194           ++arg_i;
195         }
196 
197         if (!IsZero(family->min_time_))
198           instance.name.min_time =
199               StrFormat("min_time:%0.3f", family->min_time_);
200         if (family->iterations_ != 0) {
201           instance.name.iterations =
202               StrFormat("iterations:%lu",
203                         static_cast<unsigned long>(family->iterations_));
204         }
205         if (family->repetitions_ != 0)
206           instance.name.repetitions =
207               StrFormat("repeats:%d", family->repetitions_);
208 
209         if (family->measure_process_cpu_time_) {
210           instance.name.time_type = "process_time";
211         }
212 
213         if (family->use_manual_time_) {
214           if (!instance.name.time_type.empty()) {
215             instance.name.time_type += '/';
216           }
217           instance.name.time_type += "manual_time";
218         } else if (family->use_real_time_) {
219           if (!instance.name.time_type.empty()) {
220             instance.name.time_type += '/';
221           }
222           instance.name.time_type += "real_time";
223         }
224 
225         // Add the number of threads used to the name
226         if (!family->thread_counts_.empty()) {
227           instance.name.threads = StrFormat("threads:%d", instance.threads);
228         }
229 
230         const auto full_name = instance.name.str();
231         if ((re.Match(full_name) && !isNegativeFilter) ||
232             (!re.Match(full_name) && isNegativeFilter)) {
233           instance.last_benchmark_instance = (&args == &family->args_.back());
234           benchmarks->push_back(std::move(instance));
235         }
236       }
237     }
238   }
239   return true;
240 }
241 
RegisterBenchmarkInternal(Benchmark * bench)242 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
243   std::unique_ptr<Benchmark> bench_ptr(bench);
244   BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
245   families->AddBenchmark(std::move(bench_ptr));
246   return bench;
247 }
248 
249 // FIXME: This function is a hack so that benchmark.cc can access
250 // `BenchmarkFamilies`
FindBenchmarksInternal(const std::string & re,std::vector<BenchmarkInstance> * benchmarks,std::ostream * Err)251 bool FindBenchmarksInternal(const std::string& re,
252                             std::vector<BenchmarkInstance>* benchmarks,
253                             std::ostream* Err) {
254   return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err);
255 }
256 
257 //=============================================================================//
258 //                               Benchmark
259 //=============================================================================//
260 
Benchmark(const char * name)261 Benchmark::Benchmark(const char* name)
262     : name_(name),
263       aggregation_report_mode_(ARM_Unspecified),
264       time_unit_(kNanosecond),
265       range_multiplier_(kRangeMultiplier),
266       min_time_(0),
267       iterations_(0),
268       repetitions_(0),
269       measure_process_cpu_time_(false),
270       use_real_time_(false),
271       use_manual_time_(false),
272       complexity_(oNone),
273       complexity_lambda_(nullptr) {
274   ComputeStatistics("mean", StatisticsMean);
275   ComputeStatistics("median", StatisticsMedian);
276   ComputeStatistics("stddev", StatisticsStdDev);
277 }
278 
~Benchmark()279 Benchmark::~Benchmark() {}
280 
Arg(int64_t x)281 Benchmark* Benchmark::Arg(int64_t x) {
282   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
283   args_.push_back({x});
284   return this;
285 }
286 
Unit(TimeUnit unit)287 Benchmark* Benchmark::Unit(TimeUnit unit) {
288   time_unit_ = unit;
289   return this;
290 }
291 
Range(int64_t start,int64_t limit)292 Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
293   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
294   std::vector<int64_t> arglist;
295   AddRange(&arglist, start, limit, range_multiplier_);
296 
297   for (int64_t i : arglist) {
298     args_.push_back({i});
299   }
300   return this;
301 }
302 
Ranges(const std::vector<std::pair<int64_t,int64_t>> & ranges)303 Benchmark* Benchmark::Ranges(
304     const std::vector<std::pair<int64_t, int64_t>>& ranges) {
305   CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
306   std::vector<std::vector<int64_t>> arglists(ranges.size());
307   for (std::size_t i = 0; i < ranges.size(); i++) {
308     AddRange(&arglists[i], ranges[i].first, ranges[i].second,
309              range_multiplier_);
310   }
311 
312   ArgsProduct(arglists);
313 
314   return this;
315 }
316 
ArgsProduct(const std::vector<std::vector<int64_t>> & arglists)317 Benchmark* Benchmark::ArgsProduct(
318     const std::vector<std::vector<int64_t>>& arglists) {
319   CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
320 
321   std::vector<std::size_t> indices(arglists.size());
322   const std::size_t total = std::accumulate(
323       std::begin(arglists), std::end(arglists), std::size_t{1},
324       [](const std::size_t res, const std::vector<int64_t>& arglist) {
325         return res * arglist.size();
326       });
327   std::vector<int64_t> args;
328   args.reserve(arglists.size());
329   for (std::size_t i = 0; i < total; i++) {
330     for (std::size_t arg = 0; arg < arglists.size(); arg++) {
331       args.push_back(arglists[arg][indices[arg]]);
332     }
333     args_.push_back(args);
334     args.clear();
335 
336     std::size_t arg = 0;
337     do {
338       indices[arg] = (indices[arg] + 1) % arglists[arg].size();
339     } while (indices[arg++] == 0 && arg < arglists.size());
340   }
341 
342   return this;
343 }
344 
ArgName(const std::string & name)345 Benchmark* Benchmark::ArgName(const std::string& name) {
346   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
347   arg_names_ = {name};
348   return this;
349 }
350 
ArgNames(const std::vector<std::string> & names)351 Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
352   CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
353   arg_names_ = names;
354   return this;
355 }
356 
DenseRange(int64_t start,int64_t limit,int step)357 Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
358   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
359   CHECK_LE(start, limit);
360   for (int64_t arg = start; arg <= limit; arg += step) {
361     args_.push_back({arg});
362   }
363   return this;
364 }
365 
Args(const std::vector<int64_t> & args)366 Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
367   CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
368   args_.push_back(args);
369   return this;
370 }
371 
Apply(void (* custom_arguments)(Benchmark * benchmark))372 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
373   custom_arguments(this);
374   return this;
375 }
376 
RangeMultiplier(int multiplier)377 Benchmark* Benchmark::RangeMultiplier(int multiplier) {
378   CHECK(multiplier > 1);
379   range_multiplier_ = multiplier;
380   return this;
381 }
382 
MinTime(double t)383 Benchmark* Benchmark::MinTime(double t) {
384   CHECK(t > 0.0);
385   CHECK(iterations_ == 0);
386   min_time_ = t;
387   return this;
388 }
389 
Iterations(IterationCount n)390 Benchmark* Benchmark::Iterations(IterationCount n) {
391   CHECK(n > 0);
392   CHECK(IsZero(min_time_));
393   iterations_ = n;
394   return this;
395 }
396 
Repetitions(int n)397 Benchmark* Benchmark::Repetitions(int n) {
398   CHECK(n > 0);
399   repetitions_ = n;
400   return this;
401 }
402 
ReportAggregatesOnly(bool value)403 Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
404   aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default;
405   return this;
406 }
407 
DisplayAggregatesOnly(bool value)408 Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
409   // If we were called, the report mode is no longer 'unspecified', in any case.
410   aggregation_report_mode_ = static_cast<AggregationReportMode>(
411       aggregation_report_mode_ | ARM_Default);
412 
413   if (value) {
414     aggregation_report_mode_ = static_cast<AggregationReportMode>(
415         aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly);
416   } else {
417     aggregation_report_mode_ = static_cast<AggregationReportMode>(
418         aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly);
419   }
420 
421   return this;
422 }
423 
MeasureProcessCPUTime()424 Benchmark* Benchmark::MeasureProcessCPUTime() {
425   // Can be used together with UseRealTime() / UseManualTime().
426   measure_process_cpu_time_ = true;
427   return this;
428 }
429 
UseRealTime()430 Benchmark* Benchmark::UseRealTime() {
431   CHECK(!use_manual_time_)
432       << "Cannot set UseRealTime and UseManualTime simultaneously.";
433   use_real_time_ = true;
434   return this;
435 }
436 
UseManualTime()437 Benchmark* Benchmark::UseManualTime() {
438   CHECK(!use_real_time_)
439       << "Cannot set UseRealTime and UseManualTime simultaneously.";
440   use_manual_time_ = true;
441   return this;
442 }
443 
Complexity(BigO complexity)444 Benchmark* Benchmark::Complexity(BigO complexity) {
445   complexity_ = complexity;
446   return this;
447 }
448 
Complexity(BigOFunc * complexity)449 Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
450   complexity_lambda_ = complexity;
451   complexity_ = oLambda;
452   return this;
453 }
454 
ComputeStatistics(std::string name,StatisticsFunc * statistics)455 Benchmark* Benchmark::ComputeStatistics(std::string name,
456                                         StatisticsFunc* statistics) {
457   statistics_.emplace_back(name, statistics);
458   return this;
459 }
460 
Threads(int t)461 Benchmark* Benchmark::Threads(int t) {
462   CHECK_GT(t, 0);
463   thread_counts_.push_back(t);
464   return this;
465 }
466 
ThreadRange(int min_threads,int max_threads)467 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
468   CHECK_GT(min_threads, 0);
469   CHECK_GE(max_threads, min_threads);
470 
471   AddRange(&thread_counts_, min_threads, max_threads, 2);
472   return this;
473 }
474 
DenseThreadRange(int min_threads,int max_threads,int stride)475 Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
476                                        int stride) {
477   CHECK_GT(min_threads, 0);
478   CHECK_GE(max_threads, min_threads);
479   CHECK_GE(stride, 1);
480 
481   for (auto i = min_threads; i < max_threads; i += stride) {
482     thread_counts_.push_back(i);
483   }
484   thread_counts_.push_back(max_threads);
485   return this;
486 }
487 
ThreadPerCpu()488 Benchmark* Benchmark::ThreadPerCpu() {
489   thread_counts_.push_back(CPUInfo::Get().num_cpus);
490   return this;
491 }
492 
SetName(const char * name)493 void Benchmark::SetName(const char* name) { name_ = name; }
494 
ArgsCnt() const495 int Benchmark::ArgsCnt() const {
496   if (args_.empty()) {
497     if (arg_names_.empty()) return -1;
498     return static_cast<int>(arg_names_.size());
499   }
500   return static_cast<int>(args_.front().size());
501 }
502 
503 //=============================================================================//
504 //                            FunctionBenchmark
505 //=============================================================================//
506 
Run(State & st)507 void FunctionBenchmark::Run(State& st) { func_(st); }
508 
509 }  // end namespace internal
510 
ClearRegisteredBenchmarks()511 void ClearRegisteredBenchmarks() {
512   internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
513 }
514 
515 }  // end namespace benchmark
516