1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "benchmark_register.h"
16
17 #ifndef BENCHMARK_OS_WINDOWS
18 #ifndef BENCHMARK_OS_FUCHSIA
19 #include <sys/resource.h>
20 #endif
21 #include <sys/time.h>
22 #include <unistd.h>
23 #endif
24
25 #include <algorithm>
26 #include <atomic>
27 #include <condition_variable>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <cstring>
31 #include <fstream>
32 #include <iostream>
33 #include <memory>
34 #include <numeric>
35 #include <sstream>
36 #include <thread>
37
38 #ifndef __STDC_FORMAT_MACROS
39 #define __STDC_FORMAT_MACROS
40 #endif
41 #include <inttypes.h>
42
43 #include "benchmark/benchmark.h"
44 #include "benchmark_api_internal.h"
45 #include "check.h"
46 #include "commandlineflags.h"
47 #include "complexity.h"
48 #include "internal_macros.h"
49 #include "log.h"
50 #include "mutex.h"
51 #include "re.h"
52 #include "statistics.h"
53 #include "string_util.h"
54 #include "timers.h"
55
56 namespace benchmark {
57
58 namespace {
59 // For non-dense Range, intermediate values are powers of kRangeMultiplier.
60 static const int kRangeMultiplier = 8;
61 // The size of a benchmark family determines is the number of inputs to repeat
62 // the benchmark on. If this is "large" then warn the user during configuration.
63 static const size_t kMaxFamilySize = 100;
64 } // end namespace
65
66 namespace internal {
67
68 //=============================================================================//
69 // BenchmarkFamilies
70 //=============================================================================//
71
72 // Class for managing registered benchmarks. Note that each registered
73 // benchmark identifies a family of related benchmarks to run.
74 class BenchmarkFamilies {
75 public:
76 static BenchmarkFamilies* GetInstance();
77
78 // Registers a benchmark family and returns the index assigned to it.
79 size_t AddBenchmark(std::unique_ptr<Benchmark> family);
80
81 // Clear all registered benchmark families.
82 void ClearBenchmarks();
83
84 // Extract the list of benchmark instances that match the specified
85 // regular expression.
86 bool FindBenchmarks(std::string re,
87 std::vector<BenchmarkInstance>* benchmarks,
88 std::ostream* Err);
89
90 private:
BenchmarkFamilies()91 BenchmarkFamilies() {}
92
93 std::vector<std::unique_ptr<Benchmark>> families_;
94 Mutex mutex_;
95 };
96
GetInstance()97 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
98 static BenchmarkFamilies instance;
99 return &instance;
100 }
101
AddBenchmark(std::unique_ptr<Benchmark> family)102 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
103 MutexLock l(mutex_);
104 size_t index = families_.size();
105 families_.push_back(std::move(family));
106 return index;
107 }
108
ClearBenchmarks()109 void BenchmarkFamilies::ClearBenchmarks() {
110 MutexLock l(mutex_);
111 families_.clear();
112 families_.shrink_to_fit();
113 }
114
FindBenchmarks(std::string spec,std::vector<BenchmarkInstance> * benchmarks,std::ostream * ErrStream)115 bool BenchmarkFamilies::FindBenchmarks(
116 std::string spec, std::vector<BenchmarkInstance>* benchmarks,
117 std::ostream* ErrStream) {
118 CHECK(ErrStream);
119 auto& Err = *ErrStream;
120 // Make regular expression out of command-line flag
121 std::string error_msg;
122 Regex re;
123 bool isNegativeFilter = false;
124 if (spec[0] == '-') {
125 spec.replace(0, 1, "");
126 isNegativeFilter = true;
127 }
128 if (!re.Init(spec, &error_msg)) {
129 Err << "Could not compile benchmark re: " << error_msg << std::endl;
130 return false;
131 }
132
133 // Special list of thread counts to use when none are specified
134 const std::vector<int> one_thread = {1};
135
136 MutexLock l(mutex_);
137 for (std::unique_ptr<Benchmark>& family : families_) {
138 // Family was deleted or benchmark doesn't match
139 if (!family) continue;
140
141 if (family->ArgsCnt() == -1) {
142 family->Args({});
143 }
144 const std::vector<int>* thread_counts =
145 (family->thread_counts_.empty()
146 ? &one_thread
147 : &static_cast<const std::vector<int>&>(family->thread_counts_));
148 const size_t family_size = family->args_.size() * thread_counts->size();
149 // The benchmark will be run at least 'family_size' different inputs.
150 // If 'family_size' is very large warn the user.
151 if (family_size > kMaxFamilySize) {
152 Err << "The number of inputs is very large. " << family->name_
153 << " will be repeated at least " << family_size << " times.\n";
154 }
155 // reserve in the special case the regex ".", since we know the final
156 // family size.
157 if (spec == ".") benchmarks->reserve(family_size);
158
159 for (auto const& args : family->args_) {
160 for (int num_threads : *thread_counts) {
161 BenchmarkInstance instance;
162 instance.name.function_name = family->name_;
163 instance.benchmark = family.get();
164 instance.aggregation_report_mode = family->aggregation_report_mode_;
165 instance.arg = args;
166 instance.time_unit = family->time_unit_;
167 instance.range_multiplier = family->range_multiplier_;
168 instance.min_time = family->min_time_;
169 instance.iterations = family->iterations_;
170 instance.repetitions = family->repetitions_;
171 instance.measure_process_cpu_time = family->measure_process_cpu_time_;
172 instance.use_real_time = family->use_real_time_;
173 instance.use_manual_time = family->use_manual_time_;
174 instance.complexity = family->complexity_;
175 instance.complexity_lambda = family->complexity_lambda_;
176 instance.statistics = &family->statistics_;
177 instance.threads = num_threads;
178
179 // Add arguments to instance name
180 size_t arg_i = 0;
181 for (auto const& arg : args) {
182 if (!instance.name.args.empty()) {
183 instance.name.args += '/';
184 }
185
186 if (arg_i < family->arg_names_.size()) {
187 const auto& arg_name = family->arg_names_[arg_i];
188 if (!arg_name.empty()) {
189 instance.name.args += StrFormat("%s:", arg_name.c_str());
190 }
191 }
192
193 instance.name.args += StrFormat("%" PRId64, arg);
194 ++arg_i;
195 }
196
197 if (!IsZero(family->min_time_))
198 instance.name.min_time =
199 StrFormat("min_time:%0.3f", family->min_time_);
200 if (family->iterations_ != 0) {
201 instance.name.iterations =
202 StrFormat("iterations:%lu",
203 static_cast<unsigned long>(family->iterations_));
204 }
205 if (family->repetitions_ != 0)
206 instance.name.repetitions =
207 StrFormat("repeats:%d", family->repetitions_);
208
209 if (family->measure_process_cpu_time_) {
210 instance.name.time_type = "process_time";
211 }
212
213 if (family->use_manual_time_) {
214 if (!instance.name.time_type.empty()) {
215 instance.name.time_type += '/';
216 }
217 instance.name.time_type += "manual_time";
218 } else if (family->use_real_time_) {
219 if (!instance.name.time_type.empty()) {
220 instance.name.time_type += '/';
221 }
222 instance.name.time_type += "real_time";
223 }
224
225 // Add the number of threads used to the name
226 if (!family->thread_counts_.empty()) {
227 instance.name.threads = StrFormat("threads:%d", instance.threads);
228 }
229
230 const auto full_name = instance.name.str();
231 if ((re.Match(full_name) && !isNegativeFilter) ||
232 (!re.Match(full_name) && isNegativeFilter)) {
233 instance.last_benchmark_instance = (&args == &family->args_.back());
234 benchmarks->push_back(std::move(instance));
235 }
236 }
237 }
238 }
239 return true;
240 }
241
RegisterBenchmarkInternal(Benchmark * bench)242 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
243 std::unique_ptr<Benchmark> bench_ptr(bench);
244 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
245 families->AddBenchmark(std::move(bench_ptr));
246 return bench;
247 }
248
249 // FIXME: This function is a hack so that benchmark.cc can access
250 // `BenchmarkFamilies`
FindBenchmarksInternal(const std::string & re,std::vector<BenchmarkInstance> * benchmarks,std::ostream * Err)251 bool FindBenchmarksInternal(const std::string& re,
252 std::vector<BenchmarkInstance>* benchmarks,
253 std::ostream* Err) {
254 return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err);
255 }
256
257 //=============================================================================//
258 // Benchmark
259 //=============================================================================//
260
Benchmark(const char * name)261 Benchmark::Benchmark(const char* name)
262 : name_(name),
263 aggregation_report_mode_(ARM_Unspecified),
264 time_unit_(kNanosecond),
265 range_multiplier_(kRangeMultiplier),
266 min_time_(0),
267 iterations_(0),
268 repetitions_(0),
269 measure_process_cpu_time_(false),
270 use_real_time_(false),
271 use_manual_time_(false),
272 complexity_(oNone),
273 complexity_lambda_(nullptr) {
274 ComputeStatistics("mean", StatisticsMean);
275 ComputeStatistics("median", StatisticsMedian);
276 ComputeStatistics("stddev", StatisticsStdDev);
277 }
278
~Benchmark()279 Benchmark::~Benchmark() {}
280
Arg(int64_t x)281 Benchmark* Benchmark::Arg(int64_t x) {
282 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
283 args_.push_back({x});
284 return this;
285 }
286
Unit(TimeUnit unit)287 Benchmark* Benchmark::Unit(TimeUnit unit) {
288 time_unit_ = unit;
289 return this;
290 }
291
Range(int64_t start,int64_t limit)292 Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
293 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
294 std::vector<int64_t> arglist;
295 AddRange(&arglist, start, limit, range_multiplier_);
296
297 for (int64_t i : arglist) {
298 args_.push_back({i});
299 }
300 return this;
301 }
302
Ranges(const std::vector<std::pair<int64_t,int64_t>> & ranges)303 Benchmark* Benchmark::Ranges(
304 const std::vector<std::pair<int64_t, int64_t>>& ranges) {
305 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
306 std::vector<std::vector<int64_t>> arglists(ranges.size());
307 for (std::size_t i = 0; i < ranges.size(); i++) {
308 AddRange(&arglists[i], ranges[i].first, ranges[i].second,
309 range_multiplier_);
310 }
311
312 ArgsProduct(arglists);
313
314 return this;
315 }
316
ArgsProduct(const std::vector<std::vector<int64_t>> & arglists)317 Benchmark* Benchmark::ArgsProduct(
318 const std::vector<std::vector<int64_t>>& arglists) {
319 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
320
321 std::vector<std::size_t> indices(arglists.size());
322 const std::size_t total = std::accumulate(
323 std::begin(arglists), std::end(arglists), std::size_t{1},
324 [](const std::size_t res, const std::vector<int64_t>& arglist) {
325 return res * arglist.size();
326 });
327 std::vector<int64_t> args;
328 args.reserve(arglists.size());
329 for (std::size_t i = 0; i < total; i++) {
330 for (std::size_t arg = 0; arg < arglists.size(); arg++) {
331 args.push_back(arglists[arg][indices[arg]]);
332 }
333 args_.push_back(args);
334 args.clear();
335
336 std::size_t arg = 0;
337 do {
338 indices[arg] = (indices[arg] + 1) % arglists[arg].size();
339 } while (indices[arg++] == 0 && arg < arglists.size());
340 }
341
342 return this;
343 }
344
ArgName(const std::string & name)345 Benchmark* Benchmark::ArgName(const std::string& name) {
346 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
347 arg_names_ = {name};
348 return this;
349 }
350
ArgNames(const std::vector<std::string> & names)351 Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
352 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
353 arg_names_ = names;
354 return this;
355 }
356
DenseRange(int64_t start,int64_t limit,int step)357 Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
358 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
359 CHECK_LE(start, limit);
360 for (int64_t arg = start; arg <= limit; arg += step) {
361 args_.push_back({arg});
362 }
363 return this;
364 }
365
Args(const std::vector<int64_t> & args)366 Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
367 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
368 args_.push_back(args);
369 return this;
370 }
371
Apply(void (* custom_arguments)(Benchmark * benchmark))372 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
373 custom_arguments(this);
374 return this;
375 }
376
RangeMultiplier(int multiplier)377 Benchmark* Benchmark::RangeMultiplier(int multiplier) {
378 CHECK(multiplier > 1);
379 range_multiplier_ = multiplier;
380 return this;
381 }
382
MinTime(double t)383 Benchmark* Benchmark::MinTime(double t) {
384 CHECK(t > 0.0);
385 CHECK(iterations_ == 0);
386 min_time_ = t;
387 return this;
388 }
389
Iterations(IterationCount n)390 Benchmark* Benchmark::Iterations(IterationCount n) {
391 CHECK(n > 0);
392 CHECK(IsZero(min_time_));
393 iterations_ = n;
394 return this;
395 }
396
Repetitions(int n)397 Benchmark* Benchmark::Repetitions(int n) {
398 CHECK(n > 0);
399 repetitions_ = n;
400 return this;
401 }
402
ReportAggregatesOnly(bool value)403 Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
404 aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default;
405 return this;
406 }
407
DisplayAggregatesOnly(bool value)408 Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
409 // If we were called, the report mode is no longer 'unspecified', in any case.
410 aggregation_report_mode_ = static_cast<AggregationReportMode>(
411 aggregation_report_mode_ | ARM_Default);
412
413 if (value) {
414 aggregation_report_mode_ = static_cast<AggregationReportMode>(
415 aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly);
416 } else {
417 aggregation_report_mode_ = static_cast<AggregationReportMode>(
418 aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly);
419 }
420
421 return this;
422 }
423
MeasureProcessCPUTime()424 Benchmark* Benchmark::MeasureProcessCPUTime() {
425 // Can be used together with UseRealTime() / UseManualTime().
426 measure_process_cpu_time_ = true;
427 return this;
428 }
429
UseRealTime()430 Benchmark* Benchmark::UseRealTime() {
431 CHECK(!use_manual_time_)
432 << "Cannot set UseRealTime and UseManualTime simultaneously.";
433 use_real_time_ = true;
434 return this;
435 }
436
UseManualTime()437 Benchmark* Benchmark::UseManualTime() {
438 CHECK(!use_real_time_)
439 << "Cannot set UseRealTime and UseManualTime simultaneously.";
440 use_manual_time_ = true;
441 return this;
442 }
443
Complexity(BigO complexity)444 Benchmark* Benchmark::Complexity(BigO complexity) {
445 complexity_ = complexity;
446 return this;
447 }
448
Complexity(BigOFunc * complexity)449 Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
450 complexity_lambda_ = complexity;
451 complexity_ = oLambda;
452 return this;
453 }
454
ComputeStatistics(std::string name,StatisticsFunc * statistics)455 Benchmark* Benchmark::ComputeStatistics(std::string name,
456 StatisticsFunc* statistics) {
457 statistics_.emplace_back(name, statistics);
458 return this;
459 }
460
Threads(int t)461 Benchmark* Benchmark::Threads(int t) {
462 CHECK_GT(t, 0);
463 thread_counts_.push_back(t);
464 return this;
465 }
466
ThreadRange(int min_threads,int max_threads)467 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
468 CHECK_GT(min_threads, 0);
469 CHECK_GE(max_threads, min_threads);
470
471 AddRange(&thread_counts_, min_threads, max_threads, 2);
472 return this;
473 }
474
DenseThreadRange(int min_threads,int max_threads,int stride)475 Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
476 int stride) {
477 CHECK_GT(min_threads, 0);
478 CHECK_GE(max_threads, min_threads);
479 CHECK_GE(stride, 1);
480
481 for (auto i = min_threads; i < max_threads; i += stride) {
482 thread_counts_.push_back(i);
483 }
484 thread_counts_.push_back(max_threads);
485 return this;
486 }
487
ThreadPerCpu()488 Benchmark* Benchmark::ThreadPerCpu() {
489 thread_counts_.push_back(CPUInfo::Get().num_cpus);
490 return this;
491 }
492
SetName(const char * name)493 void Benchmark::SetName(const char* name) { name_ = name; }
494
ArgsCnt() const495 int Benchmark::ArgsCnt() const {
496 if (args_.empty()) {
497 if (arg_names_.empty()) return -1;
498 return static_cast<int>(arg_names_.size());
499 }
500 return static_cast<int>(args_.front().size());
501 }
502
503 //=============================================================================//
504 // FunctionBenchmark
505 //=============================================================================//
506
Run(State & st)507 void FunctionBenchmark::Run(State& st) { func_(st); }
508
509 } // end namespace internal
510
ClearRegisteredBenchmarks()511 void ClearRegisteredBenchmarks() {
512 internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
513 }
514
515 } // end namespace benchmark
516