1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Benchmarks for absl random distributions as well as a selection of the
16 // C++ standard library random distributions.
17 
18 #include <algorithm>
19 #include <cstddef>
20 #include <cstdint>
21 #include <initializer_list>
22 #include <iterator>
23 #include <limits>
24 #include <random>
25 #include <type_traits>
26 #include <vector>
27 
28 #include "absl/base/macros.h"
29 #include "absl/meta/type_traits.h"
30 #include "absl/random/bernoulli_distribution.h"
31 #include "absl/random/beta_distribution.h"
32 #include "absl/random/exponential_distribution.h"
33 #include "absl/random/gaussian_distribution.h"
34 #include "absl/random/internal/fast_uniform_bits.h"
35 #include "absl/random/internal/randen_engine.h"
36 #include "absl/random/log_uniform_int_distribution.h"
37 #include "absl/random/poisson_distribution.h"
38 #include "absl/random/random.h"
39 #include "absl/random/uniform_int_distribution.h"
40 #include "absl/random/uniform_real_distribution.h"
41 #include "absl/random/zipf_distribution.h"
42 #include "benchmark/benchmark.h"
43 
44 namespace {
45 
// Fixed seed words used in place of reading std::random_device, so that the
// benchmarks can construct engines without that cost. The table is only ever
// read, so it is declared constexpr.
constexpr uint32_t kSeedData[] = {
    0x1B510052, 0x9A532915, 0xD60F573F, 0xBC9BC6E4, 0x2B60A476, 0x81E67400,
    0x08BA6FB5, 0x571BE91F, 0xF296EC6B, 0x2A0DD915, 0xB6636521, 0xE7B9F9B6,
    0xFF34052E, 0xC5855664, 0x53B02D5D, 0xA99F8FA1, 0x08BA4799, 0x6E85076A,
    0x4B7A70E9, 0xB5B32944, 0xDB75092E, 0xC4192623, 0xAD6EA6B0, 0x49A7DF7D,
    0x9CEE60B8, 0x8FEDB266, 0xECAA8C71, 0x699A18FF, 0x5664526C, 0xC2B19EE1,
    0x193602A5, 0x75094C29, 0xA0591340, 0xE4183A3E, 0x3F54989A, 0x5B429D65,
    0x6B8FE4D6, 0x99F73FD6, 0xA1D29C07, 0xEFE830F5, 0x4D2D38E6, 0xF0255DC1,
    0x4CDD2086, 0x8470EB26, 0x6382E9C6, 0x021ECC5E, 0x09686B3F, 0x3EBAEFC9,
    0x3C971814, 0x6B6A70A1, 0x687F3584, 0x52A0E286, 0x13198A2E, 0x03707344,
};
58 
59 // PrecompiledSeedSeq provides kSeedData to a conforming
60 // random engine to speed initialization in the benchmarks.
61 class PrecompiledSeedSeq {
62  public:
63   using result_type = uint32_t;
64 
PrecompiledSeedSeq()65   PrecompiledSeedSeq() {}
66 
67   template <typename Iterator>
PrecompiledSeedSeq(Iterator begin,Iterator end)68   PrecompiledSeedSeq(Iterator begin, Iterator end) {}
69 
70   template <typename T>
PrecompiledSeedSeq(std::initializer_list<T> il)71   PrecompiledSeedSeq(std::initializer_list<T> il) {}
72 
73   template <typename OutIterator>
generate(OutIterator begin,OutIterator end)74   void generate(OutIterator begin, OutIterator end) {
75     static size_t idx = 0;
76     for (; begin != end; begin++) {
77       *begin = kSeedData[idx++];
78       if (idx >= ABSL_ARRAYSIZE(kSeedData)) {
79         idx = 0;
80       }
81     }
82   }
83 
size() const84   size_t size() const { return ABSL_ARRAYSIZE(kSeedData); }
85 
86   template <typename OutIterator>
param(OutIterator out) const87   void param(OutIterator out) const {
88     std::copy(std::begin(kSeedData), std::end(kSeedData), out);
89   }
90 };
91 
// use_default_initialization<Engine>::value selects how make_engine builds
// Engine: true means the engine must be default-constructed, false means it
// may be constructed from a seed sequence. The switch exists because some
// engines do not accept seed-sequence construction. As defined here it is
// false for every engine type.
template <typename Engine>
using use_default_initialization = std::false_type;
98 
99 // make_engine<T, SSeq> returns a random_engine which is initialized,
100 // either via the default constructor, when use_default_initialization<T>
101 // is true, or via the indicated seed sequence, SSeq.
102 template <typename Engine, typename SSeq = PrecompiledSeedSeq>
103 typename absl::enable_if_t<!use_default_initialization<Engine>::value, Engine>
make_engine()104 make_engine() {
105   // Initialize the random engine using the seed sequence SSeq, which
106   // is constructed from the precompiled seed data.
107   SSeq seq(std::begin(kSeedData), std::end(kSeedData));
108   return Engine(seq);
109 }
110 
111 template <typename Engine, typename SSeq = PrecompiledSeedSeq>
112 typename absl::enable_if_t<use_default_initialization<Engine>::value, Engine>
make_engine()113 make_engine() {
114   // Initialize the random engine using the default constructor.
115   return Engine();
116 }
117 
118 template <typename Engine, typename SSeq>
BM_Construct(benchmark::State & state)119 void BM_Construct(benchmark::State& state) {
120   for (auto _ : state) {
121     auto rng = make_engine<Engine, SSeq>();
122     benchmark::DoNotOptimize(rng());
123   }
124 }
125 
126 template <typename Engine>
BM_Direct(benchmark::State & state)127 void BM_Direct(benchmark::State& state) {
128   using value_type = typename Engine::result_type;
129   // Direct use of the URBG.
130   auto rng = make_engine<Engine>();
131   for (auto _ : state) {
132     benchmark::DoNotOptimize(rng());
133   }
134   state.SetBytesProcessed(sizeof(value_type) * state.iterations());
135 }
136 
137 template <typename Engine>
BM_Generate(benchmark::State & state)138 void BM_Generate(benchmark::State& state) {
139   // std::generate makes a copy of the RNG; thus this tests the
140   // copy-constructor efficiency.
141   using value_type = typename Engine::result_type;
142   std::vector<value_type> v(64);
143   auto rng = make_engine<Engine>();
144   while (state.KeepRunningBatch(64)) {
145     std::generate(std::begin(v), std::end(v), rng);
146   }
147 }
148 
149 template <typename Engine, size_t elems>
BM_Shuffle(benchmark::State & state)150 void BM_Shuffle(benchmark::State& state) {
151   // Direct use of the Engine.
152   std::vector<uint32_t> v(elems);
153   while (state.KeepRunningBatch(elems)) {
154     auto rng = make_engine<Engine>();
155     std::shuffle(std::begin(v), std::end(v), rng);
156   }
157 }
158 
159 template <typename Engine, size_t elems>
BM_ShuffleReuse(benchmark::State & state)160 void BM_ShuffleReuse(benchmark::State& state) {
161   // Direct use of the Engine.
162   std::vector<uint32_t> v(elems);
163   auto rng = make_engine<Engine>();
164   while (state.KeepRunningBatch(elems)) {
165     std::shuffle(std::begin(v), std::end(v), rng);
166   }
167 }
168 
169 template <typename Engine, typename Dist, typename... Args>
BM_Dist(benchmark::State & state,Args &&...args)170 void BM_Dist(benchmark::State& state, Args&&... args) {
171   using value_type = typename Dist::result_type;
172   auto rng = make_engine<Engine>();
173   Dist dis{std::forward<Args>(args)...};
174   // Compare the following loop performance:
175   for (auto _ : state) {
176     benchmark::DoNotOptimize(dis(rng));
177   }
178   state.SetBytesProcessed(sizeof(value_type) * state.iterations());
179 }
180 
181 template <typename Engine, typename Dist>
BM_Large(benchmark::State & state)182 void BM_Large(benchmark::State& state) {
183   using value_type = typename Dist::result_type;
184   volatile value_type kMin = 0;
185   volatile value_type kMax = std::numeric_limits<value_type>::max() / 2 + 1;
186   BM_Dist<Engine, Dist>(state, kMin, kMax);
187 }
188 
189 template <typename Engine, typename Dist>
BM_Small(benchmark::State & state)190 void BM_Small(benchmark::State& state) {
191   using value_type = typename Dist::result_type;
192   volatile value_type kMin = 0;
193   volatile value_type kMax = std::numeric_limits<value_type>::max() / 64 + 1;
194   BM_Dist<Engine, Dist>(state, kMin, kMax);
195 }
196 
197 template <typename Engine, typename Dist, int A>
BM_Bernoulli(benchmark::State & state)198 void BM_Bernoulli(benchmark::State& state) {
199   volatile double a = static_cast<double>(A) / 1000000;
200   BM_Dist<Engine, Dist>(state, a);
201 }
202 
203 template <typename Engine, typename Dist, int A, int B>
BM_Beta(benchmark::State & state)204 void BM_Beta(benchmark::State& state) {
205   using value_type = typename Dist::result_type;
206   volatile value_type a = static_cast<value_type>(A) / 100;
207   volatile value_type b = static_cast<value_type>(B) / 100;
208   BM_Dist<Engine, Dist>(state, a, b);
209 }
210 
211 template <typename Engine, typename Dist, int A>
BM_Gamma(benchmark::State & state)212 void BM_Gamma(benchmark::State& state) {
213   using value_type = typename Dist::result_type;
214   volatile value_type a = static_cast<value_type>(A) / 100;
215   BM_Dist<Engine, Dist>(state, a);
216 }
217 
218 template <typename Engine, typename Dist, int A = 100>
BM_Poisson(benchmark::State & state)219 void BM_Poisson(benchmark::State& state) {
220   volatile double a = static_cast<double>(A) / 100;
221   BM_Dist<Engine, Dist>(state, a);
222 }
223 
224 template <typename Engine, typename Dist, int Q = 2, int V = 1>
BM_Zipf(benchmark::State & state)225 void BM_Zipf(benchmark::State& state) {
226   using value_type = typename Dist::result_type;
227   volatile double q = Q;
228   volatile double v = V;
229   BM_Dist<Engine, Dist>(state, std::numeric_limits<value_type>::max(), q, v);
230 }
231 
232 template <typename Engine, typename Dist>
BM_Thread(benchmark::State & state)233 void BM_Thread(benchmark::State& state) {
234   using value_type = typename Dist::result_type;
235   auto rng = make_engine<Engine>();
236   Dist dis{};
237   for (auto _ : state) {
238     benchmark::DoNotOptimize(dis(rng));
239   }
240   state.SetBytesProcessed(sizeof(value_type) * state.iterations());
241 }
242 
243 // NOTES:
244 //
245 // std::geometric_distribution is similar to the zipf distributions.
246 // The algorithm for the geometric_distribution is, basically,
247 // floor(log(1-X) / log(1-p))
248 
// BM_BASIC(Engine) registers the normal benchmark suite for Engine:
// construction, direct use, shuffling, and the uniform bit / integer / real
// distributions, with std:: and absl:: variants side by side.
// The expansion has no trailing semicolon; write `BM_BASIC(Engine);`.
#define BM_BASIC(Engine)                                                       \
  /* Engine construction and direct use. */                                    \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, PrecompiledSeedSeq);                \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, std::seed_seq);                     \
  BENCHMARK_TEMPLATE(BM_Direct, Engine);                                       \
  /* Shuffling, with and without engine reuse. */                              \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 10);                                  \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100);                                 \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000);                                \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100);                            \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000);                           \
  /* Uniform bits and integers. */                                             \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::random_internal::FastUniformBits<uint32_t>);        \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::random_internal::FastUniformBits<uint64_t>);        \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::uniform_int_distribution<int32_t>);                 \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::uniform_int_distribution<int64_t>);                 \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                         \
                     std::uniform_int_distribution<int32_t>);                  \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                         \
                     std::uniform_int_distribution<int64_t>);                  \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                         \
                     absl::uniform_int_distribution<int32_t>);                 \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                         \
                     absl::uniform_int_distribution<int64_t>);                 \
  /* Uniform reals. */                                                         \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::uniform_real_distribution<float>);  \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::uniform_real_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::uniform_real_distribution<double>)
281 
// BM_COPY(Engine) registers BM_Generate for Engine. BM_Generate passes the
// engine to std::generate by value, so Engine must be copyable.
// The expansion has no trailing semicolon; write `BM_COPY(Engine);`.
#define BM_COPY(Engine) \
  BENCHMARK_TEMPLATE(BM_Generate, Engine)
283 
// BM_THREAD(Engine) registers the multi-threaded benchmarks for Engine. Every
// entry is registered with ThreadPerCpu().
// Like BM_BASIC and BM_COPY, the expansion has no trailing semicolon, so
// `BM_THREAD(Engine);` does not leave a stray empty declaration behind.
#define BM_THREAD(Engine)                                           \
  BENCHMARK_TEMPLATE(BM_Thread, Engine,                             \
                     absl::uniform_int_distribution<int64_t>)       \
      ->ThreadPerCpu();                                             \
  BENCHMARK_TEMPLATE(BM_Thread, Engine,                             \
                     absl::uniform_real_distribution<double>)       \
      ->ThreadPerCpu();                                             \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100)->ThreadPerCpu();      \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000)->ThreadPerCpu();     \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100)->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000)->ThreadPerCpu()
295 
// BM_EXTENDED(Engine) registers the extended benchmark suite for Engine: the
// BM_Small range variants of the uniform distributions, followed by the
// normal/gaussian, exponential, poisson, log-uniform, geometric, zipf,
// bernoulli, beta and gamma distributions.
// Like BM_BASIC and BM_COPY, the expansion has no trailing semicolon, so
// `BM_EXTENDED(Engine);` does not leave a stray empty declaration behind.
#define BM_EXTENDED(Engine)                                                    \
  /* -------------- Extended Uniform -----------------------*/                 \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     std::uniform_int_distribution<int32_t>);                  \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     std::uniform_int_distribution<int64_t>);                  \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     absl::uniform_int_distribution<int32_t>);                 \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     absl::uniform_int_distribution<int64_t>);                 \
  BENCHMARK_TEMPLATE(BM_Small, Engine, std::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     std::uniform_real_distribution<double>);                  \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     absl::uniform_real_distribution<float>);                  \
  BENCHMARK_TEMPLATE(BM_Small, Engine,                                         \
                     absl::uniform_real_distribution<double>);                 \
  /* -------------- Other -----------------------*/                            \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::normal_distribution<double>);       \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::gaussian_distribution<double>);    \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::exponential_distribution<double>);  \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::exponential_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>,   \
                     100);                                                     \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>,  \
                     100);                                                     \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>,   \
                     10 * 100);                                                \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>,  \
                     10 * 100);                                                \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>,   \
                     13 * 100);                                                \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>,  \
                     13 * 100);                                                \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::log_uniform_int_distribution<int32_t>);             \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                          \
                     absl::log_uniform_int_distribution<int64_t>);             \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::geometric_distribution<int64_t>);   \
  BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>);      \
  BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>, 2,    \
                     3);                                                       \
  BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, std::bernoulli_distribution,        \
                     257305);                                                  \
  BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, absl::bernoulli_distribution,       \
                     257305);                                                  \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 65,     \
                     41);                                                      \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 99,     \
                     330);                                                     \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 150,    \
                     150);                                                     \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 410,    \
                     580);                                                     \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 65, 41); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 99,      \
                     330);                                                     \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 150,     \
                     150);                                                     \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 410,     \
                     580);                                                     \
  BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<float>, 199);   \
  BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<double>, 199)
359 
360 // ABSL Recommended interfaces.
361 BM_BASIC(absl::InsecureBitGen);  // === pcg64_2018_engine
362 BM_BASIC(absl::BitGen);    // === randen_engine<uint64_t>.
363 BM_THREAD(absl::BitGen);
364 BM_EXTENDED(absl::BitGen);
365 
366 // Instantiate benchmarks for multiple engines.
367 using randen_engine_64 = absl::random_internal::randen_engine<uint64_t>;
368 using randen_engine_32 = absl::random_internal::randen_engine<uint32_t>;
369 
370 // Comparison interfaces.
371 BM_BASIC(std::mt19937_64);
372 BM_COPY(std::mt19937_64);
373 BM_EXTENDED(std::mt19937_64);
374 BM_BASIC(randen_engine_64);
375 BM_COPY(randen_engine_64);
376 BM_EXTENDED(randen_engine_64);
377 
378 BM_BASIC(std::mt19937);
379 BM_COPY(std::mt19937);
380 BM_BASIC(randen_engine_32);
381 BM_COPY(randen_engine_32);
382 
383 }  // namespace
384