1 /*
2  *  Copyright 2020 The WebRTC Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "benchmark/benchmark.h"
12 #include "rtc_base/synchronization/mutex.h"
13 #include "rtc_base/system/unused.h"
14 
15 namespace webrtc {
16 
17 class PerfTestData {
18  public:
PerfTestData()19   PerfTestData() : cache_line_barrier_1_(), cache_line_barrier_2_() {
20     cache_line_barrier_1_[0]++;  // Avoid 'is not used'.
21     cache_line_barrier_2_[0]++;  // Avoid 'is not used'.
22   }
23 
AddToCounter(int add)24   int AddToCounter(int add) {
25     MutexLock mu(&mu_);
26     my_counter_ += add;
27     return 0;
28   }
29 
30  private:
31   uint8_t cache_line_barrier_1_[64];
32   Mutex mu_;
33   uint8_t cache_line_barrier_2_[64];
34   int64_t my_counter_ = 0;
35 };
36 
BM_LockWithMutex(benchmark::State & state)37 void BM_LockWithMutex(benchmark::State& state) {
38   static PerfTestData test_data;
39   for (auto s : state) {
40     RTC_UNUSED(s);
41     benchmark::DoNotOptimize(test_data.AddToCounter(2));
42   }
43 }
44 
45 BENCHMARK(BM_LockWithMutex)->Threads(1);
46 BENCHMARK(BM_LockWithMutex)->Threads(2);
47 BENCHMARK(BM_LockWithMutex)->Threads(4);
48 BENCHMARK(BM_LockWithMutex)->ThreadPerCpu();
49 
50 }  // namespace webrtc
51 
52 /*
53 
54 Results:
55 
NB when reproducing: Remember to turn off power management features such as
CPU scaling before running!
58 
59 pthreads (Linux):
60 ----------------------------------------------------------------------
61 Run on (12 X 4500 MHz CPU s)
62 CPU Caches:
63   L1 Data 32 KiB (x6)
64   L1 Instruction 32 KiB (x6)
65   L2 Unified 1024 KiB (x6)
66   L3 Unified 8448 KiB (x1)
67 Load Average: 0.26, 0.28, 0.44
68 ----------------------------------------------------------------------
69 Benchmark                            Time             CPU   Iterations
70 ----------------------------------------------------------------------
71 BM_LockWithMutex/threads:1        13.4 ns         13.4 ns     52192906
72 BM_LockWithMutex/threads:2        44.2 ns         88.4 ns      8189944
73 BM_LockWithMutex/threads:4        52.0 ns          198 ns      3743244
74 BM_LockWithMutex/threads:12       84.9 ns          944 ns       733524
75 
76 std::mutex performs like the pthread implementation (Linux).
77 
78 Abseil (Linux):
79 ----------------------------------------------------------------------
80 Run on (12 X 4500 MHz CPU s)
81 CPU Caches:
82   L1 Data 32 KiB (x6)
83   L1 Instruction 32 KiB (x6)
84   L2 Unified 1024 KiB (x6)
85   L3 Unified 8448 KiB (x1)
86 Load Average: 0.27, 0.24, 0.37
87 ----------------------------------------------------------------------
88 Benchmark                            Time             CPU   Iterations
89 ----------------------------------------------------------------------
90 BM_LockWithMutex/threads:1        15.0 ns         15.0 ns     46550231
91 BM_LockWithMutex/threads:2        91.1 ns          182 ns      4059212
92 BM_LockWithMutex/threads:4        40.8 ns          131 ns      5496560
93 BM_LockWithMutex/threads:12       37.0 ns          130 ns      5377668
94 
95 */
96