1 /* 2 * Copyright 2020 The WebRTC Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "benchmark/benchmark.h" 12 #include "rtc_base/synchronization/mutex.h" 13 #include "rtc_base/system/unused.h" 14 15 namespace webrtc { 16 17 class PerfTestData { 18 public: PerfTestData()19 PerfTestData() : cache_line_barrier_1_(), cache_line_barrier_2_() { 20 cache_line_barrier_1_[0]++; // Avoid 'is not used'. 21 cache_line_barrier_2_[0]++; // Avoid 'is not used'. 22 } 23 AddToCounter(int add)24 int AddToCounter(int add) { 25 MutexLock mu(&mu_); 26 my_counter_ += add; 27 return 0; 28 } 29 30 private: 31 uint8_t cache_line_barrier_1_[64]; 32 Mutex mu_; 33 uint8_t cache_line_barrier_2_[64]; 34 int64_t my_counter_ = 0; 35 }; 36 BM_LockWithMutex(benchmark::State & state)37void BM_LockWithMutex(benchmark::State& state) { 38 static PerfTestData test_data; 39 for (auto s : state) { 40 RTC_UNUSED(s); 41 benchmark::DoNotOptimize(test_data.AddToCounter(2)); 42 } 43 } 44 45 BENCHMARK(BM_LockWithMutex)->Threads(1); 46 BENCHMARK(BM_LockWithMutex)->Threads(2); 47 BENCHMARK(BM_LockWithMutex)->Threads(4); 48 BENCHMARK(BM_LockWithMutex)->ThreadPerCpu(); 49 50 } // namespace webrtc 51 52 /* 53 54 Results: 55 56 NB when reproducing: Remember to turn of power management features such as CPU 57 scaling before running! 58 59 pthreads (Linux): 60 ---------------------------------------------------------------------- 61 Run on (12 X 4500 MHz CPU s) 62 CPU Caches: 63 L1 Data 32 KiB (x6) 64 L1 Instruction 32 KiB (x6) 65 L2 Unified 1024 KiB (x6) 66 L3 Unified 8448 KiB (x1) 67 Load Average: 0.26, 0.28, 0.44 68 ---------------------------------------------------------------------- 69 Benchmark Time CPU Iterations 70 ---------------------------------------------------------------------- 71 BM_LockWithMutex/threads:1 13.4 ns 13.4 ns 52192906 72 BM_LockWithMutex/threads:2 44.2 ns 88.4 ns 8189944 73 BM_LockWithMutex/threads:4 52.0 ns 198 ns 3743244 74 BM_LockWithMutex/threads:12 84.9 ns 944 ns 733524 75 76 std::mutex performs like the pthread implementation (Linux). 77 78 Abseil (Linux): 79 ---------------------------------------------------------------------- 80 Run on (12 X 4500 MHz CPU s) 81 CPU Caches: 82 L1 Data 32 KiB (x6) 83 L1 Instruction 32 KiB (x6) 84 L2 Unified 1024 KiB (x6) 85 L3 Unified 8448 KiB (x1) 86 Load Average: 0.27, 0.24, 0.37 87 ---------------------------------------------------------------------- 88 Benchmark Time CPU Iterations 89 ---------------------------------------------------------------------- 90 BM_LockWithMutex/threads:1 15.0 ns 15.0 ns 46550231 91 BM_LockWithMutex/threads:2 91.1 ns 182 ns 4059212 92 BM_LockWithMutex/threads:4 40.8 ns 131 ns 5496560 93 BM_LockWithMutex/threads:12 37.0 ns 130 ns 5377668 94 95 */ 96