1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Our goal is to measure the cost of various C++ atomic operations.
18 // Android doesn't really control those. But since some of these operations can be quite
19 // expensive, this may be useful input for development of higher level code.
20 // Expected mappings from C++ atomics to hardware primitives can be found at
21 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
22 
23 #include <atomic>
24 #include <mutex>
25 
26 #include <benchmark/benchmark.h>
27 #include "util.h"
28 
29 // We time atomic operations separated by a volatile (not atomic!) increment.  This ensures
30 // that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
31 // like.  That in turn ensures that the CPU has outstanding memory operations when the fence
32 // is executed.
33 
34 // In most respects, we compute best case values. Since there is only one thread, there are no
35 // coherence misses.
36 
37 // We assume that the compiler is not smart enough to optimize away fences in a single-threaded
38 // program. If that changes, we'll need to add a second thread.
39 
// We increment the counter this way to avoid -Wdeprecated-volatile warnings.
static volatile unsigned counter;
#define INC_COUNTER() counter = counter + 1

// The single atomic location every benchmark below operates on. Single-threaded,
// so it always stays cache-resident (best-case timing; no coherence misses).
std::atomic<int> test_loc(0);

// Benchmark results are stored here so the compiler cannot discard the
// loop's accumulated work as dead code.
static volatile unsigned sink;

// Mutex used only by the critical-section comparison benchmark at the bottom.
static std::mutex mtx;
49 
BM_atomic_empty(benchmark::State & state)50 void BM_atomic_empty(benchmark::State& state) {
51   while (state.KeepRunning()) {
52     INC_COUNTER();
53   }
54 }
55 BIONIC_BENCHMARK(BM_atomic_empty);
56 
BM_atomic_load_relaxed(benchmark::State & state)57 static void BM_atomic_load_relaxed(benchmark::State& state) {
58   unsigned result = 0;
59   while (state.KeepRunning()) {
60     result += test_loc.load(std::memory_order_relaxed);
61     INC_COUNTER();
62   }
63   sink = result;
64 }
65 BIONIC_BENCHMARK(BM_atomic_load_relaxed);
66 
BM_atomic_load_acquire(benchmark::State & state)67 static void BM_atomic_load_acquire(benchmark::State& state) {
68   unsigned result = 0;
69   while (state.KeepRunning()) {
70     result += test_loc.load(std::memory_order_acquire);
71     INC_COUNTER();
72   }
73   sink = result;
74 }
75 BIONIC_BENCHMARK(BM_atomic_load_acquire);
76 
BM_atomic_store_release(benchmark::State & state)77 static void BM_atomic_store_release(benchmark::State& state) {
78   int i = counter;
79   while (state.KeepRunning()) {
80     test_loc.store(++i, std::memory_order_release);
81     INC_COUNTER();
82   }
83 }
84 BIONIC_BENCHMARK(BM_atomic_store_release);
85 
BM_atomic_store_seq_cst(benchmark::State & state)86 static void BM_atomic_store_seq_cst(benchmark::State& state) {
87   int i = counter;
88   while (state.KeepRunning()) {
89     test_loc.store(++i, std::memory_order_seq_cst);
90     INC_COUNTER();
91   }
92 }
93 BIONIC_BENCHMARK(BM_atomic_store_seq_cst);
94 
BM_atomic_fetch_add_relaxed(benchmark::State & state)95 static void BM_atomic_fetch_add_relaxed(benchmark::State& state) {
96   unsigned result = 0;
97   while (state.KeepRunning()) {
98     result += test_loc.fetch_add(1, std::memory_order_relaxed);
99     INC_COUNTER();
100   }
101   sink = result;
102 }
103 BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed);
104 
BM_atomic_fetch_add_seq_cst(benchmark::State & state)105 static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) {
106   unsigned result = 0;
107   while (state.KeepRunning()) {
108     result += test_loc.fetch_add(1, std::memory_order_seq_cst);
109     INC_COUNTER();
110   }
111   sink = result;
112 }
113 BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst);
114 
115 // The fence benchmarks include a relaxed load to make it much harder to optimize away
116 // the fence.
117 
BM_atomic_acquire_fence(benchmark::State & state)118 static void BM_atomic_acquire_fence(benchmark::State& state) {
119   unsigned result = 0;
120   while (state.KeepRunning()) {
121     result += test_loc.load(std::memory_order_relaxed);
122     std::atomic_thread_fence(std::memory_order_acquire);
123     INC_COUNTER();
124   }
125   sink = result;
126 }
127 BIONIC_BENCHMARK(BM_atomic_acquire_fence);
128 
BM_atomic_seq_cst_fence(benchmark::State & state)129 static void BM_atomic_seq_cst_fence(benchmark::State& state) {
130   unsigned result = 0;
131   while (state.KeepRunning()) {
132     result += test_loc.load(std::memory_order_relaxed);
133     std::atomic_thread_fence(std::memory_order_seq_cst);
134     INC_COUNTER();
135   }
136   sink = result;
137 }
138 BIONIC_BENCHMARK(BM_atomic_seq_cst_fence);
139 
140 // For comparison, also throw in a critical section version:
141 
BM_atomic_fetch_add_cs(benchmark::State & state)142 static void BM_atomic_fetch_add_cs(benchmark::State& state) {
143   unsigned result = 0;
144   while (state.KeepRunning()) {
145     {
146       std::lock_guard<std::mutex> _(mtx);
147       INC_COUNTER();
148       result += counter;
149     }
150   }
151   sink = result;
152 }
153 BIONIC_BENCHMARK(BM_atomic_fetch_add_cs);
154