1 /*
2  * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 /* The LTP library has some of its own atomic synchronisation primitives
18  * contained in this file. Generally speaking these should not be used
19  * directly in tests for synchronisation, instead use tst_checkpoint.h,
20  * tst_fuzzy_sync.h or the POSIX library.
21  *
22  * Notes on compile and runtime memory barriers and atomics.
23  *
24  * Within the LTP library we have three concerns when accessing variables
25  * shared by multiple threads or processes:
26  *
27  * (1) Removal or reordering of accesses by the compiler.
28  * (2) Atomicity of addition.
29  * (3) LOAD-STORE ordering between threads.
30  *
31  * The first (1) is the most likely to cause an error if not properly
32  * handled. We avoid it by using volatile variables and statements which will
33  * not be removed or reordered by the compiler during optimisation. This includes
34  * the __atomic and __sync intrinsics and volatile asm statements marked with
35  * "memory" as well as variables marked with volatile.
36  *
37  * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
38  * 32-bit integer will be atomic. However fetching and adding to a variable is
39  * quite likely not; so for (2) we need to ensure we use atomic addition.
40  *
41  * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
42  * STOREs of any shared variables (including non-atomics) that are made
43  * between calls to tst_fzsync_wait are completed (globally visible) before
44  * tst_fzsync_wait completes. For this, runtime memory and instruction
45  * barriers are required in addition to compile time.
46  *
47  * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
48  * simplicity. LTP tests tend to be syscall heavy so any performance gain from
49  * using a weaker memory model is unlikely to result in a relatively large
50  * performance improvement while at the same time being a potent source of
51  * confusion.
52  *
53  * Likewise, for the fallback ASM, the simplest "definitely will work, always"
54  * approach is preferred over anything more performant.
55  *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 * (note that terminology may vary between these sources).
59  */
60 
61 #ifndef TST_ATOMIC_H__
62 #define TST_ATOMIC_H__
63 
64 #include "config.h"
65 
66 #if HAVE_ATOMIC_MEMORY_MODEL == 1
/*
 * Atomically add i to *v and hand back the value *v holds afterwards.
 *
 * Built on the compiler's __atomic_add_fetch builtin using sequentially
 * consistent ordering (__ATOMIC_SEQ_CST).
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int updated = __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);

	return updated;
}
71 
/*
 * Atomically read *v and return what was read.
 *
 * The read is performed by the compiler's __atomic load builtin with
 * sequentially consistent ordering (__ATOMIC_SEQ_CST).
 */
static inline int tst_atomic_load(int *v)
{
	int val;

	__atomic_load(v, &val, __ATOMIC_SEQ_CST);

	return val;
}
76 
/*
 * Atomically write i into *v.
 *
 * The write is performed by the compiler's __atomic store builtin with
 * sequentially consistent ordering (__ATOMIC_SEQ_CST).
 */
static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store(v, &i, __ATOMIC_SEQ_CST);
}
81 
82 #elif HAVE_SYNC_ADD_AND_FETCH == 1
/*
 * Atomically add i to *v and hand back the value *v holds afterwards.
 *
 * Variant for compilers that only offer the legacy __sync builtins.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int updated = __sync_add_and_fetch(v, i);

	return updated;
}
87 
/*
 * Read *v with a __sync_synchronize() barrier on each side of the access
 * and return the value that was read.
 *
 * Variant for compilers that only offer the legacy __sync builtins, which
 * have no dedicated atomic load.
 */
static inline int tst_atomic_load(int *v)
{
	int snapshot;

	/* Barrier before the read. */
	__sync_synchronize();
	snapshot = *v;
	/* Barrier after the read. */
	__sync_synchronize();

	return snapshot;
}
97 
/*
 * Write i into *v with a __sync_synchronize() barrier on each side of the
 * access.
 *
 * Variant for compilers that only offer the legacy __sync builtins, which
 * have no dedicated atomic store.
 */
static inline void tst_atomic_store(int i, int *v)
{
	/* Barrier before the write. */
	__sync_synchronize();

	*v = i;

	/* Barrier after the write. */
	__sync_synchronize();
}
104 
105 #elif defined(__i386__) || defined(__x86_64__)
106 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1
107 
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * x86 fallback for compilers that provide neither the __atomic nor the
 * __sync builtins. The lock-prefixed XADD stores the sum in *v and leaves
 * the value *v held before the addition in the register operand.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int old = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 *
	 * The __asm__/__volatile__ spellings are used instead of plain
	 * asm/volatile so that this header also compiles in strict ISO modes
	 * (-std=c99, -std=c11) where the asm keyword is not available.
	 */
	__asm__ __volatile__ ("lock; xaddl %0, %1\n"
		: "+r" (old), "+m" (*v) : : "memory", "cc");

	/*
	 * Recompute the new value from the old one. The addition is done on
	 * unsigned operands so that it wraps the same way XADD did instead of
	 * being undefined on signed overflow.
	 */
	return (int)((unsigned int)i + (unsigned int)old);
}
120 
121 #elif defined(__powerpc__) || defined(__powerpc64__)
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * PowerPC fallback for compilers that provide neither the __atomic nor the
 * __sync builtins. The asm is a lwarx/stwcx. (load-and-reserve /
 * store-conditional) loop: the word at v is loaded, i is added to it and
 * the sum is stored back; "bne- 1b" restarts the loop when the conditional
 * store did not succeed. A "sync" is issued before the loop and another
 * one after it.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add %0,%1,%0\n"
		"	stwcx.	%0,0,%2 \n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)	/* %0: early-clobber, holds the sum */
		: "r" (i), "r" (v)	/* %1: addend, %2: address of the counter */
		: "cc", "memory");

	return t;
}
140 
/*
 * Read *v with a PowerPC "sync" instruction on each side of the access and
 * return the value that was read.
 *
 * The "memory" clobber on both asm statements also stops the compiler from
 * moving the read across them.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}
151 
/*
 * Write i into *v with a PowerPC "sync" instruction on each side of the
 * access.
 *
 * The "memory" clobber on both asm statements also stops the compiler from
 * moving the write across them.
 */
static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}
158 
159 #elif defined(__s390__) || defined(__s390x__)
160 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1
161 
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * s390 fallback for compilers that provide neither the __atomic nor the
 * __sync builtins. The asm loads *v into old_val, computes old_val + i in
 * new_val and tries to install it with "cs" (compare and swap); "jl 0b"
 * repeats the computation when the swap did not take place. The loop is
 * re-entered after the initial load, i.e. old_val is expected to have been
 * refreshed by the failed "cs".
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	/* old_val is the value that was replaced, so the new value is old + i. */
	return old_val + i;
}
179 
180 #elif defined(__arc__)
181 
/*
 * smp_mb() - barrier used by the ARC fallback implementation below.
 *
 * ARCv2 defines the SMP barrier instructions, so every build that does not
 * define __ARC700__ emits "dmb 3". When __ARC700__ is defined smp_mb() is
 * only a compiler barrier: an empty asm statement with a "memory" clobber.
 */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif
188 
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * ARC fallback for compilers that provide neither the __atomic nor the
 * __sync builtins. The asm is an llock/scond (load-locked /
 * store-conditional) loop: the word at v is loaded, i is added to it and
 * the sum is stored back; "bnz 1b" restarts the loop when the conditional
 * store did not succeed. smp_mb() is issued before and after the loop.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock   %[val], [%[ctr]]	\n"
		"	add     %[val], %[val], %[i]	\n"
		"	scond   %[val], [%[ctr]]	\n"
		"	bnz     1b			\n"
		: [val]	"=&r"	(val)	/* early-clobber, holds the sum */
		: [ctr]	"r"	(v),	/* address of the counter */
		  [i]	"ir"	(i)	/* addend: immediate or register */
		: "cc", "memory");

	smp_mb();

	/* val is unsigned int; it is converted to int by the return. */
	return val;
}
209 
/*
 * Read *v with an smp_mb() barrier on each side of the access and return
 * the value that was read.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}
220 
/*
 * Write i into *v with an smp_mb() barrier on each side of the access.
 */
static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}
227 
228 #elif defined (__aarch64__)
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * AArch64 fallback for compilers that provide neither the __atomic nor the
 * __sync builtins. After a prefetch hint (prfm) the asm runs an exclusive
 * load/store loop: ldaxr loads *v, i is added, and stlxr tries to store the
 * sum, writing its status into tmp. "cbnz ... 1b" restarts the loop while
 * that status is non-zero. A "dmb ish" barrier follows the loop.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
"       prfm    pstl1strm, %2	\n"
"1:     ldaxr	%w0, %2		\n"
"       add	%w0, %w0, %w3	\n"
"       stlxr	%w1, %w0, %2	\n"
"       cbnz	%w1, 1b		\n"
"       dmb ish			\n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	return result;
}
247 
248 /* We are using load and store exclusive (ldaxr & stlxr) instructions to try
249  * and help prevent the tst_atomic_load and, more likely, tst_atomic_store
250  * functions from interfering with tst_atomic_add_return which takes advantage
251  * of exclusivity. It is not clear if this is a good idea or not, but does
252  * mean that all three functions are very similar.
253  */
/*
 * Read *v and return the value that was read (AArch64 fallback).
 *
 * The value is loaded with ldaxr and the very same value is written back
 * with stlxr, whose status goes into tmp; "cbnz ... 1b" repeats the
 * sequence while that status is non-zero. A "dmb ish" barrier follows the
 * loop. Note that, because of the write-back, *v is an in/out ("+Q")
 * operand even though this is a load.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm,  %[v]	\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}
270 
/*
 * Write i into *v (AArch64 fallback).
 *
 * ldaxr first loads *v into tmp; the loaded value is not used and tmp is
 * immediately reused for the status of the stlxr that stores i.
 * "cbnz ... 1b" repeats the sequence while that status is non-zero. A
 * "dmb ish" barrier follows the loop.
 */
static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr   %w[tmp], %w[i], %[v]	\n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}
285 
286 #elif defined(__sparc__) && defined(__arch64__)
287 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1
/*
 * Atomically add i to *v and return the new value of *v.
 *
 * 64-bit SPARC fallback for compilers that provide neither the __atomic nor
 * the __sync builtins. The asm loads *v into ret, computes ret + i in tmp
 * and tries to install tmp with "cas" (compare and swap against ret). The
 * following "cmp"/"bne" restarts the loop when ret and tmp differ after the
 * "cas", i.e. when the swap did not take place. Once it succeeded, the
 * final "add" recomputes the new value into ret.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}
310 
311 #else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
312 # error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
313         and an LTP implementation is missing for your architecture.
314 #endif
315 
316 #ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
/*
 * Read *v between two compiler barriers and return the value that was read.
 *
 * Generic fallback, compiled only when LTP_USE_GENERIC_LOAD_STORE_ASM is
 * defined. The empty asm statements emit no instructions; their "memory"
 * clobber only prevents the compiler from moving or dropping the access.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * so that this header also compiles in strict ISO modes (-std=c99,
 * -std=c11) where the asm keyword is not available.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	__asm__ __volatile__("" : : : "memory");
	ret = *v;
	__asm__ __volatile__("" : : : "memory");

	return ret;
}
327 
/*
 * Write i into *v between two compiler barriers.
 *
 * Generic fallback, compiled only when LTP_USE_GENERIC_LOAD_STORE_ASM is
 * defined. The empty asm statements emit no instructions; their "memory"
 * clobber only prevents the compiler from moving or dropping the access.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * so that this header also compiles in strict ISO modes (-std=c99,
 * -std=c11) where the asm keyword is not available.
 */
static inline void tst_atomic_store(int i, int *v)
{
	__asm__ __volatile__("" : : : "memory");
	*v = i;
	__asm__ __volatile__("" : : : "memory");
}
334 #endif
335 
/*
 * Atomically increment *v by one.
 *
 * Returns whatever tst_atomic_add_return() returns for an addend of 1.
 */
static inline int tst_atomic_inc(int *v)
{
	const int step = 1;

	return tst_atomic_add_return(step, v);
}
340 
/*
 * Atomically decrement *v by one.
 *
 * Returns whatever tst_atomic_add_return() returns for an addend of -1.
 */
static inline int tst_atomic_dec(int *v)
{
	const int step = -1;

	return tst_atomic_add_return(step, v);
}
345 
346 #endif	/* TST_ATOMIC_H__ */
347