#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
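/* Illustrative usage sketch (not part of the API defined here; `ready` and
 * the surrounding code are hypothetical):
 *
 *     static _Py_atomic_int ready;
 *
 *     // writer: publish with release semantics
 *     _Py_atomic_store_explicit(&ready, 1, _Py_memory_order_release);
 *
 *     // reader: observe with acquire semantics
 *     if (_Py_atomic_load_explicit(&ready, _Py_memory_order_acquire)) {
 *         // anything the writer stored before the release is visible here
 *     }
 */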

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics), plus MSVC on x86/x64/ARM. */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
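/*  Illustrative sketch (assuming a hypothetical _Py_atomic_address `var`
    on x64) of what a seq_cst store reduces to with this backend:

        _Py_atomic_store_explicit(&var, (uintptr_t)ptr, _Py_memory_order_seq_cst);
        // expands (roughly) to:
        // _InterlockedExchange64((__int64 volatile*)&var._value, (__int64)ptr);

    i.e. the store is performed as an atomic exchange, whose implicit full
    barrier supplies the sequential consistency.
*/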
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
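/*  Sketch of the issue (hypothetical flag initialized to the -1 sentinel):

        if (_Py_atomic_load_explicit(&flag, _Py_memory_order_relaxed) < 0) ...

    _Py_atomic_load_explicit below is a ?: over the 32-bit and 64-bit helpers;
    if the 64-bit helper returned uintptr_t, the usual arithmetic conversions
    would make the whole expression unsigned and the `< 0` test could never
    succeed.
*/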
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !GCC x86 && !_MSC_VER */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
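/* Illustrative usage sketch of the relaxed shortcuts (hypothetical flag;
 * relaxed ordering is only appropriate when the flag itself is the only
 * shared data involved):
 *
 *     static _Py_atomic_int pending;
 *
 *     _Py_atomic_store_relaxed(&pending, 1);
 *     ...
 *     if (_Py_atomic_load_relaxed(&pending)) {
 *         // react to the request; no ordering of other memory is implied
 *     }
 */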

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */