1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
9 #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
10 
11 /* cxa_guard_impl.h - Implements the C++ runtime support for function local
12  * static guards.
13  * The layout of the guard object is the same across ARM and Itanium.
14  *
15  * The first "guard byte" (which is checked by the compiler) is set only upon
16  * the completion of cxa release.
17  *
18  * The second "init byte" does the rest of the bookkeeping. It tracks if
19  * initialization is complete or pending, and if there are waiting threads.
20  *
 * If the guard variable is 64 bits wide and the platform supplies a 32-bit thread
22  * identifier, it is used to detect recursive initialization. The thread ID of
23  * the thread currently performing initialization is stored in the second word.
24  *
25  *  Guard Object Layout:
26  * -------------------------------------------------------------------------
27  * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
28  * ------------------------------------------------------------------------
29  *
30  *  Access Protocol:
31  *    For each implementation the guard byte is checked and set before accessing
32  *    the init byte.
33  *
34  *  Overall Design:
35  *    The implementation was designed to allow each implementation to be tested
36  *    independent of the C++ runtime or platform support.
37  *
38  */
39 
40 #include "__cxxabi_config.h"
41 #include "include/atomic_support.h"
42 #include <unistd.h>
43 #if defined(__has_include)
44 # if __has_include(<sys/syscall.h>)
45 #   include <sys/syscall.h>
46 # endif
47 #endif
48 
49 #include <stdlib.h>
50 #include <__threading_support>
51 #ifndef _LIBCXXABI_HAS_NO_THREADS
52 #if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
53 #pragma comment(lib, "pthread")
54 #endif
55 #endif
56 
57 #if defined(__clang__)
58 # pragma clang diagnostic push
59 # pragma clang diagnostic ignored "-Wtautological-pointer-compare"
60 #elif defined(__GNUC__)
61 # pragma GCC diagnostic push
62 # pragma GCC diagnostic ignored "-Waddress"
63 #endif
64 
65 // To make testing possible, this header is included from both cxa_guard.cpp
66 // and a number of tests.
67 //
68 // For this reason we place everything in an anonymous namespace -- even though
69 // we're in a header. We want the actual implementation and the tests to have
70 // unique definitions of the types in this header (since the tests may depend
71 // on function local statics).
72 //
73 // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
74 // defined when including this file. Only `src/cxa_guard.cpp` should define
75 // the former.
76 #ifdef BUILDING_CXA_GUARD
77 # include "abort_message.h"
78 # define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
79 #elif defined(TESTING_CXA_GUARD)
80 # define ABORT_WITH_MESSAGE(...) ::abort()
81 #else
82 # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
83 #endif
84 
85 #if __has_feature(thread_sanitizer)
86 extern "C" void __tsan_acquire(void*);
87 extern "C" void __tsan_release(void*);
88 #else
89 #define __tsan_acquire(addr) ((void)0)
90 #define __tsan_release(addr) ((void)0)
91 #endif
92 
93 namespace __cxxabiv1 {
94 // Use an anonymous namespace to ensure that the tests and actual implementation
95 // have unique definitions of these symbols.
96 namespace {
97 
98 //===----------------------------------------------------------------------===//
99 //                          Misc Utilities
100 //===----------------------------------------------------------------------===//
101 
/// Holds a value of type `T` that is produced by calling `Init` the first time
/// it is requested and then reused for every later request on the same object.
template <class T, T (*Init)()>
struct LazyValue {
  LazyValue() : computed_(false) {}

  /// Returns a reference to the stored value. `Init()` is invoked only on the
  /// first call; subsequent calls hand back the same object unchanged.
  T& get() {
    if (computed_)
      return cached_;
    cached_ = Init();
    computed_ = true;
    return cached_;
  }

private:
  T cached_;              // Only meaningful once computed_ is true.
  bool computed_ = false; // Set after Init() has run for this object.
};
117 
118 template <class IntType>
119 class AtomicInt {
120 public:
121   using MemoryOrder = std::__libcpp_atomic_order;
122 
AtomicInt(IntType * b)123   explicit AtomicInt(IntType *b) : b_(b) {}
124   AtomicInt(AtomicInt const&) = delete;
125   AtomicInt& operator=(AtomicInt const&) = delete;
126 
load(MemoryOrder ord)127   IntType load(MemoryOrder ord) {
128     return std::__libcpp_atomic_load(b_, ord);
129   }
store(IntType val,MemoryOrder ord)130   void store(IntType val, MemoryOrder ord) {
131     std::__libcpp_atomic_store(b_, val, ord);
132   }
exchange(IntType new_val,MemoryOrder ord)133   IntType exchange(IntType new_val, MemoryOrder ord) {
134     return std::__libcpp_atomic_exchange(b_, new_val, ord);
135   }
compare_exchange(IntType * expected,IntType desired,MemoryOrder ord_success,MemoryOrder ord_failure)136   bool compare_exchange(IntType *expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) {
137     return std::__libcpp_atomic_compare_exchange(b_, expected, desired, ord_success, ord_failure);
138   }
139 
140 private:
141   IntType *b_;
142 };
143 
144 //===----------------------------------------------------------------------===//
145 //                       PlatformGetThreadID
146 //===----------------------------------------------------------------------===//
147 
// PlatformThreadID produces a 32-bit identifier for the calling thread on
// platforms where one of the branches below applies. Everywhere else it is a
// null function pointer so that callers can test for availability.
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  const auto self = std::__libcpp_thread_get_current_id();
  return static_cast<uint32_t>(pthread_mach_thread_np(self));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  const auto tid = syscall(SYS_gettid);
  return static_cast<uint32_t>(tid);
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
162 
163 
PlatformSupportsThreadID()164 constexpr bool PlatformSupportsThreadID() {
165   return +PlatformThreadID != nullptr;
166 }
167 
168 //===----------------------------------------------------------------------===//
169 //                          GuardBase
170 //===----------------------------------------------------------------------===//
171 
/// Result reported by the acquire step of a guard implementation.
enum class AcquireResult {
  INIT_IS_DONE,    // Initialization has already completed.
  INIT_IS_PENDING, // Initialization has been claimed and is now pending.
};

// Unqualified shorthands for the enumerators above.
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

// Values stored in the guard and init bytes. UNSET means no bit is set; the
// remaining constants are distinct single-bit flags.
static constexpr uint8_t UNSET = 0x00;
static constexpr uint8_t COMPLETE_BIT = 0x01;
static constexpr uint8_t PENDING_BIT = 0x02;
static constexpr uint8_t WAITING_BIT = 0x04;
183 
184 template <class Derived>
185 struct GuardObject {
186   GuardObject() = delete;
187   GuardObject(GuardObject const&) = delete;
188   GuardObject& operator=(GuardObject const&) = delete;
189 
GuardObjectGuardObject190   explicit GuardObject(uint32_t* g)
191       : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
192         init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
193         thread_id_address(nullptr) {}
194 
GuardObjectGuardObject195   explicit GuardObject(uint64_t* g)
196       : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
197         init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
198         thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}
199 
200 public:
201   /// Implements __cxa_guard_acquire
cxa_guard_acquireGuardObject202   AcquireResult cxa_guard_acquire() {
203     AtomicInt<uint8_t> guard_byte(guard_byte_address);
204     if (guard_byte.load(std::_AO_Acquire) != UNSET)
205       return INIT_IS_DONE;
206     return derived()->acquire_init_byte();
207   }
208 
209   /// Implements __cxa_guard_release
cxa_guard_releaseGuardObject210   void cxa_guard_release() {
211     AtomicInt<uint8_t> guard_byte(guard_byte_address);
212     // Store complete first, so that when release wakes other folks, they see
213     // it as having been completed.
214     guard_byte.store(COMPLETE_BIT, std::_AO_Release);
215     derived()->release_init_byte();
216   }
217 
218   /// Implements __cxa_guard_abort
cxa_guard_abortGuardObject219   void cxa_guard_abort() { derived()->abort_init_byte(); }
220 
221 public:
222   /// base_address - the address of the original guard object.
223   void* const base_address;
224   /// The address of the guard byte at offset 0.
225   uint8_t* const guard_byte_address;
226   /// The address of the byte used by the implementation during initialization.
227   uint8_t* const init_byte_address;
228   /// An optional address storing an identifier for the thread performing initialization.
229   /// It's used to detect recursive initialization.
230   uint32_t* const thread_id_address;
231 
232 private:
derivedGuardObject233   Derived* derived() { return static_cast<Derived*>(this); }
234 };
235 
236 //===----------------------------------------------------------------------===//
237 //                    Single Threaded Implementation
238 //===----------------------------------------------------------------------===//
239 
240 struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
241   using GuardObject::GuardObject;
242 
acquire_init_byteInitByteNoThreads243   AcquireResult acquire_init_byte() {
244     if (*init_byte_address == COMPLETE_BIT)
245       return INIT_IS_DONE;
246     if (*init_byte_address & PENDING_BIT)
247       ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
248     *init_byte_address = PENDING_BIT;
249     return INIT_IS_PENDING;
250   }
251 
release_init_byteInitByteNoThreads252   void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
abort_init_byteInitByteNoThreads253   void abort_init_byte() { *init_byte_address = UNSET; }
254 };
255 
256 
257 //===----------------------------------------------------------------------===//
258 //                     Global Mutex Implementation
259 //===----------------------------------------------------------------------===//
260 
261 struct LibcppMutex;
262 struct LibcppCondVar;
263 
264 #ifndef _LIBCXXABI_HAS_NO_THREADS
265 struct LibcppMutex {
266   LibcppMutex() = default;
267   LibcppMutex(LibcppMutex const&) = delete;
268   LibcppMutex& operator=(LibcppMutex const&) = delete;
269 
lockLibcppMutex270   bool lock() { return std::__libcpp_mutex_lock(&mutex); }
unlockLibcppMutex271   bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }
272 
273 private:
274   friend struct LibcppCondVar;
275   std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
276 };
277 
278 struct LibcppCondVar {
279   LibcppCondVar() = default;
280   LibcppCondVar(LibcppCondVar const&) = delete;
281   LibcppCondVar& operator=(LibcppCondVar const&) = delete;
282 
waitLibcppCondVar283   bool wait(LibcppMutex& mut) {
284     return std::__libcpp_condvar_wait(&cond, &mut.mutex);
285   }
broadcastLibcppCondVar286   bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }
287 
288 private:
289   std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
290 };
291 #else
292 struct LibcppMutex {};
293 struct LibcppCondVar {};
294 #endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
295 
296 
297 template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
298           uint32_t (*GetThreadID)() = PlatformThreadID>
299 struct InitByteGlobalMutex
300     : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
301                                     GetThreadID>> {
302 
303   using BaseT = typename InitByteGlobalMutex::GuardObject;
304   using BaseT::BaseT;
305 
InitByteGlobalMutexInitByteGlobalMutex306   explicit InitByteGlobalMutex(uint32_t *g)
307     : BaseT(g), has_thread_id_support(false) {}
InitByteGlobalMutexInitByteGlobalMutex308   explicit InitByteGlobalMutex(uint64_t *g)
309     : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}
310 
311 public:
acquire_init_byteInitByteGlobalMutex312   AcquireResult acquire_init_byte() {
313     LockGuard g("__cxa_guard_acquire");
314     // Check for possible recursive initialization.
315     if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
316       if (*thread_id_address == current_thread_id.get())
317        ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
318     }
319 
320     // Wait until the pending bit is not set.
321     while (*init_byte_address & PENDING_BIT) {
322       *init_byte_address |= WAITING_BIT;
323       global_cond.wait(global_mutex);
324     }
325 
326     if (*init_byte_address == COMPLETE_BIT)
327       return INIT_IS_DONE;
328 
329     if (has_thread_id_support)
330       *thread_id_address = current_thread_id.get();
331 
332     *init_byte_address = PENDING_BIT;
333     return INIT_IS_PENDING;
334   }
335 
release_init_byteInitByteGlobalMutex336   void release_init_byte() {
337     bool has_waiting;
338     {
339       LockGuard g("__cxa_guard_release");
340       has_waiting = *init_byte_address & WAITING_BIT;
341       *init_byte_address = COMPLETE_BIT;
342     }
343     if (has_waiting) {
344       if (global_cond.broadcast()) {
345         ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
346       }
347     }
348   }
349 
abort_init_byteInitByteGlobalMutex350   void abort_init_byte() {
351     bool has_waiting;
352     {
353       LockGuard g("__cxa_guard_abort");
354       if (has_thread_id_support)
355         *thread_id_address = 0;
356       has_waiting = *init_byte_address & WAITING_BIT;
357       *init_byte_address = UNSET;
358     }
359     if (has_waiting) {
360       if (global_cond.broadcast()) {
361         ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
362       }
363     }
364   }
365 
366 private:
367   using BaseT::init_byte_address;
368   using BaseT::thread_id_address;
369   const bool has_thread_id_support;
370   LazyValue<uint32_t, GetThreadID> current_thread_id;
371 
372 private:
373   struct LockGuard {
374     LockGuard() = delete;
375     LockGuard(LockGuard const&) = delete;
376     LockGuard& operator=(LockGuard const&) = delete;
377 
LockGuardInitByteGlobalMutex::LockGuard378     explicit LockGuard(const char* calling_func)
379         : calling_func_(calling_func)  {
380       if (global_mutex.lock())
381         ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func_);
382     }
383 
~LockGuardInitByteGlobalMutex::LockGuard384     ~LockGuard() {
385       if (global_mutex.unlock())
386         ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func_);
387     }
388 
389   private:
390     const char* const calling_func_;
391   };
392 };
393 
394 //===----------------------------------------------------------------------===//
395 //                         Futex Implementation
396 //===----------------------------------------------------------------------===//
397 
398 #if defined(SYS_futex)
PlatformFutexWait(int * addr,int expect)399 void PlatformFutexWait(int* addr, int expect) {
400   constexpr int WAIT = 0;
401   syscall(SYS_futex, addr, WAIT, expect, 0);
402   __tsan_acquire(addr);
403 }
PlatformFutexWake(int * addr)404 void PlatformFutexWake(int* addr) {
405   constexpr int WAKE = 1;
406   __tsan_release(addr);
407   syscall(SYS_futex, addr, WAKE, INT_MAX);
408 }
409 #else
410 constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
411 constexpr void (*PlatformFutexWake)(int*) = nullptr;
412 #endif
413 
PlatformSupportsFutex()414 constexpr bool PlatformSupportsFutex() {
415   return +PlatformFutexWait != nullptr;
416 }
417 
418 /// InitByteFutex - Manages initialization using atomics and the futex syscall
419 /// for waiting and waking.
420 template <void (*Wait)(int*, int) = PlatformFutexWait,
421           void (*Wake)(int*) = PlatformFutexWake,
422           uint32_t (*GetThreadIDArg)() = PlatformThreadID>
423 struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
424   using BaseT = typename InitByteFutex::GuardObject;
425 
426   /// ARM Constructor
InitByteFutexInitByteFutex427   explicit InitByteFutex(uint32_t *g) : BaseT(g),
428     init_byte(this->init_byte_address),
429     has_thread_id_support(this->thread_id_address && GetThreadIDArg),
430     thread_id(this->thread_id_address) {}
431 
432   /// Itanium Constructor
InitByteFutexInitByteFutex433   explicit InitByteFutex(uint64_t *g) : BaseT(g),
434     init_byte(this->init_byte_address),
435     has_thread_id_support(this->thread_id_address && GetThreadIDArg),
436     thread_id(this->thread_id_address) {}
437 
438 public:
acquire_init_byteInitByteFutex439   AcquireResult acquire_init_byte() {
440     while (true) {
441       uint8_t last_val = UNSET;
442       if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
443                                      std::_AO_Acquire)) {
444         if (has_thread_id_support) {
445           thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
446         }
447         return INIT_IS_PENDING;
448       }
449 
450       if (last_val == COMPLETE_BIT)
451         return INIT_IS_DONE;
452 
453       if (last_val & PENDING_BIT) {
454 
455         // Check for recursive initialization
456         if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
457             ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
458         }
459 
460         if ((last_val & WAITING_BIT) == 0) {
461           // This compare exchange can fail for several reasons
462           // (1) another thread finished the whole thing before we got here
463           // (2) another thread set the waiting bit we were trying to thread
464           // (3) another thread had an exception and failed to finish
465           if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
466                                           std::_AO_Acq_Rel, std::_AO_Release)) {
467             // (1) success, via someone else's work!
468             if (last_val == COMPLETE_BIT)
469               return INIT_IS_DONE;
470 
471             // (3) someone else, bailed on doing the work, retry from the start!
472             if (last_val == UNSET)
473               continue;
474 
475             // (2) the waiting bit got set, so we are happy to keep waiting
476           }
477         }
478         wait_on_initialization();
479       }
480     }
481   }
482 
release_init_byteInitByteFutex483   void release_init_byte() {
484     uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
485     if (old & WAITING_BIT)
486       wake_all();
487   }
488 
abort_init_byteInitByteFutex489   void abort_init_byte() {
490     if (has_thread_id_support)
491       thread_id.store(0, std::_AO_Relaxed);
492 
493     uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
494     if (old & WAITING_BIT)
495       wake_all();
496   }
497 
498 private:
499   /// Use the futex to wait on the current guard variable. Futex expects a
500   /// 32-bit 4-byte aligned address as the first argument, so we have to use use
501   /// the base address of the guard variable (not the init byte).
wait_on_initializationInitByteFutex502   void wait_on_initialization() {
503     Wait(static_cast<int*>(this->base_address),
504          expected_value_for_futex(PENDING_BIT | WAITING_BIT));
505   }
wake_allInitByteFutex506   void wake_all() { Wake(static_cast<int*>(this->base_address)); }
507 
508 private:
509   AtomicInt<uint8_t> init_byte;
510 
511   const bool has_thread_id_support;
512   // Unsafe to use unless has_thread_id_support
513   AtomicInt<uint32_t> thread_id;
514   LazyValue<uint32_t, GetThreadIDArg> current_thread_id;
515 
516   /// Create the expected integer value for futex `wait(int* addr, int expected)`.
517   /// We pass the base address as the first argument, So this function creates
518   /// an zero-initialized integer  with `b` copied at the correct offset.
expected_value_for_futexInitByteFutex519   static int expected_value_for_futex(uint8_t b) {
520     int dest_val = 0;
521     std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
522     return dest_val;
523   }
524 
525   static_assert(Wait != nullptr && Wake != nullptr, "");
526 };
527 
528 //===----------------------------------------------------------------------===//
529 //
530 //===----------------------------------------------------------------------===//
531 
532 template <class T>
533 struct GlobalStatic {
534   static T instance;
535 };
536 template <class T>
537 _LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};
538 
539 enum class Implementation {
540   NoThreads,
541   GlobalLock,
542   Futex
543 };
544 
545 template <Implementation Impl>
546 struct SelectImplementation;
547 
548 template <>
549 struct SelectImplementation<Implementation::NoThreads> {
550   using type = InitByteNoThreads;
551 };
552 
553 template <>
554 struct SelectImplementation<Implementation::GlobalLock> {
555   using type = InitByteGlobalMutex<
556       LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
557       GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
558 };
559 
560 template <>
561 struct SelectImplementation<Implementation::Futex> {
562   using type =
563       InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
564 };
565 
566 // TODO(EricWF): We should prefer the futex implementation when available. But
567 // it should be done in a separate step from adding the implementation.
568 constexpr Implementation CurrentImplementation =
569 #if defined(_LIBCXXABI_HAS_NO_THREADS)
570     Implementation::NoThreads;
571 #elif defined(_LIBCXXABI_USE_FUTEX)
572     Implementation::Futex;
573 #else
574    Implementation::GlobalLock;
575 #endif
576 
577 static_assert(CurrentImplementation != Implementation::Futex
578            || PlatformSupportsFutex(), "Futex selected but not supported");
579 
580 using SelectedImplementation =
581     SelectImplementation<CurrentImplementation>::type;
582 
583 } // end namespace
584 } // end namespace __cxxabiv1
585 
586 #if defined(__clang__)
587 # pragma clang diagnostic pop
588 #elif defined(__GNUC__)
589 # pragma GCC diagnostic pop
590 #endif
591 
592 #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
593