1 //===------------ sync.cu - GPU OpenMP synchronizations ---------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Include all synchronization.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "common/omptarget.h"
14 #include "target_impl.h"
15 
16 ////////////////////////////////////////////////////////////////////////////////
17 // KMP Ordered calls
18 ////////////////////////////////////////////////////////////////////////////////
19 
// Entry point marking the start of an `ordered` region.
// The body only emits the LD_IO trace message below; `loc` and `tid` are not
// referenced directly by this function.
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_ordered\n");
}
23 
// Entry point marking the end of an `ordered` region.
// The body only emits the LD_IO trace message below; `loc` and `tid` are not
// referenced directly by this function.
EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
27 
28 ////////////////////////////////////////////////////////////////////////////////
29 // KMP Barriers
30 ////////////////////////////////////////////////////////////////////////////////
31 
// A team is a block: we can use the CUDA native synchronization mechanism.
// FIXME: what if not all threads (warps) participate in the barrier?
// We may need to implement it differently.
35 
// Barrier entry point for the cancel-barrier call.
// Forwards to __kmpc_barrier with the same arguments and then
// unconditionally returns 0.
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");

  // The actual synchronization is delegated to the regular barrier.
  __kmpc_barrier(loc_ref, tid);

  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");

  // This implementation always reports 0 to the caller.
  const int32_t result = 0;
  return result;
}
42 
// Team barrier entry point.
//
// Dispatch, in order:
//   1. Runtime uninitialized: use the simple SPMD barrier and return (SPMD
//      mode is asserted at the LT_FUSSY level).
//   2. At most one active OpenMP thread: no barrier, only a flush.
//   3. SPMD mode: use the simple SPMD barrier.
//   4. Otherwise: named sync over the active thread count rounded up to a
//      multiple of WARPSIZE.
// The "completed kmpc_barrier" trace is emitted for cases 2-4 only.
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  // Case 1: handle the uninitialized runtime up front and leave.
  if (checkRuntimeUninitialized(loc_ref)) {
    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
    return;
  }

  // The caller-supplied tid is replaced by the logical thread id in the block.
  tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
  const int ompThreadCount = GetNumberOfOmpThreads(checkSPMDMode(loc_ref));

  if (ompThreadCount <= 1) {
    // Case 2: still need to flush the memory per the standard.
    __kmpc_flush(loc_ref);
  } else if (checkSPMDMode(loc_ref)) {
    // Case 3.
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    // Case 4: the #threads parameter must be rounded up to the WARPSIZE.
    const int warpCount = (ompThreadCount + WARPSIZE - 1) / WARPSIZE;
    const int syncWidth = WARPSIZE * warpCount;

    PRINT(LD_SYNC,
          "call kmpc_barrier with %d omp threads, sync parameter %d\n",
          (int)ompThreadCount, (int)syncWidth);
    __kmpc_impl_named_sync(syncWidth);
  }

  PRINT0(LD_SYNC, "completed kmpc_barrier\n");
}
72 
// Simple barrier for SPMD mode.
// Precondition (not checked here): the caller is in an L0 parallel region and
// every worker thread takes part in the barrier.
// `loc_ref` and `tid` are not referenced directly by this function.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");

  // The synchronization itself is done by the target-specific primitive.
  __kmpc_impl_syncthreads();

  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 // KMP MASTER
83 ////////////////////////////////////////////////////////////////////////////////
84 
// Entry point for the start of a `master` region.
// Returns the result of IsTeamMaster(global_tid), converted to int32_t.
// `loc` is not referenced directly by this function.
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_master\n");

  const int32_t runsMasterRegion = IsTeamMaster(global_tid);
  return runsMasterRegion;
}
89 
// Entry point for the end of a `master` region.
// Does no work besides tracing and an LT_FUSSY-level assertion that the
// calling thread satisfies IsTeamMaster(global_tid).
EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_master\n");

  // Only the thread that entered the master region should get here.
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 // KMP SINGLE
97 ////////////////////////////////////////////////////////////////////////////////
98 
// Entry point for the start of a `single` region.
// `single` is implemented on top of `master`: the thread for which
// IsTeamMaster(global_tid) holds is the one that gets the single region.
EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_single\n");

  // The master thread is the one selected to execute the single region.
  const int32_t runsSingleRegion = IsTeamMaster(global_tid);
  return runsSingleRegion;
}
104 
// Entry point for the end of a `single` region.
// Because `single` is implemented with `master`, only the master thread is
// expected here; this is checked with an LT_FUSSY-level assertion.
// No barrier is issued by this function: the synchronizing barrier is called
// explicitly elsewhere, so none is needed here.
EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_single\n");

  // The master thread got the single region, so only it should arrive here.
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
111 
112 ////////////////////////////////////////////////////////////////////////////////
113 // Flush
114 ////////////////////////////////////////////////////////////////////////////////
115 
// Flush entry point.
// Emits a trace message and issues the target-specific thread fence via
// __kmpc_impl_threadfence().  `loc` is not referenced directly here.
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  PRINT0(LD_IO, "call kmpc_flush\n");

  // The fence itself is provided by the target implementation layer.
  __kmpc_impl_threadfence();
}
120 
121 ////////////////////////////////////////////////////////////////////////////////
122 // Vote
123 ////////////////////////////////////////////////////////////////////////////////
124 
// Returns the lane mask reported by __kmpc_impl_activemask() at the time of
// the call, after emitting a trace message.
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");

  const __kmpc_impl_lanemask_t activeLanes = __kmpc_impl_activemask();
  return activeLanes;
}
129 
130 ////////////////////////////////////////////////////////////////////////////////
131 // Syncwarp
132 ////////////////////////////////////////////////////////////////////////////////
133 
// Warp-level synchronization entry point.
// Emits a trace message and forwards `Mask` unchanged to the target-specific
// __kmpc_impl_syncwarp().
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");

  // The mask is passed through as-is; it is not inspected here.
  __kmpc_impl_syncwarp(Mask);
}
138