//===------------ sync.cu - GPU OpenMP synchronizations ---------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Include all synchronization.
//
//===----------------------------------------------------------------------===//

13 #include "common/omptarget.h"
14 #include "target_impl.h"

////////////////////////////////////////////////////////////////////////////////
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////

// Begin an OpenMP `ordered` region. On this target the construct is a
// no-op beyond debug tracing: the body consists solely of the I/O print.
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_ordered\n");
}
// End an OpenMP `ordered` region. Like __kmpc_ordered, a no-op beyond
// debug tracing on this target.
EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}

////////////////////////////////////////////////////////////////////////////////
// KMP Barriers
////////////////////////////////////////////////////////////////////////////////

// a team is a block: we can use CUDA native synchronization mechanism
// FIXME: what if not all threads (warps) participate in the barrier?
// We may need to implement it differently

// Cancellation barrier. Cancellation is not implemented here, so this
// behaves exactly like a regular barrier and always returns 0 (meaning
// "no cancellation observed" to the caller).
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
  __kmpc_barrier(loc_ref, tid);
  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
  return 0;
}
// Full OpenMP barrier across the threads of the current team. Three cases:
//  * Uninitialized runtime: asserted to be SPMD mode, where the simple
//    block-wide barrier suffices.
//  * Initialized runtime with more than one OpenMP thread: SPMD mode uses
//    the simple block-wide barrier; generic mode synchronizes the worker
//    threads through the implementation's named sync, with the thread
//    count rounded up to a multiple of WARPSIZE.
//  * A single OpenMP thread: no synchronization is needed, but the OpenMP
//    standard still requires a memory flush.
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  if (checkRuntimeUninitialized(loc_ref)) {
    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    // Recompute the logical thread id; the incoming tid is not used further.
    tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
    int numberOfActiveOMPThreads =
        GetNumberOfOmpThreads(checkSPMDMode(loc_ref));
    if (numberOfActiveOMPThreads > 1) {
      if (checkSPMDMode(loc_ref)) {
        __kmpc_barrier_simple_spmd(loc_ref, tid);
      } else {
        // The #threads parameter must be rounded up to the WARPSIZE.
        int threads =
            WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);

        PRINT(LD_SYNC,
              "call kmpc_barrier with %d omp threads, sync parameter %d\n",
              (int)numberOfActiveOMPThreads, (int)threads);
        __kmpc_impl_named_sync(threads);
      }
    } else {
      // Still need to flush the memory per the standard.
      __kmpc_flush(loc_ref);
    } // numberOfActiveOMPThreads > 1
    PRINT0(LD_SYNC, "completed kmpc_barrier\n");
  }
}

// Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
// Simple SPMD-mode barrier: delegates directly to the target's
// block-wide synchronization primitive (__kmpc_impl_syncthreads).
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
  __kmpc_impl_syncthreads();
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}

////////////////////////////////////////////////////////////////////////////////
// KMP MASTER
////////////////////////////////////////////////////////////////////////////////

// Begin an OpenMP `master` region. Returns IsTeamMaster(global_tid),
// i.e. nonzero only on the team-master thread, which then executes the
// region body.
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_master\n");
  return IsTeamMaster(global_tid);
}
// End an OpenMP `master` region. Only sanity-checks (in fussy builds)
// that the caller really is the team master; no synchronization is done.
EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_master\n");
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}

////////////////////////////////////////////////////////////////////////////////
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////

// Begin an OpenMP `single` region. Implemented in terms of `master`:
// the team master is always the thread that executes the single region.
EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_single\n");
  // decide to implement single with master; master gets the single
  return IsTeamMaster(global_tid);
}
// End an OpenMP `single` region. As with __kmpc_single, the region is
// mapped to `master`, so only the team master is expected here. The
// implicit barrier of `single` is emitted explicitly by the caller, so
// no synchronization is required in this entry point.
EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_single\n");
  // decide to implement single with master: master gets the single
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
  // sync barrier is explicitly called... so that is not a problem
}

////////////////////////////////////////////////////////////////////////////////
// Flush
////////////////////////////////////////////////////////////////////////////////

// OpenMP `flush`: delegates to the target's threadfence implementation
// to make this thread's memory writes visible to the device.
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  PRINT0(LD_IO, "call kmpc_flush\n");
  __kmpc_impl_threadfence();
}

////////////////////////////////////////////////////////////////////////////////
// Vote
////////////////////////////////////////////////////////////////////////////////

// Return the mask of currently active lanes in the calling warp, as
// reported by the target's activemask implementation.
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
  return __kmpc_impl_activemask();
}

////////////////////////////////////////////////////////////////////////////////
// Syncwarp
////////////////////////////////////////////////////////////////////////////////

// Synchronize the warp lanes named in Mask, delegating to the target's
// syncwarp implementation.
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");
  __kmpc_impl_syncwarp(Mask);
}