1 //===--------- support.cu - GPU OpenMP support functions --------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Wrapper implementation to some functions natively supported by the GPU.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "common/support.h"
14 #include "common/debug.h"
15 #include "common/omptarget.h"
16 
17 ////////////////////////////////////////////////////////////////////////////////
18 // Execution Parameters
19 ////////////////////////////////////////////////////////////////////////////////
20 
/// Record the execution mode and the runtime mode in `execution_param`.
///
/// \param EMode Execution mode; overwrites whatever `execution_param` held.
/// \param RMode Runtime mode; OR-ed into the value written for \p EMode.
DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
  // First store replaces the previous contents with the execution-mode bits.
  execution_param = EMode;
  // Second store merges the runtime-mode bits on top of them.
  execution_param |= RMode;
}
25 
/// Return true when the bits of `execution_param` selected by `ModeMask`
/// equal `Generic`.
DEVICE bool isGenericMode() {
  if ((execution_param & ModeMask) != Generic)
    return false;
  return true;
}
27 
/// Return true when the bits of `execution_param` selected by `ModeMask`
/// equal `Spmd`.
DEVICE bool isSPMDMode() {
  const bool ModeIsSpmd = (execution_param & ModeMask) == Spmd;
  return ModeIsSpmd;
}
29 
/// Return true when the bits of `execution_param` selected by `RuntimeMask`
/// equal `RuntimeUninitialized`.
DEVICE bool isRuntimeUninitialized() {
  if ((execution_param & RuntimeMask) != RuntimeUninitialized)
    return false;
  return true;
}
33 
/// Return true when the bits of `execution_param` selected by `RuntimeMask`
/// equal `RuntimeInitialized`.
DEVICE bool isRuntimeInitialized() {
  const bool RuntimeIsUp = (execution_param & RuntimeMask) == RuntimeInitialized;
  return RuntimeIsUp;
}
37 
38 ////////////////////////////////////////////////////////////////////////////////
39 // Execution Modes based on location parameter fields
40 ////////////////////////////////////////////////////////////////////////////////
41 
/// Decide whether the caller runs in SPMD mode, preferring the flags stored in
/// the location descriptor over the globally recorded execution mode.
///
/// \param loc Location descriptor; may be null.
/// \return true for SPMD mode. When \p loc is null, or its flags describe the
///         undefined state (SPMD flag clear, simple-runtime flag set), the
///         answer comes from isSPMDMode().
DEVICE bool checkSPMDMode(kmp_Ident *loc) {
  if (loc) {
    // The SPMD flag is never set in the undefined state, so it is decisive.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;

    // Non-SPMD with the full runtime required is a valid combination of
    // flags, so the answer is known: not SPMD.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;
  }

  // Either there is no location information or the flags are in the
  // undefined state; fall back to the recorded execution mode.
  return isSPMDMode();
}
59 
/// Return the logical negation of checkSPMDMode() for the same \p loc.
DEVICE bool checkGenericMode(kmp_Ident *loc) {
  const bool InSpmd = checkSPMDMode(loc);
  return !InSpmd;
}
63 
/// Decide whether the runtime is uninitialized for the caller, preferring the
/// flags stored in the location descriptor over the globally recorded state.
///
/// \param loc Location descriptor; may be null.
/// \return true when the runtime is considered uninitialized. When \p loc is
///         null, or its flags describe the undefined state (SPMD flag clear,
///         simple-runtime flag set), the answer comes from
///         isRuntimeUninitialized().
DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
  if (loc) {
    // Simple-runtime flag clear: the full runtime is required, which rules
    // out the undefined state, so the runtime is not uninitialized.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;

    // The runtime is not required here. Together with the SPMD flag this is
    // a well-defined state in which the runtime is uninitialized.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;
  }

  // No location information, or the undefined state: non-SPMD combined with
  // "no runtime required" cannot actually happen and is used to mark orphaned
  // functions. Fall back to the recorded runtime state.
  return isRuntimeUninitialized();
}
86 
/// Return the logical negation of checkRuntimeUninitialized() for the same
/// \p loc.
DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
  const bool Uninitialized = checkRuntimeUninitialized(loc);
  return !Uninitialized;
}
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 // support: get info from machine
93 ////////////////////////////////////////////////////////////////////////////////
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 //
97 // Calls to the Generic Scheme Implementation Layer (assuming 1D layout)
98 //
99 ////////////////////////////////////////////////////////////////////////////////
100 
// Return the id of the master thread: the first thread (lane) of the last
// warp in the block. Thread ids are 0 indexed.
// Examples:
//   NumThreads == 33   -> master id 32
//   NumThreads == 64   -> master id 32
//   NumThreads == 97   -> master id 96
//   NumThreads == 1024 -> master id 992
//
// Called in Generic Execution Mode only.
DEVICE int GetMasterThreadID() {
  // Id of the last thread in the block.
  const int LastThreadId = GetNumberOfThreadsInBlock() - 1;
  // Clearing the lane bits rounds down to the first lane of that warp.
  return LastThreadId & ~(WARPSIZE - 1);
}
110 
// Return the number of worker threads in the team. The last warp is reserved
// for the master and every thread below the master's id is a worker, so the
// count equals GetMasterThreadID().
// Called in Generic Execution Mode only.
DEVICE int GetNumberOfWorkersInTeam() {
  const int WorkerCount = GetMasterThreadID();
  return WorkerCount;
}
114 
115 ////////////////////////////////////////////////////////////////////////////////
116 // get thread id in team
117 
// Return the logical thread id of the calling thread within its block.
//
// May be called in a parallel region by the workers or in a serial region by
// the master. In non-SPMD mode, a caller whose thread id is at or above
// GetMasterThreadID() gets 0, because the master is a shadow for the first
// worker. In every other case the thread id in the block is returned as is.
DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
  const int ThreadId = GetThreadIdInBlock();
  // Uses a comparison rather than a modulo operation; GetMasterThreadID() is
  // only evaluated outside of SPMD mode.
  const bool InMasterWarp =
      !isSPMDExecutionMode && ThreadId >= GetMasterThreadID();
  return InMasterWarp ? 0 : ThreadId;
}
131 
132 ////////////////////////////////////////////////////////////////////////////////
133 //
134 // OpenMP Thread Support Layer
135 //
136 ////////////////////////////////////////////////////////////////////////////////
137 
/// Return the OpenMP thread number (omp_thread_num) of the caller.
///
/// \param threadId            Index used to look up the top-level task
///                            descriptor; only read in the non-SPMD path.
/// \param isSPMDExecutionMode Whether the caller executes in SPMD mode.
/// \return 0 when the low bits of the warp's parallel level (those below
///         OMP_ACTIVE_PARALLEL_LEVEL) exceed 1; otherwise the thread id in the
///         block (SPMD) or the thread id stored in the top-level task
///         descriptor (non-SPMD).
DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
  // omp_thread_num
  if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1)
    return 0;

  if (isSPMDExecutionMode)
    return GetThreadIdInBlock();

  // Non-SPMD: the thread number is kept in the top-level task descriptor.
  omptarget_nvptx_TaskDescr *currTaskDescr =
      omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
  ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr");
  return currTaskDescr->ThreadId();
}
153 
/// Return the number of OpenMP threads (omp_num_threads) seen by the caller.
///
/// \param isSPMDExecutionMode Whether the caller executes in SPMD mode.
/// \return 1 unless the warp's parallel level is exactly
///         OMP_ACTIVE_PARALLEL_LEVEL + 1; in that case the number of threads
///         in the block (SPMD) or `threadsInTeam` (non-SPMD).
DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
  // omp_num_threads
  const int WarpLevel = parallelLevel[GetWarpId()];

  // Any level other than OMP_ACTIVE_PARALLEL_LEVEL + 1 reports one thread.
  if (WarpLevel != OMP_ACTIVE_PARALLEL_LEVEL + 1)
    return 1;

  if (isSPMDExecutionMode)
    return GetNumberOfThreadsInBlock();

  return threadsInTeam;
}
168 
169 ////////////////////////////////////////////////////////////////////////////////
170 // Team id linked to OpenMP
171 
/// Return the OpenMP team number (omp_team_num), which is the block id in the
/// kernel; this assumes 1 block per team.
DEVICE int GetOmpTeamId() {
  // omp_team_num
  const int BlockId = GetBlockIdInKernel();
  return BlockId;
}
176 
/// Return the number of OpenMP teams (omp_num_teams), which is the number of
/// blocks in the kernel; this assumes 1 block per team.
DEVICE int GetNumberOfOmpTeams() {
  // omp_num_teams
  const int BlockCount = GetNumberOfBlocksInKernel();
  return BlockCount;
}
181 
182 ////////////////////////////////////////////////////////////////////////////////
183 // Masters
184 
/// Return 1 when \p ompThreadId is 0 (the team master), 0 otherwise.
DEVICE int IsTeamMaster(int ompThreadId) {
  return ompThreadId == 0 ? 1 : 0;
}
186 
187 ////////////////////////////////////////////////////////////////////////////////
188 // Parallel level
189 
/// Raise the parallel level recorded for the calling warp.
///
/// The level grows by 1, plus OMP_ACTIVE_PARALLEL_LEVEL when \p ActiveParallel
/// is set. Exactly one lane of \p Mask performs the update, bracketed by warp
/// synchronizations over \p Mask.
///
/// \param ActiveParallel Whether the region being entered is active.
/// \param Mask           Lanes participating in the call.
DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
  __kmpc_impl_syncwarp(Mask);

  // The updating lane is the one with no lane of Mask among the bits reported
  // by __kmpc_impl_lanemask_lt() (presumably the lanes below the caller).
  const bool IsUpdater =
      __kmpc_impl_popc(Mask & __kmpc_impl_lanemask_lt()) == 0;
  if (IsUpdater) {
    parallelLevel[GetWarpId()] +=
        ((ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0) + 1);
    // Fence is issued right after the update, before the closing sync.
    __kmpc_impl_threadfence();
  }

  __kmpc_impl_syncwarp(Mask);
}
201 
/// Lower the parallel level recorded for the calling warp.
///
/// The level shrinks by 1, plus OMP_ACTIVE_PARALLEL_LEVEL when
/// \p ActiveParallel is set. Exactly one lane of \p Mask performs the update,
/// bracketed by warp synchronizations over \p Mask.
///
/// \param ActiveParallel Whether the region being left is active.
/// \param Mask           Lanes participating in the call.
DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
  __kmpc_impl_syncwarp(Mask);

  // The updating lane is the one with no lane of Mask among the bits reported
  // by __kmpc_impl_lanemask_lt() (presumably the lanes below the caller).
  const bool IsUpdater =
      __kmpc_impl_popc(Mask & __kmpc_impl_lanemask_lt()) == 0;
  if (IsUpdater) {
    parallelLevel[GetWarpId()] -=
        ((ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0) + 1);
    // Fence is issued right after the update, before the closing sync.
    __kmpc_impl_threadfence();
  }

  __kmpc_impl_syncwarp(Mask);
}
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 // get OpenMP number of procs
216 
// Get the number of processors reported for the device.
// In SPMD mode this is the number of threads in the block; otherwise it is
// the number of workers in the team.
DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
  if (isSPMDExecutionMode)
    return GetNumberOfThreadsInBlock();
  return GetNumberOfWorkersInTeam();
}
223 
/// Get the number of processors reported for the team; this forwards to
/// GetNumberOfProcsInDevice() with the same \p isSPMDExecutionMode.
DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
  const int ProcCount = GetNumberOfProcsInDevice(isSPMDExecutionMode);
  return ProcCount;
}
227 
228 ////////////////////////////////////////////////////////////////////////////////
229 // Memory
230 ////////////////////////////////////////////////////////////////////////////////
231 
/// Compute the padding needed to round \p size up to a multiple of
/// \p alignment.
///
/// \param size      Size in bytes to be padded.
/// \param alignment Required alignment; must be a power of 2.
/// \return Number of bytes to add to \p size; 0 when \p size is already a
///         multiple of \p alignment.
DEVICE unsigned long PadBytes(unsigned long size,
                              unsigned long alignment) // must be a power of 2
{
  ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
         "alignment %lu is not a power of 2\n", alignment);
  // For a power-of-2 alignment, the low bits of the two's-complement negation
  // of size are exactly the distance to the next multiple.
  const unsigned long LowBitsMask = alignment - 1;
  return (0UL - size) & LowBitsMask;
}
240 
/// Allocate \p size bytes through __kmpc_impl_malloc() and trace the request.
///
/// \param size Number of bytes requested.
/// \param msg  Label printed in the LD_MEM trace message.
/// \return The pointer produced by __kmpc_impl_malloc(), unmodified. No check
///         of the result is performed here, so callers must handle a failed
///         allocation themselves.
DEVICE void *SafeMalloc(size_t size, const char *msg)
{
  void *Mem = __kmpc_impl_malloc(size);
  PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
        (unsigned long long)size, msg, (unsigned long long)Mem);
  return Mem;
}
248 
/// Trace and release \p ptr through __kmpc_impl_free().
///
/// \param ptr Pointer handed to __kmpc_impl_free().
/// \param msg Label printed in the LD_MEM trace message.
/// \return Always NULL, so a caller can write `p = SafeFree(p, ...)` to reset
///         its pointer.
DEVICE void *SafeFree(void *ptr, const char *msg) {
  // Trace first, while the pointer value is still meaningful to the reader.
  PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);

  __kmpc_impl_free(ptr);

  return NULL;
}
254 
255 ////////////////////////////////////////////////////////////////////////////////
256 // Teams Reduction Scratchpad Helpers
257 ////////////////////////////////////////////////////////////////////////////////
258 
/// Return `ReductionScratchpadPtr` viewed as a pointer to unsigned int, i.e.
/// the timestamp is read from the very start of the scratchpad buffer.
DEVICE unsigned int *GetTeamsReductionTimestamp() {
  void *ScratchBase = ReductionScratchpadPtr;
  return static_cast<unsigned int *>(ScratchBase);
}
262 
/// Return the address 256 bytes past `ReductionScratchpadPtr`, as a byte
/// pointer. The leading 256 bytes are skipped; GetTeamsReductionTimestamp()
/// hands out the start of that region.
DEVICE char *GetTeamsReductionScratchpad() {
  char *ScratchBase = static_cast<char *>(ReductionScratchpadPtr);
  return &ScratchBase[256];
}
266 
267