1 //===--- omptarget.cu - OpenMP GPU initialization ---------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the initialization code for the GPU
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "common/omptarget.h"
14 #include "target_impl.h"
15 
16 ////////////////////////////////////////////////////////////////////////////////
17 // global data tables
18 ////////////////////////////////////////////////////////////////////////////////
19 
20 extern DEVICE
21     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
22         omptarget_nvptx_device_State[MAX_SM];
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 // init entry points
26 ////////////////////////////////////////////////////////////////////////////////
27 
__kmpc_kernel_init(int ThreadLimit,int16_t RequiresOMPRuntime)28 EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
29   PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n",
30         OMPTARGET_NVPTX_VERSION);
31   ASSERT0(LT_FUSSY, RequiresOMPRuntime,
32           "Generic always requires initialized runtime.");
33   setExecutionParameters(Generic, RuntimeInitialized);
34   for (int I = 0; I < MAX_THREADS_PER_TEAM / WARPSIZE; ++I)
35     parallelLevel[I] = 0;
36 
37   int threadIdInBlock = GetThreadIdInBlock();
38   ASSERT0(LT_FUSSY, threadIdInBlock == GetMasterThreadID(),
39           "__kmpc_kernel_init() must be called by team master warp only!");
40   PRINT0(LD_IO, "call to __kmpc_kernel_init for master\n");
41 
42   // Get a state object from the queue.
43   int slot = __kmpc_impl_smid() % MAX_SM;
44   usedSlotIdx = slot;
45   omptarget_nvptx_threadPrivateContext =
46       omptarget_nvptx_device_State[slot].Dequeue();
47 
48   // init thread private
49   int threadId = GetLogicalThreadIdInBlock(/*isSPMDExecutionMode=*/false);
50   omptarget_nvptx_threadPrivateContext->InitThreadPrivateContext(threadId);
51 
52   // init team context
53   omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
54   currTeamDescr.InitTeamDescr();
55   // this thread will start execution... has to update its task ICV
56   // to point to the level zero task ICV. That ICV was init in
57   // InitTeamDescr()
58   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
59       threadId, currTeamDescr.LevelZeroTaskDescr());
60 
61   // set number of threads and thread limit in team to started value
62   omptarget_nvptx_TaskDescr *currTaskDescr =
63       omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
64   nThreads = GetNumberOfThreadsInBlock();
65   threadLimit = ThreadLimit;
66   __kmpc_impl_target_init();
67 }
68 
__kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized)69 EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) {
70   PRINT0(LD_IO, "call to __kmpc_kernel_deinit\n");
71   ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized,
72           "Generic always requires initialized runtime.");
73   // Enqueue omp state object for use by another team.
74   int slot = usedSlotIdx;
75   omptarget_nvptx_device_State[slot].Enqueue(
76       omptarget_nvptx_threadPrivateContext);
77   // Done with work.  Kill the workers.
78   omptarget_nvptx_workFn = 0;
79 }
80 
__kmpc_spmd_kernel_init(int ThreadLimit,int16_t RequiresOMPRuntime)81 EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
82   PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n");
83 
84   setExecutionParameters(Spmd, RequiresOMPRuntime ? RuntimeInitialized
85                                                   : RuntimeUninitialized);
86   int threadId = GetThreadIdInBlock();
87   if (threadId == 0) {
88     usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
89     parallelLevel[0] =
90         1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
91   } else if (GetLaneId() == 0) {
92     parallelLevel[GetWarpId()] =
93         1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
94   }
95   if (!RequiresOMPRuntime) {
96     // Runtime is not required - exit.
97     __kmpc_impl_syncthreads();
98     return;
99   }
100 
101   //
102   // Team Context Initialization.
103   //
104   // In SPMD mode there is no master thread so use any cuda thread for team
105   // context initialization.
106   if (threadId == 0) {
107     // Get a state object from the queue.
108     omptarget_nvptx_threadPrivateContext =
109         omptarget_nvptx_device_State[usedSlotIdx].Dequeue();
110 
111     omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
112     omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
113     // init team context
114     currTeamDescr.InitTeamDescr();
115   }
116   __kmpc_impl_syncthreads();
117 
118   omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
119   omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
120 
121   //
122   // Initialize task descr for each thread.
123   //
124   omptarget_nvptx_TaskDescr *newTaskDescr =
125       omptarget_nvptx_threadPrivateContext->Level1TaskDescr(threadId);
126   ASSERT0(LT_FUSSY, newTaskDescr, "expected a task descr");
127   newTaskDescr->InitLevelOneTaskDescr(currTeamDescr.LevelZeroTaskDescr());
128   // install new top descriptor
129   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
130                                                              newTaskDescr);
131 
132   // init thread private from init value
133   PRINT(LD_PAR,
134         "thread will execute parallel region with id %d in a team of "
135         "%d threads\n",
136         (int)newTaskDescr->ThreadId(), (int)ThreadLimit);
137 }
138 
__kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime)139 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
140   // We're not going to pop the task descr stack of each thread since
141   // there are no more parallel regions in SPMD mode.
142   if (!RequiresOMPRuntime)
143     return;
144 
145   __kmpc_impl_syncthreads();
146   int threadId = GetThreadIdInBlock();
147   if (threadId == 0) {
148     // Enqueue omp state object for use by another team.
149     int slot = usedSlotIdx;
150     omptarget_nvptx_device_State[slot].Enqueue(
151         omptarget_nvptx_threadPrivateContext);
152   }
153 }
154 
155 // Return true if the current target region is executed in SPMD mode.
__kmpc_is_spmd_exec_mode()156 EXTERN int8_t __kmpc_is_spmd_exec_mode() {
157   PRINT0(LD_IO | LD_PAR, "call to __kmpc_is_spmd_exec_mode\n");
158   return isSPMDMode();
159 }
160