1 //===------------- task.h - NVPTX OpenMP tasks support ----------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Task implementation support.
10 //
//  An explicit task structure consists of
//    omptarget_nvptx task
//    kmp_task
//
//  where kmp_task is
//    - kmp_TaskDescr    <- task pointer
//        shared -> X
//        routine
//        part_id
//        descr
//    - private (of size given by task_alloc call). Accessed by
//      task+sizeof(kmp_TaskDescr)
//        * private data *
//    - shared: X. Accessed by shared ptr in kmp_TaskDescr
//        * pointer table to shared variables *
//    - end
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "common/omptarget.h"
31 
// Allocate the storage backing one explicit task and return its kmp view.
//
// One SafeMalloc'ed buffer is laid out as:
//   [ omptarget_nvptx_TaskDescr | kmp_TaskDescr + private data (padded) |
//     shared pointer table ]
//
// Parameters:
//   loc                    unused
//   global_tid             unused
//   flag                   unused (in this implementation every task is
//                          executed immediately)
//   sizeOfTaskInclPrivate  bytes for the kmp task structure plus its privates
//   sizeOfSharedTable      bytes for the table of pointers to shared variables
//   taskSub                task entry point, stored in the descriptor
//
// Returns the address of the kmp_TaskDescr embedded in the new buffer.
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
                                            uint32_t global_tid, int32_t flag,
                                            size_t sizeOfTaskInclPrivate,
                                            size_t sizeOfSharedTable,
                                            kmp_TaskFctPtr taskSub) {
  PRINT(LD_IO,
        "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
        "fct 0x%llx)\n",
        (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
        (unsigned long long)taskSub);

  // Round the task+private area up to a multiple of the pointer size; the
  // shared pointer table is placed directly behind it.
  const size_t paddedTaskSize =
      sizeOfTaskInclPrivate + PadBytes(sizeOfTaskInclPrivate, sizeof(void *));

  ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
         "need task descr of size %d to be a multiple of %d\n",
         (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));

  // Single allocation covering both descriptors, the privates and the table.
  const size_t allocSize =
      sizeof(omptarget_nvptx_TaskDescr) + paddedTaskSize + sizeOfSharedTable;
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
          allocSize, "explicit task descriptor");

  // The kmp descriptor is expected to start right after the omptarget one;
  // the *_if0 / *_with_deps entry points rely on that to walk back from it.
  kmp_TaskDescr *kmpDescr = &explicitDescr->kmpTaskDescr;
  ASSERT0(LT_FUSSY,
          (uint64_t)kmpDescr ==
              (uint64_t)ADD_BYTES(explicitDescr,
                                  sizeof(omptarget_nvptx_TaskDescr)),
          "bad size assumptions");

  // Fill in the kmp_TaskDescr fields.
  kmpDescr->sub = taskSub;
  kmpDescr->destructors = NULL;
  kmpDescr->sharedPointerTable = (void *)((char *)kmpDescr + paddedTaskSize);

  PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
        (unsigned long long)kmpDescr, (unsigned long long)explicitDescr);

  return kmpDescr;
}
71 
// Run an explicit task that carries no dependences.
//
// Simply forwards to __kmpc_omp_task_with_deps with zeroed dependence
// arguments and returns that call's result.
EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
                               kmp_TaskDescr *newKmpTaskDescr) {
  const int32_t status = __kmpc_omp_task_with_deps(
      loc, global_tid, newKmpTaskDescr, /*depNum=*/0, /*depList=*/0,
      /*noAliasDepNum=*/0, /*noAliasDepList=*/0);
  return status;
}
77 
// Execute an explicit task right away on the calling thread.
//
// The dependence arguments (depNum, depList, noAliasDepNum, noAliasDepList)
// are never read: the task routine is invoked inline before this function
// returns. The enclosing explicit task descriptor is handed to SafeFree at
// the end, so newKmpTaskDescr must not be touched by the caller afterwards.
//
// Always returns 0.
EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
                                         kmp_TaskDescr *newKmpTaskDescr,
                                         int32_t depNum, void *depList,
                                         int32_t noAliasDepNum,
                                         void *noAliasDepList) {
  PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
        P64(newKmpTaskDescr));
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: walk back from the kmp descriptor to the explicit task descriptor
  // that contains it, then pick out the omptarget descriptor at its front.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: push a new context -- initialise the task descriptor from the
  // current top-level one and install it as the new top for this thread.
  const int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_TaskDescr *enclosingDescr = getMyTopTaskDescriptor(threadId);
  taskDescr->CopyForExplicitTask(enclosingDescr);
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             taskDescr);

  // Step 3: invoke the task routine.
  PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr->sub,
        (unsigned long long)newKmpTaskDescr);
  newKmpTaskDescr->sub(0, newKmpTaskDescr);
  PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
        (unsigned long long)newKmpTaskDescr->sub);

  // Step 4: pop the context -- the previous descriptor becomes top again.
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             enclosingDescr);

  // Step 5: release the task storage.
  SafeFree(explicitDescr, "explicit task descriptor");
  return 0;
}
119 
// Enter an explicit task whose body is executed inline by the caller.
//
// Only the "push context" half of task execution happens here: the task's
// descriptor is initialised from the current top-level descriptor and made
// the new top for this thread. No task routine is called; popping the
// context and releasing the storage are left to
// __kmpc_omp_task_complete_if0.
EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
                                      kmp_TaskDescr *newKmpTaskDescr) {
  PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr);
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: walk back from the kmp descriptor to the explicit task descriptor
  // that contains it, then pick out the omptarget descriptor at its front.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: push the new context and make it the thread's top descriptor.
  const int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_TaskDescr *enclosingDescr = getMyTopTaskDescriptor(threadId);
  taskDescr->CopyForExplicitTask(enclosingDescr);
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             taskDescr);

  // Step 3: nothing to call, the task body is inline in the caller.
  // Steps 4 and 5 (pop, free) are performed by the matching "complete" call.
}
145 
// Leave an explicit task whose body was executed inline by the caller.
//
// Looks up the descriptor that precedes this task's descriptor, reinstalls
// it as the thread's top-level descriptor, and hands the explicit task
// descriptor to SafeFree. newKmpTaskDescr must not be used afterwards.
EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
                                         kmp_TaskDescr *newKmpTaskDescr) {
  PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr);
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: walk back from the kmp descriptor to the explicit task descriptor
  // that contains it, then pick out the omptarget descriptor at its front.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: fetch the descriptor recorded as this task's predecessor.
  omptarget_nvptx_TaskDescr *enclosingDescr = taskDescr->GetPrevTaskDescr();

  // Step 3: nothing to call, the task body was inline in the caller.

  // Step 4: pop the context -- the predecessor becomes top again.
  const int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             enclosingDescr);

  // Step 5: release the task storage.
  SafeFree(explicitDescr, "explicit task descriptor");
}
171 
// Wait for task dependences.
//
// Emits a trace line and returns; none of the arguments are read. No waiting
// is required because every task in this implementation is executed as final.
EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
                                 int32_t depNum, void *depList,
                                 int32_t noAliasDepNum,
                                 void *noAliasDepList) {
  PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
}
178 
// Begin a taskgroup region.
//
// Emits a trace line and returns; the arguments are not read. No bookkeeping
// is needed because every task in this implementation is executed as final.
EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
  PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
}
183 
// End a taskgroup region.
//
// Emits a trace line and returns; the arguments are not read. There is
// nothing to wait for because every task in this implementation is executed
// as final.
EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
  PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
}
188 
// Task yield point.
//
// Emits a trace line and returns; the arguments are not read. Tasks are
// executed immediately in this implementation, so there is never another
// task to yield to.
//
// Always returns 0.
EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
                                    int end_part) {
  // The trace names this entry point exactly, like every other trace in this
  // file, so that debug output can be matched to the function that emitted it.
  PRINT0(LD_IO, "call to __kmpc_omp_taskyield()\n");
  return 0;
}
195 
// Wait for child tasks.
//
// Emits a trace line and returns; the arguments are not read. There is
// nothing to wait for because every task in this implementation is executed
// as final.
//
// Always returns 0.
EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
  // The trace names this entry point exactly, like every other trace in this
  // file, so that debug output can be matched to the function that emitted it.
  PRINT0(LD_IO, "call to __kmpc_omp_taskwait()\n");
  return 0;
}
201 
// Execute a taskloop as one single task covering the whole iteration space.
//
// if_val, st, nogroup, sched, grainsize and task_dup are not read. The
// bounds are only used to detect an empty iteration space (*lb > *ub); in
// that case no task routine is called.
//
// On every path the explicit task descriptor that contains newKmpTaskDescr
// is released, so the caller must not use newKmpTaskDescr after this call.
EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
                            kmp_TaskDescr *newKmpTaskDescr, int if_val,
                            uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
                            int32_t sched, uint64_t grainsize, void *task_dup) {

  // Skip the task entirely if the iteration space is empty. The descriptor
  // still has to be released here: on the normal path that is done by
  // __kmpc_omp_task_with_deps after the task has run, so returning without
  // freeing would leak the allocation. The bounds are compared before the
  // release because they may live inside the task storage (see below).
  if (*lb > *ub) {
    omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
        (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
            newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
    SafeFree(explicitDescr, "explicit task descriptor");
    return;
  }

  // The compiler has already stored lb and ub in the kmp_TaskDescr structure.
  // As a single task executes the entire loop, the initial task can be left
  // untouched and run as-is.
  __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0);
}
217