1 //===----------- device.h - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Declarations for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef _OMPTARGET_DEVICE_H
14 #define _OMPTARGET_DEVICE_H
15 
16 #include <cassert>
17 #include <cstddef>
18 #include <list>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <set>
23 #include <vector>
24 
25 #include "rtl.h"
26 
27 // Forward declarations.
28 struct RTLInfoTy;
29 struct __tgt_bin_desc;
30 struct __tgt_target_table;
31 struct __tgt_async_info;
32 class MemoryManagerTy;
33 
34 using map_var_info_t = void *;
35 
/// Offload policy selected through OMP_TARGET_OFFLOAD.
///
/// The enumerator values must be kept in sync with the definition in kmp.h.
enum kmp_target_offload_kind {
  tgt_disabled = 0,  ///< "disabled" policy.
  tgt_default = 1,   ///< "default" policy.
  tgt_mandatory = 2, ///< "mandatory" policy.
};
/// Shorthand for the enumeration type above.
using kmp_target_offload_kind_t = kmp_target_offload_kind;
43 
/// Map between host data and target data.
///
/// An entry records a host address range [HstPtrBegin, HstPtrEnd), the host
/// base address, an optional source name, the associated target begin address
/// and a reference count. The reference count either holds a finite value or
/// the "infinite" sentinel; once infinite it is never changed by any of the
/// member functions below.
struct HostDataToTargetTy {
  uintptr_t HstPtrBase; // host info.
  uintptr_t HstPtrBegin;
  uintptr_t HstPtrEnd;       // non-inclusive.
  map_var_info_t HstPtrName; // Optional source name of mapped variable.

  uintptr_t TgtPtrBegin; // target info.

private:
  /// use mutable to allow modification via std::set iterator which is const.
  mutable uint64_t RefCount;
  /// Sentinel (all bits set) marking an entry whose count is infinite.
  static constexpr uint64_t INFRefCount = ~static_cast<uint64_t>(0);

public:
  /// Creates an entry with a reference count of 1, or with an infinite
  /// reference count when \p IsINF is true.
  ///
  /// \param BP   host base address.
  /// \param B    host begin address.
  /// \param E    host end address (non-inclusive).
  /// \param TB   target begin address.
  /// \param Name optional source name of the mapped variable.
  /// \param IsINF whether the reference count starts out infinite.
  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
                     map_var_info_t Name = nullptr, bool IsINF = false)
      : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
        TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1) {}

  /// Returns the current reference count (the sentinel value if infinite).
  uint64_t getRefCount() const { return RefCount; }

  /// Sets a finite reference count back to 1 and returns the resulting count.
  /// An infinite count is left untouched.
  uint64_t resetRefCount() const {
    if (!isRefCountInf())
      RefCount = 1;

    return RefCount;
  }

  /// Increments a finite reference count and returns the resulting count.
  /// An infinite count is left untouched.
  uint64_t incRefCount() const {
    if (!isRefCountInf()) {
      assert(RefCount < INFRefCount - 1 && "refcount overflow");
      // The assert vanishes under NDEBUG. Never let a finite count step onto
      // the sentinel value, which would silently turn the entry infinite.
      if (RefCount < INFRefCount - 1)
        ++RefCount;
    }

    return RefCount;
  }

  /// Decrements a finite reference count and returns the resulting count.
  /// An infinite count is left untouched.
  uint64_t decRefCount() const {
    if (!isRefCountInf()) {
      assert(RefCount > 0 && "refcount underflow");
      // The assert vanishes under NDEBUG. Decrementing 0 would wrap around to
      // the all-ones sentinel and make the entry look infinite, so clamp at 0.
      if (RefCount > 0)
        --RefCount;
    }

    return RefCount;
  }

  /// Returns true when the reference count is the infinite sentinel.
  bool isRefCountInf() const { return RefCount == INFRefCount; }
};
97 
98 typedef uintptr_t HstPtrBeginTy;
99 inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) {
100   return lhs.HstPtrBegin < rhs;
101 }
102 inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) {
103   return lhs < rhs.HstPtrBegin;
104 }
105 inline bool operator<(const HostDataToTargetTy &lhs,
106                       const HostDataToTargetTy &rhs) {
107   return lhs.HstPtrBegin < rhs.HstPtrBegin;
108 }
109 
110 typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy;
111 
112 struct LookupResult {
113   struct {
114     unsigned IsContained   : 1;
115     unsigned ExtendsBefore : 1;
116     unsigned ExtendsAfter  : 1;
117   } Flags;
118 
119   HostDataToTargetListTy::iterator Entry;
120 
LookupResultLookupResult121   LookupResult() : Flags({0,0,0}), Entry() {}
122 };
123 
/// Record stored for each shadow pointer.
///
/// NOTE(review): the per-field notes are inferred from the member names only —
/// confirm against the code that populates the map.
struct ShadowPtrValTy {
  void *HstPtrVal;  // presumably the pointer's value on the host
  void *TgtPtrAddr; // presumably the pointer's location on the target
  void *TgtPtrVal;  // presumably the pointer's value on the target
};
/// Map for shadow pointers: a void * key to its ShadowPtrValTy record.
using ShadowPtrListTy = std::map<void *, ShadowPtrValTy>;
131 
/// Pending constructors and pending destructors, kept as two separate lists
/// of opaque pointers.
struct PendingCtorDtorListsTy {
  std::list<void *> PendingCtors;
  std::list<void *> PendingDtors;
};
/// One PendingCtorDtorListsTy per library, keyed by the address of the
/// library's __tgt_bin_desc.
using PendingCtorsDtorsPerLibrary =
    std::map<__tgt_bin_desc *, PendingCtorDtorListsTy>;
139 
140 struct DeviceTy {
141   int32_t DeviceID;
142   RTLInfoTy *RTL;
143   int32_t RTLDeviceID;
144 
145   bool IsInit;
146   std::once_flag InitFlag;
147   bool HasPendingGlobals;
148 
149   HostDataToTargetListTy HostDataToTargetMap;
150   PendingCtorsDtorsPerLibrary PendingCtorsDtors;
151 
152   ShadowPtrListTy ShadowPtrMap;
153 
154   std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx;
155 
156   // NOTE: Once libomp gains full target-task support, this state should be
157   // moved into the target task in libomp.
158   std::map<int32_t, uint64_t> LoopTripCnt;
159 
160   /// Memory manager
161   std::unique_ptr<MemoryManagerTy> MemoryManager;
162 
163   DeviceTy(RTLInfoTy *RTL);
164 
165   // The existence of mutexes makes DeviceTy non-copyable. We need to
166   // provide a copy constructor and an assignment operator explicitly.
167   DeviceTy(const DeviceTy &D);
168 
169   DeviceTy &operator=(const DeviceTy &D);
170 
171   ~DeviceTy();
172 
173   // Return true if data can be copied to DstDevice directly
174   bool isDataExchangable(const DeviceTy& DstDevice);
175 
176   uint64_t getMapEntryRefCnt(void *HstPtrBegin);
177   LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
178   void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
179                          map_var_info_t HstPtrName, bool &IsNew,
180                          bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount,
181                          bool HasCloseModifier, bool HasPresentModifier);
182   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
183   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
184                        bool UpdateRefCount, bool &IsHostPtr,
185                        bool MustContain = false);
186   int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete,
187                     bool HasCloseModifier = false);
188   int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
189   int disassociatePtr(void *HstPtrBegin);
190 
191   // calls to RTL
192   int32_t initOnce();
193   __tgt_target_table *load_binary(void *Img);
194 
195   // device memory allocation/deallocation routines
196   /// Allocates \p Size bytes on the device and returns the address/nullptr when
197   /// succeeds/fails. \p HstPtr is an address of the host data which the
198   /// allocated target data will be associated with. If it is unknown, the
199   /// default value of \p HstPtr is nullptr. Note: this function doesn't do
200   /// pointer association. Actually, all the __tgt_rtl_data_alloc
201   /// implementations ignore \p HstPtr.
202   void *allocData(int64_t Size, void *HstPtr = nullptr);
203   /// Deallocates memory which \p TgtPtrBegin points at and returns
204   /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
205   int32_t deleteData(void *TgtPtrBegin);
206 
207   // Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
208   // synchronous.
209   // Copy data from host to device
210   int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
211                      __tgt_async_info *AsyncInfoPtr);
212   // Copy data from device back to host
213   int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
214                        __tgt_async_info *AsyncInfoPtr);
215   // Copy data from current device to destination device directly
216   int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
217                        int64_t Size, __tgt_async_info *AsyncInfo);
218 
219   int32_t runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets,
220                     int32_t TgtVarsSize, __tgt_async_info *AsyncInfoPtr);
221   int32_t runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
222                         ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
223                         int32_t NumTeams, int32_t ThreadLimit,
224                         uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr);
225 
226   /// Synchronize device/queue/event based on \p AsyncInfoPtr and return
227   /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
228   int32_t synchronize(__tgt_async_info *AsyncInfoPtr);
229 
230 private:
231   // Call to RTL
232   void init(); // To be called only via DeviceTy::initOnce()
233 };
234 
235 /// Map between Device ID (i.e. openmp device id) and its DeviceTy.
236 typedef std::vector<DeviceTy> DevicesTy;
237 
238 extern bool device_is_ready(int device_num);
239 
240 /// Struct for the data required to handle plugins
241 struct PluginManager {
242   /// RTLs identified on the host
243   RTLsTy RTLs;
244 
245   /// Devices associated with RTLs
246   DevicesTy Devices;
247   std::mutex RTLsMtx; ///< For RTLs and Devices
248 
249   /// Translation table retreived from the binary
250   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
251   std::mutex TrlTblMtx; ///< For Translation Table
252 
253   /// Map from ptrs on the host to an entry in the Translation Table
254   HostPtrToTableMapTy HostPtrToTableMap;
255   std::mutex TblMapMtx; ///< For HostPtrToTableMap
256 
257   // Store target policy (disabled, mandatory, default)
258   kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
259   std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy
260 };
261 
262 extern PluginManager *PM;
263 
264 #endif
265