1 //===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef _OMPTARGET_H_
15 #define _OMPTARGET_H_
16 
17 #include <stdint.h>
18 #include <stddef.h>
19 
20 #include <SourceInfo.h>
21 
/// Status values: success is 0, failure is all-ones (~0, i.e. -1 as an int).
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)

/// Sentinel device number (-1); by its name it selects the default device.
/// Parenthesized like the other macros so the negative literal always expands
/// as a single operand.
#define OFFLOAD_DEVICE_DEFAULT (-1)
26 
/// Data attributes for each data reference used in an OpenMP target region.
///
/// The enumerators are bit flags (each low flag occupies a distinct bit), so
/// several of them can be combined in one 64-bit map-type word.
/// NOTE: the two largest enumerators do not fit in an `int`, so the
/// enumeration needs a 64-bit underlying type (C++, or C compilers that accept
/// wide enumerators as an extension).
enum tgt_map_type {
  // No flags
  OMP_TGT_MAPTYPE_NONE            = 0x000,
  // copy data from host to device
  OMP_TGT_MAPTYPE_TO              = 0x001,
  // copy data from device to host
  OMP_TGT_MAPTYPE_FROM            = 0x002,
  // copy regardless of the reference count
  OMP_TGT_MAPTYPE_ALWAYS          = 0x004,
  // force unmapping of data
  OMP_TGT_MAPTYPE_DELETE          = 0x008,
  // map the pointer as well as the pointee
  OMP_TGT_MAPTYPE_PTR_AND_OBJ     = 0x010,
  // pass device base address to kernel
  OMP_TGT_MAPTYPE_TARGET_PARAM    = 0x020,
  // return base device address of mapped data
  OMP_TGT_MAPTYPE_RETURN_PARAM    = 0x040,
  // private variable - not mapped
  OMP_TGT_MAPTYPE_PRIVATE         = 0x080,
  // copy by value - not mapped
  OMP_TGT_MAPTYPE_LITERAL         = 0x100,
  // mapping is implicit
  OMP_TGT_MAPTYPE_IMPLICIT        = 0x200,
  // `close` map-type modifier: hint to allocate the mapped data in memory
  // close to the target device (see the OpenMP map clause)
  OMP_TGT_MAPTYPE_CLOSE           = 0x400,
  // runtime error if not already allocated
  OMP_TGT_MAPTYPE_PRESENT         = 0x1000,
  // descriptor for non-contiguous target-update
  OMP_TGT_MAPTYPE_NON_CONTIG      = 0x100000000000,
  // mask, not a single flag: member of struct, member given by [16 MSBs] - 1
  OMP_TGT_MAPTYPE_MEMBER_OF       = 0xffff000000000000
};
60 
/// Flags that mark an offload entry; each value occupies its own bit.
enum OpenMPOffloadingDeclareTargetFlags {
  OMP_DECLARE_TARGET_LINK = 0x01, ///< Entry has a 'link' attribute.
  OMP_DECLARE_TARGET_CTOR = 0x02, ///< Entry is a global constructor.
  OMP_DECLARE_TARGET_DTOR = 0x04  ///< Entry is a global destructor.
};
69 
/// Flags describing the clauses of `requires` directives; apart from the
/// zero "undefined" value, each enumerator occupies its own bit.
enum OpenMPOffloadingRequiresDirFlags {
  OMP_REQ_UNDEFINED             = 0x000, ///< Flag undefined.
  OMP_REQ_NONE                  = 0x001, ///< No requires directive present.
  OMP_REQ_REVERSE_OFFLOAD       = 0x002, ///< reverse_offload clause.
  OMP_REQ_UNIFIED_ADDRESS       = 0x004, ///< unified_address clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, ///< unified_shared_memory clause.
  OMP_REQ_DYNAMIC_ALLOCATORS    = 0x010  ///< dynamic_allocators clause.
};
84 
/// Record describing one offload entry: either a function entry point or a
/// global. Function entry points are expected to carry a size of zero.
struct __tgt_offload_entry {
  /// Pointer to the offload entry info (function or global).
  void *addr;
  /// Name of the function or global.
  char *name;
  /// Size of the entry info (0 if it is a function).
  size_t size;
  /// Flags associated with the entry, e.g. 'link'.
  int32_t flags;
  /// Reserved, to be used by the runtime library.
  int32_t reserved;
};
94 
/// Record of the device image information: the bounds of the target code and
/// the bounds of the table of target entries that belongs to it.
///
/// The entry pointers use the elaborated `struct` form so the declaration is
/// valid in C as well as in C++.
struct __tgt_device_image {
  void *ImageStart;                         // Pointer to the target code start
  void *ImageEnd;                           // Pointer to the target code end
  struct __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
  struct __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
};
102 
/// Record of all the host code that may be offloaded to a target: the array
/// of device images plus the bounds of the host entry table.
///
/// Pointer members use the elaborated `struct` form so the declaration is
/// valid in C as well as in C++.
struct __tgt_bin_desc {
  int32_t NumDeviceImages;                      // Number of device types supported
  struct __tgt_device_image *DeviceImages;      // Array of device images (1 per dev. type)
  struct __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
  struct __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
};
111 
/// Table of the offload entries identified by the target runtime, given as a
/// half-open range [EntriesBegin, EntriesEnd).
///
/// Pointer members use the elaborated `struct` form so the declaration is
/// valid in C as well as in C++.
struct __tgt_target_table {
  // Begin of the table with all the entries
  struct __tgt_offload_entry *EntriesBegin;
  // End of the table with all the entries (non inclusive)
  struct __tgt_offload_entry *EntriesEnd;
};
118 
/// Information exchanged between different asynchronous operations for
/// device-dependent optimization and potential synchronization.
struct __tgt_async_info {
  // A pointer to a queue-like structure where offloading operations are issued.
  // We assume to use this structure to do synchronization. In CUDA backend, it
  // is CUstream.
#ifdef __cplusplus
  void *Queue = nullptr;
#else
  // C has no default member initializers: C users must zero-initialize the
  // object themselves (e.g. `struct __tgt_async_info info = {0};`).
  void *Queue;
#endif
};
127 
/// Record of non-contiguous information: three unsigned 64-bit fields.
struct __tgt_target_non_contig {
  uint64_t Offset; ///< Offset field of the descriptor.
  uint64_t Count;  ///< Count field of the descriptor.
  uint64_t Stride; ///< Stride field of the descriptor.
};
134 
135 #ifdef __cplusplus
136 extern "C" {
137 #endif
138 
// Device query and device memory routines named after the OpenMP API
// (declarations only; their semantics are defined by the OpenMP
// specification, not by this header).

// Device queries.
int omp_get_num_devices(void);
int omp_get_initial_device(void);

// Allocation, release and presence check on device `device_num`.
void *omp_target_alloc(size_t size, int device_num);
void omp_target_free(void *device_ptr, int device_num);
int omp_target_is_present(void *ptr, int device_num);

// Copies between `src_device` and `dst_device`: flat and rectangular
// (multi-dimensional, `num_dims` dimensions) forms.
int omp_target_memcpy(void *dst, void *src, size_t length,
                      size_t dst_offset, size_t src_offset,
                      int dst_device, int src_device);
int omp_target_memcpy_rect(void *dst, void *src, size_t element_size,
                           int num_dims, const size_t *volume,
                           const size_t *dst_offsets,
                           const size_t *src_offsets,
                           const size_t *dst_dimensions,
                           const size_t *src_dimensions,
                           int dst_device, int src_device);

// Association of a host pointer with a device pointer, and its removal.
int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size,
                             size_t device_offset, int device_num);
int omp_target_disassociate_ptr(void *host_ptr, int device_num);
153 
/// Adds the clauses of the requires directives in a given file.
/// \param flags clause flags (presumably a combination of
///        OpenMPOffloadingRequiresDirFlags values -- confirm with callers).
void __tgt_register_requires(int64_t flags);

/// Adds a target shared library to the target execution image.
/// \param desc descriptor of the library to add. Spelled with the elaborated
///        `struct` form so the prototype is valid in C as well as in C++.
void __tgt_register_lib(struct __tgt_bin_desc *desc);

/// Removes a target shared library from the target execution image.
/// \param desc descriptor of the library to remove.
void __tgt_unregister_lib(struct __tgt_bin_desc *desc);
162 
// Creates the host-to-target data mapping, records it in the libomptarget.so
// internal structure (an entry in a stack of data maps) and passes the data
// to the device.
//
// The arrays args_base, args, arg_sizes and arg_types are accompanied by the
// count arg_num (presumably one element per mapped argument -- confirm).
void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
                             void **args_base, void **args,
                             int64_t *arg_sizes, int64_t *arg_types);

// Same parameters as __tgt_target_data_begin, followed by two dependence
// lists: (depNum, depList) and (noAliasDepNum, noAliasDepList).
void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
                                    void **args_base, void **args,
                                    int64_t *arg_sizes, int64_t *arg_types,
                                    int32_t depNum, void *depList,
                                    int32_t noAliasDepNum,
                                    void *noAliasDepList);
175 void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
176                                     int32_t arg_num, void **args_base,
177                                     void **args, int64_t *arg_sizes,
178                                     int64_t *arg_types,
179                                     map_var_info_t *arg_names,
180                                     void **arg_mappers);
181 void __tgt_target_data_begin_nowait_mapper(
182     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
183     void **args, int64_t *arg_sizes, int64_t *arg_types,
184     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
185     void *depList, int32_t noAliasDepNum, void *noAliasDepList);
186 
// Passes data back from the target, releases target memory and destroys the
// host-target mapping (the top entry of the stack of data maps) that the last
// __tgt_target_data_begin created.
void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
                           void **args_base, void **args,
                           int64_t *arg_sizes, int64_t *arg_types);

// Same parameters as __tgt_target_data_end, followed by two dependence
// lists: (depNum, depList) and (noAliasDepNum, noAliasDepList).
void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
                                  void **args_base, void **args,
                                  int64_t *arg_sizes, int64_t *arg_types,
                                  int32_t depNum, void *depList,
                                  int32_t noAliasDepNum,
                                  void *noAliasDepList);
197 void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
198                                   int32_t arg_num, void **args_base,
199                                   void **args, int64_t *arg_sizes,
200                                   int64_t *arg_types, map_var_info_t *arg_names,
201                                   void **arg_mappers);
202 void __tgt_target_data_end_nowait_mapper(
203     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
204     void **args, int64_t *arg_sizes, int64_t *arg_types,
205     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
206     void *depList, int32_t noAliasDepNum, void *noAliasDepList);
207 
/// Passes data to/from the target.
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
                              void **args_base, void **args,
                              int64_t *arg_sizes, int64_t *arg_types);

/// Same parameters as __tgt_target_data_update, followed by two dependence
/// lists: (depNum, depList) and (noAliasDepNum, noAliasDepList).
void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
                                     void **args_base, void **args,
                                     int64_t *arg_sizes, int64_t *arg_types,
                                     int32_t depNum, void *depList,
                                     int32_t noAliasDepNum,
                                     void *noAliasDepList);
218 void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
219                                      int32_t arg_num, void **args_base,
220                                      void **args, int64_t *arg_sizes,
221                                      int64_t *arg_types,
222                                      map_var_info_t *arg_names,
223                                      void **arg_mappers);
224 void __tgt_target_data_update_nowait_mapper(
225     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
226     void **args, int64_t *arg_sizes, int64_t *arg_types,
227     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
228     void *depList, int32_t noAliasDepNum, void *noAliasDepList);
229 
// Launches an offloaded region on the target platform.
//
// When arg_num is non-zero, the same actions as data_begin are performed
// before the region runs and the same actions as data_end after the region
// execution is done.
//
// Returns 0 if execution could be transferred to a target, and an int
// different from zero otherwise.
int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
                 void **args_base, void **args,
                 int64_t *arg_sizes, int64_t *arg_types);

// Same parameters as __tgt_target, followed by two dependence lists:
// (depNum, depList) and (noAliasDepNum, noAliasDepList).
int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
                        void **args_base, void **args,
                        int64_t *arg_sizes, int64_t *arg_types,
                        int32_t depNum, void *depList,
                        int32_t noAliasDepNum, void *noAliasDepList);
243 int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
244                         int32_t arg_num, void **args_base, void **args,
245                         int64_t *arg_sizes, int64_t *arg_types,
246                         map_var_info_t *arg_names, void **arg_mappers);
247 int __tgt_target_nowait_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
248                                int32_t arg_num, void **args_base, void **args,
249                                int64_t *arg_sizes, int64_t *arg_types,
250                                map_var_info_t *arg_names, void **arg_mappers,
251                                int32_t depNum, void *depList,
252                                int32_t noAliasDepNum, void *noAliasDepList);
253 
// Teams form of the target entry point: the __tgt_target parameter list
// followed by num_teams and thread_limit.
// NOTE(review): the int result presumably follows the __tgt_target convention
// (0 when execution was transferred to a target) -- confirm.
int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
                       void **args_base, void **args,
                       int64_t *arg_sizes, int64_t *arg_types,
                       int32_t num_teams, int32_t thread_limit);

// As above, followed by two dependence lists: (depNum, depList) and
// (noAliasDepNum, noAliasDepList).
int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
                              int32_t arg_num,
                              void **args_base, void **args,
                              int64_t *arg_sizes, int64_t *arg_types,
                              int32_t num_teams, int32_t thread_limit,
                              int32_t depNum, void *depList,
                              int32_t noAliasDepNum, void *noAliasDepList);
264 int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
265                               int32_t arg_num, void **args_base, void **args,
266                               int64_t *arg_sizes, int64_t *arg_types,
267                               map_var_info_t *arg_names, void **arg_mappers,
268                               int32_t num_teams, int32_t thread_limit);
269 int __tgt_target_teams_nowait_mapper(
270     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
271     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
272     map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
273     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
274     void *noAliasDepList);
275 
276 void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
277                                   uint64_t loop_tripcount);
278 
279 #ifdef __cplusplus
280 }
281 #endif
282 
/* EXTERN: linkage prefix for declarations. Expands to `extern "C"` when the
 * header is compiled as C++ and to plain `extern` otherwise. */
#if defined(__cplusplus)
#  define EXTERN extern "C"
#else
#  define EXTERN extern
#endif
288 
289 #endif // _OMPTARGET_H_
290