1//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file defines the intrinsics for the AMDGPU targets (the "r600",
// "AMDGPU" and "amdgcn" target prefixes).
11//
12//===----------------------------------------------------------------------===//
13
// Base class for intrinsics that take no operands and produce a single i32
// result. Marked IntrNoMem.
class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<
      [llvm_i32_ty],  // result
      [],             // no operands
      [IntrNoMem]>;
16
// Intrinsic with no operands and a single i32 result (IntrNoMem) that is
// additionally associated with the GCC builtin called `name`.
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<
      [llvm_i32_ty],  // result
      [],             // no operands
      [IntrNoMem]>,
    GCCBuiltin<name>;
19
20let TargetPrefix = "r600" in {
21
// Expands to three AMDGPUReadPreloadRegisterIntrinsic records whose names are
// the defm name followed by _x, _y and _z.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x
    : AMDGPUReadPreloadRegisterIntrinsic;
  def _y
    : AMDGPUReadPreloadRegisterIntrinsic;
  def _z
    : AMDGPUReadPreloadRegisterIntrinsic;
}
27
// Expands to three AMDGPUReadPreloadRegisterIntrinsicNamed records (_x, _y,
// _z). The GCC builtin name of each record is `prefix` with the matching
// "_x" / "_y" / "_z" suffix appended.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x
    : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y
    : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z
    : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
33
// Each defm below produces _x/_y/_z intrinsics that are also exposed through
// GCC builtins named "<given prefix>_x", "<given prefix>_y" and
// "<given prefix>_z".
defm int_r600_read_global_size
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_global_size">;
defm int_r600_read_ngroups
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_ngroups">;
defm int_r600_read_tgid
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_tgid">;
40
// _x/_y/_z intrinsics without an associated GCC builtin.
defm int_r600_read_local_size
  : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig
  : AMDGPUReadPreloadRegisterIntrinsic_xyz;

// Single (non-xyz) i32 read intrinsic.
def int_r600_read_workdim
  : AMDGPUReadPreloadRegisterIntrinsic;
45
46
// Returns an i8 pointer in address space 7 and takes no operands.
// AS 7 is PARAM_I_ADDRESS, used for kernel arguments.
def int_r600_implicitarg_ptr
  : GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
    Intrinsic<
      [LLVMQualPointerType<llvm_i8_ty, 7>],
      [],
      [IntrNoMem]>;
51
// Typed RAT store: no result, three operands, no attributes.
def int_r600_rat_store_typed
  : Intrinsic<
      [],
      [llvm_v4i32_ty,   // 1st parameter: Data
       llvm_v4i32_ty,   // 2nd parameter: Index
       llvm_i32_ty],    // 3rd parameter: Constant RAT ID
      []>,
    GCCBuiltin<"__builtin_r600_rat_store_typed">;
58
// Overloaded on any floating-point type; the single operand has the same type
// as the result. Marked IntrNoMem.
def int_r600_rsq
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
62
63
64} // End TargetPrefix = "r600"
65
66// FIXME: These should be renamed/moved to r600
67let TargetPrefix = "AMDGPU" in {
// Overloaded on any floating-point type. Operands: a value of the result
// type and an i32. Marked IntrNoMem.
def int_AMDGPU_ldexp
  : Intrinsic<
      [llvm_anyfloat_ty],
      [LLVMMatchType<0>, llvm_i32_ty],
      [IntrNoMem]>;
71}
72
73let TargetPrefix = "amdgcn" in {
74
// _x/_y/_z i32 read intrinsics. Only the workgroup id variants are tied to
// GCC builtins ("__builtin_amdgcn_workgroup_id_x" etc.).
defm int_amdgcn_workitem_id
  : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_amdgcn_workgroup_id">;
78
// No result, no operands; marked IntrConvergent.
def int_amdgcn_s_barrier
  : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;

// No result, one i32 operand, no attributes.
def int_amdgcn_s_waitcnt
  : Intrinsic<[], [llvm_i32_ty], []>;
83
// Overloaded on any floating-point type. Produces two results: a value of the
// overloaded type and an i1.
def int_amdgcn_div_scale
  : Intrinsic<
      [llvm_anyfloat_ty, llvm_i1_ty],
      [LLVMMatchType<0>,  // 1st parameter: Numerator
       LLVMMatchType<0>,  // 2nd parameter: Denominator
       llvm_i1_ty],       // 3rd parameter: Constant to select between first
                          //                and second. (0 = first, 1 = second).
      [IntrNoMem]>;
93
// Overloaded on any floating-point type. Operands: three values of the result
// type followed by an i1. Marked IntrNoMem.
def int_amdgcn_div_fmas
  : Intrinsic<
      [llvm_anyfloat_ty],
      [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
      [IntrNoMem]>;

// Overloaded on any floating-point type. Operands: three values of the result
// type. Marked IntrNoMem.
def int_amdgcn_div_fixup
  : Intrinsic<
      [llvm_anyfloat_ty],
      [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
      [IntrNoMem]>;
103
// Overloaded on any floating-point type. Operands: a value of the result type
// and an i32. Marked IntrNoMem.
def int_amdgcn_trig_preop
  : Intrinsic<
      [llvm_anyfloat_ty],
      [LLVMMatchType<0>, llvm_i32_ty],
      [IntrNoMem]>;
107
// Unary intrinsics overloaded on any floating-point type; operand and result
// share the same type. Both are marked IntrNoMem.
def int_amdgcn_sin
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_cos
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
115
// Unary intrinsics overloaded on any floating-point type (operand type matches
// the result type), all marked IntrNoMem -- except int_amdgcn_rsq_legacy,
// which is fixed to float and is tied to a GCC builtin.
def int_amdgcn_log_clamp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_rcp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_rsq
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// float -> float only (not overloaded).
def int_amdgcn_rsq_legacy
  : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
    Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

def int_amdgcn_rsq_clamp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
135
// Overloaded on any floating-point type. Operands: a value of the result type
// and an i32. Marked IntrNoMem.
def int_amdgcn_ldexp
  : Intrinsic<
      [llvm_anyfloat_ty],
      [LLVMMatchType<0>, llvm_i32_ty],
      [IntrNoMem]>;

// Overloaded on any floating-point type; operand and result share the type.
def int_amdgcn_frexp_mant
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// i32 result; the operand is overloaded on any floating-point type.
def int_amdgcn_frexp_exp
  : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
147
// Overloaded on any floating-point type; operand and result share the type.
//
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
154
// i1 result. Operands: a value overloaded on any floating-point type and an
// i32. Marked IntrNoMem.
def int_amdgcn_class
  : Intrinsic<
      [llvm_i1_ty],
      [llvm_anyfloat_ty, llvm_i32_ty],
      [IntrNoMem]>;
158
// The four cube intrinsics share one signature: three float operands, one
// float result, IntrNoMem. Each is tied to the GCC builtin of the same name.
def int_amdgcn_cubeid
  : GCCBuiltin<"__builtin_amdgcn_cubeid">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_float_ty, llvm_float_ty, llvm_float_ty],
      [IntrNoMem]>;

def int_amdgcn_cubema
  : GCCBuiltin<"__builtin_amdgcn_cubema">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_float_ty, llvm_float_ty, llvm_float_ty],
      [IntrNoMem]>;

def int_amdgcn_cubesc
  : GCCBuiltin<"__builtin_amdgcn_cubesc">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_float_ty, llvm_float_ty, llvm_float_ty],
      [IntrNoMem]>;

def int_amdgcn_cubetc
  : GCCBuiltin<"__builtin_amdgcn_cubetc">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_float_ty, llvm_float_ty, llvm_float_ty],
      [IntrNoMem]>;
178
// Atomic increment / decrement. The result is overloaded on any integer type;
// operands are a pointer (overloaded on any pointer type) and a value of the
// result type. Operand 0 (the pointer) is marked NoCapture and the intrinsics
// are IntrArgMemOnly.
//
// TODO: Do we want an ordering for these?
def int_amdgcn_atomic_inc
  : Intrinsic<
      [llvm_anyint_ty],
      [llvm_anyptr_ty, LLVMMatchType<0>],
      [IntrArgMemOnly, NoCapture<0>]>;

def int_amdgcn_atomic_dec
  : Intrinsic<
      [llvm_anyint_ty],
      [llvm_anyptr_ty, LLVMMatchType<0>],
      [IntrArgMemOnly, NoCapture<0>]>;
189
// Shared signature of the image load intrinsics: a v4f32 result, an address
// operand overloaded on any integer type, and the resource / immediate
// operands listed below. Marked IntrReadMem.
class AMDGPUImageLoad
  : Intrinsic<
      [llvm_v4f32_ty],    // vdata(VGPR)
      [llvm_anyint_ty,    // vaddr(VGPR)
       llvm_v8i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // dmask(imm)
       llvm_i1_ty,        // r128(imm)
       llvm_i1_ty,        // da(imm)
       llvm_i1_ty,        // glc(imm)
       llvm_i1_ty],       // slc(imm)
      [IntrReadMem]>;

def int_amdgcn_image_load
  : AMDGPUImageLoad;
def int_amdgcn_image_load_mip
  : AMDGPUImageLoad;
203
// Shared signature of the image store intrinsics: no result, a v4f32 data
// operand, an address operand overloaded on any integer type, and the
// resource / immediate operands listed below. No attributes.
class AMDGPUImageStore
  : Intrinsic<
      [],
      [llvm_v4f32_ty,     // vdata(VGPR)
       llvm_anyint_ty,    // vaddr(VGPR)
       llvm_v8i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // dmask(imm)
       llvm_i1_ty,        // r128(imm)
       llvm_i1_ty,        // da(imm)
       llvm_i1_ty,        // glc(imm)
       llvm_i1_ty],       // slc(imm)
      []>;

def int_amdgcn_image_store
  : AMDGPUImageStore;
def int_amdgcn_image_store_mip
  : AMDGPUImageStore;
218
// Shared signature of the single-data image atomics: an i32 result, an i32
// data operand, an address operand overloaded on any integer type, and the
// resource / immediate operands listed below. No attributes.
class AMDGPUImageAtomic
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty,       // vdata(VGPR)
       llvm_anyint_ty,    // vaddr(VGPR)
       llvm_v8i32_ty,     // rsrc(SGPR)
       llvm_i1_ty,        // r128(imm)
       llvm_i1_ty,        // da(imm)
       llvm_i1_ty],       // slc(imm)
      []>;

def int_amdgcn_image_atomic_swap  : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add   : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub   : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin  : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin  : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax  : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax  : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and   : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or    : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor   : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc   : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec   : AMDGPUImageAtomic;
// Image compare-and-swap: i32 result, separate src and cmp i32 operands
// followed by the address, resource and immediate operands. No attributes.
def int_amdgcn_image_atomic_cmpswap
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty,       // src(VGPR)
       llvm_i32_ty,       // cmp(VGPR)
       llvm_anyint_ty,    // vaddr(VGPR)
       llvm_v8i32_ty,     // rsrc(SGPR)
       llvm_i1_ty,        // r128(imm)
       llvm_i1_ty,        // da(imm)
       llvm_i1_ty],       // slc(imm)
      []>;
251
// Shared signature of the buffer load intrinsics: the result is overloaded on
// any floating-point type; operands are listed below. Marked IntrReadMem.
class AMDGPUBufferLoad
  : Intrinsic<
      [llvm_anyfloat_ty],
      [llvm_v4i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // vindex(VGPR)
       llvm_i32_ty,       // offset(SGPR/VGPR/imm)
       llvm_i1_ty,        // glc(imm)
       llvm_i1_ty],       // slc(imm)
      [IntrReadMem]>;

def int_amdgcn_buffer_load_format
  : AMDGPUBufferLoad;
def int_amdgcn_buffer_load
  : AMDGPUBufferLoad;
262
// Shared signature of the buffer store intrinsics: no result; the data
// operand is overloaded on any floating-point type. Marked IntrWriteMem.
class AMDGPUBufferStore
  : Intrinsic<
      [],
      [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32,
                          //                v2f32, v4f32
       llvm_v4i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // vindex(VGPR)
       llvm_i32_ty,       // offset(SGPR/VGPR/imm)
       llvm_i1_ty,        // glc(imm)
       llvm_i1_ty],       // slc(imm)
      [IntrWriteMem]>;

def int_amdgcn_buffer_store_format
  : AMDGPUBufferStore;
def int_amdgcn_buffer_store
  : AMDGPUBufferStore;
274
// Shared signature of the single-data buffer atomics: an i32 result, an i32
// data operand, and the resource / index / offset / slc operands listed
// below. No attributes.
class AMDGPUBufferAtomic
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty,       // vdata(VGPR)
       llvm_v4i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // vindex(VGPR)
       llvm_i32_ty,       // offset(SGPR/VGPR/imm)
       llvm_i1_ty],       // slc(imm)
      []>;

def int_amdgcn_buffer_atomic_swap  : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add   : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub   : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin  : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin  : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax  : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax  : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and   : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or    : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor   : AMDGPUBufferAtomic;
// Buffer compare-and-swap: i32 result, separate src and cmp i32 operands
// followed by the resource, index, offset and slc operands. No attributes.
def int_amdgcn_buffer_atomic_cmpswap
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty,       // src(VGPR)
       llvm_i32_ty,       // cmp(VGPR)
       llvm_v4i32_ty,     // rsrc(SGPR)
       llvm_i32_ty,       // vindex(VGPR)
       llvm_i32_ty,       // offset(SGPR/VGPR/imm)
       llvm_i1_ty],       // slc(imm)
      []>;
302
// No operands, i32 result, IntrNoMem (via the shared base class).
def int_amdgcn_read_workdim
  : AMDGPUReadPreloadRegisterIntrinsic;
304
305
// The three intrinsics below take no operands, return nothing and carry no
// attributes. Each is tied to the GCC builtin of the same name.
def int_amdgcn_buffer_wbinvl1_sc
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
    Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
    Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
    Intrinsic<[], [], []>;
317
// No operands, i64 result, no attributes.
def int_amdgcn_s_memtime
  : GCCBuiltin<"__builtin_amdgcn_s_memtime">,
    Intrinsic<[llvm_i64_ty], [], []>;
321
// No result, one i32 operand, no attributes.
def int_amdgcn_s_sleep
  : GCCBuiltin<"__builtin_amdgcn_s_sleep">,
    Intrinsic<[], [llvm_i32_ty], []>;
326
// i32 result, one i32 operand. Marked IntrReadMem.
def int_amdgcn_s_getreg
  : GCCBuiltin<"__builtin_amdgcn_s_getreg">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
330
// No operands, i32 result. Marked IntrNoMem.
def int_amdgcn_groupstaticsize
  : GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
    Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
334
// Pointer-returning intrinsics. Each takes no operands, returns an i8 pointer
// in address space 2, is marked IntrNoMem and is tied to the GCC builtin of
// the same name.
def int_amdgcn_dispatch_ptr
  : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
    Intrinsic<
      [LLVMQualPointerType<llvm_i8_ty, 2>],
      [],
      [IntrNoMem]>;

def int_amdgcn_queue_ptr
  : GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
    Intrinsic<
      [LLVMQualPointerType<llvm_i8_ty, 2>],
      [],
      [IntrNoMem]>;

def int_amdgcn_kernarg_segment_ptr
  : GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
    Intrinsic<
      [LLVMQualPointerType<llvm_i8_ty, 2>],
      [],
      [IntrNoMem]>;

def int_amdgcn_implicitarg_ptr
  : GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
    Intrinsic<
      [LLVMQualPointerType<llvm_i8_ty, 2>],
      [],
      [IntrNoMem]>;
350
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
//
// This intrinsic reads from lds, but the memory values are constant, so it
// behaves like IntrNoMem.
def int_amdgcn_interp_p1
  : GCCBuiltin<"__builtin_amdgcn_interp_p1">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
358
// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
//
// See int_amdgcn_interp_p1 for why this is IntrNoMem.
def int_amdgcn_interp_p2
  : GCCBuiltin<"__builtin_amdgcn_interp_p2">,
    Intrinsic<
      [llvm_float_ty],
      [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
366
// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
// No operands, i1 result, IntrNoMem.
def int_amdgcn_ps_live
  : Intrinsic<[llvm_i1_ty], [], [IntrNoMem]>;
373
// Both mbcnt intrinsics take two i32 operands and return an i32; they are
// marked IntrNoMem and tied to the GCC builtins of the same names.
def int_amdgcn_mbcnt_lo
  : GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
    Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;

def int_amdgcn_mbcnt_hi
  : GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
    Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
381
// llvm.amdgcn.ds.swizzle src offset
// Two i32 operands, i32 result; marked IntrNoMem and IntrConvergent.
def int_amdgcn_ds_swizzle
  : GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
    Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem, IntrConvergent]>;
386
// llvm.amdgcn.lerp
// Three i32 operands, i32 result; marked IntrNoMem.
def int_amdgcn_lerp
  : GCCBuiltin<"__builtin_amdgcn_lerp">,
    Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
391
392//===----------------------------------------------------------------------===//
393// CI+ Intrinsics
394//===----------------------------------------------------------------------===//
395
// Both intrinsics take no operands, return nothing and carry no attributes.
// Each is tied to the GCC builtin of the same name.
def int_amdgcn_s_dcache_inv_vol
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
    Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
    Intrinsic<[], [], []>;
403
404//===----------------------------------------------------------------------===//
405// VI Intrinsics
406//===----------------------------------------------------------------------===//
407
// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// The result is overloaded on any integer type and <src> has the same type.
// Marked IntrNoMem and IntrConvergent.
def int_amdgcn_mov_dpp
  : Intrinsic<
      [llvm_anyint_ty],
      [LLVMMatchType<0>,  // src
       llvm_i32_ty,       // dpp_ctrl
       llvm_i32_ty,       // row_mask
       llvm_i32_ty,       // bank_mask
       llvm_i1_ty],       // bound_ctrl
      [IntrNoMem, IntrConvergent]>;
413
// No operands, no result, no attributes.
def int_amdgcn_s_dcache_wb
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
    Intrinsic<[], [], []>;

// No operands, no result, no attributes.
def int_amdgcn_s_dcache_wb_vol
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
    Intrinsic<[], [], []>;

// No operands, i64 result, no attributes.
def int_amdgcn_s_memrealtime
  : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
    Intrinsic<[llvm_i64_ty], [], []>;
425
// llvm.amdgcn.ds.permute <index> <src>
// Two i32 operands, i32 result; marked IntrNoMem and IntrConvergent.
def int_amdgcn_ds_permute
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
// Two i32 operands, i32 result; marked IntrNoMem and IntrConvergent.
def int_amdgcn_ds_bpermute
  : Intrinsic<
      [llvm_i32_ty],
      [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem, IntrConvergent]>;
433
434}
435