//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the R600-specific intrinsics.
//
//===----------------------------------------------------------------------===//

// Common shape for the "read preloaded register" intrinsics: no operands,
// a single i32 result, and marked IntrNoMem.
class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;

// Same signature as AMDGPUReadPreloadRegisterIntrinsic, additionally tagged
// with GCCBuiltin<name>.
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<name>;

let TargetPrefix = "r600" in {

// Expands to three defs with the suffixes _x, _y and _z.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

// Like AMDGPUReadPreloadRegisterIntrinsic_xyz, but each def also gets a
// GCCBuiltin whose name is `prefix` with the matching _x/_y/_z suffix appended.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;


// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;


} // End TargetPrefix = "r600"

// FIXME: These should be renamed/moved to r600
let TargetPrefix = "AMDGPU" in {
def int_AMDGPU_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
>;
}

let TargetPrefix = "amdgcn" in {

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_s_waitcnt : Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Constant to select between first and
  //                second. (0 = first, 1 = second).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem]
>;

def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem]
>;

def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
>;

def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
>;

// TODO: Do we want an ordering for these?
def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty, LLVMMatchType<0>],
  [IntrArgMemOnly, NoCapture<0>]
>;

def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty, LLVMMatchType<0>],
  [IntrArgMemOnly, NoCapture<0>]
>;

// Shared signature of the image load intrinsics defined below.
class AMDGPUImageLoad : Intrinsic <
  [llvm_v4f32_ty],    // vdata(VGPR)
  [llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem]>;

def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;

// Shared signature of the image store intrinsics defined below.
class AMDGPUImageStore : Intrinsic <
  [],
  [llvm_v4f32_ty,     // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;

// Shared signature of the single-data-operand image atomics defined below.
// cmpswap takes an extra operand and is therefore spelled out separately.
class AMDGPUImageAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_cmpswap : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

// Shared signature of the buffer load intrinsics defined below.
class AMDGPUBufferLoad : Intrinsic <
  [llvm_anyfloat_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

// Shared signature of the buffer store intrinsics defined below.
class AMDGPUBufferStore : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

// Shared signature of the single-data-operand buffer atomics defined below.
// cmpswap takes an extra operand and is therefore spelled out separately.
class AMDGPUBufferAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;

def int_amdgcn_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;


def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], []> {
}

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;

def int_amdgcn_dispatch_ptr :
  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem]>;  // This intrinsic reads from lds, but the memory
                           // values are constant, so it behaves like IntrNoMem.

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem]>;  // See int_amdgcn_interp_p1 for why this is
                           // IntrNoMem.

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.lerp
def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], []>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

} // End TargetPrefix = "amdgcn"