//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//

// Returns the list of sub-register indices covering a tuple of <size>
// 32-bit registers. Sizes 2, 3, 4 and 8 select their matching list; any
// other size falls through to the 16-entry list.
class getSubRegs<int size> {
  list<SubRegIndex> ret2 = [sub0, sub1];
  list<SubRegIndex> ret3 = [sub0, sub1, sub2];
  list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
  list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
  list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11,
                             sub12, sub13, sub14, sub15];

  list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
                              !if(!eq(size, 3), ret3,
                                  !if(!eq(size, 4), ret4,
                                      !if(!eq(size, 8), ret8, ret16))));
}

//===----------------------------------------------------------------------===//
//  Declarations that describe the SI registers
//===----------------------------------------------------------------------===//

// Base class for all SI registers. The hardware encoding also serves as the
// DWARF register number.
class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
  DwarfRegNum<[!cast<int>(HWEncoding)]> {
  let Namespace = "AMDGPU";

  // This is not yet the complete register encoding. An additional
  // bit is set for VGPRs (HWEncoding{8}, see the VGPR defs below).
  let HWEncoding = regIdx;
}

// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;

// Pseudo-registers: Used as placeholders during isel and immediately
// replaced, never seeing the verifier.
def PRIVATE_RSRC_REG : SIReg<"", 0>;
def FP_REG : SIReg<"", 0>;
def SP_REG : SIReg<"", 0>;
def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;

// VCC for 64-bit instructions
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
          DwarfRegAlias<VCC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 106;
}

def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;

def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
           DwarfRegAlias<EXEC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 126;
}

def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;

// Aperture registers exposing the shared/private address-space windows.
def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;

def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;

def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
                 DwarfRegAlias<XNACK_MASK_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 104;
}

// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;

def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
          DwarfRegAlias<TBA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 108;
}

def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;

def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
          DwarfRegAlias<TMA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 110;
}

// Trap handler temporaries. The hardware encoding differs per subtarget
// (112+Index on VI, 108+Index on gfx9); the unsuffixed def is a
// subtarget-independent placeholder with no encoding.
foreach Index = 0-15 in {
  def TTMP#Index#_vi   : SIReg<"ttmp"#Index, !add(112, Index)>;
  def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
  def TTMP#Index : SIReg<"", 0>;
}

// Defines _ci and _vi variants with their subtarget encodings plus an
// unsuffixed, encoding-less placeholder.
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
  def _ci : SIReg<n, ci_e>;
  def _vi : SIReg<n, vi_e>;
  def "" : SIReg<"", 0>;
}

class FlatReg <Register lo, Register hi, bits<16> encoding> :
    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
    DwarfRegAlias<lo> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = encoding;
}

defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.

def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
// Generic flat_scratch; the _ci/_vi defs above carry the real encodings.
def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;

// SGPR registers (SGPR0-SGPR103, encoded as 0-103).
foreach Index = 0-103 in {
  def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}

// VGPR registers (VGPR0-VGPR255). Bit 8 of the encoding distinguishes
// VGPRs from scalar registers.
foreach Index = 0-255 in {
  def VGPR#Index : SIReg <"VGPR"#Index, Index> {
    let HWEncoding{8} = 1;
  }
}

//===----------------------------------------------------------------------===//
//  Groupings using register classes and tuples
//===----------------------------------------------------------------------===//

def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
  let CopyCost = -1;
  let isAllocatable = 0;
}

def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
  let CopyCost = 1;
  let isAllocatable = 0;
}

// TODO: Do we need to set DwarfRegAlias on register tuples?

// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                            (add (sequence "SGPR%u", 0, 103))> {
  // Give all SGPR classes higher priority than VGPR classes, because
  // we want to spill SGPRs to VGPRs.
  let AllocationPriority = 7;
}

// SGPR 64-bit registers. Tuple lane k is (shl SGPR_32, k), and decimate
// keeps every Nth starting register, i.e. pairs are 2-aligned.
def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
                            [(add (decimate SGPR_32, 2)),
                             (add (decimate (shl SGPR_32, 1), 2))]>;

// SGPR 128-bit registers (4-aligned quads).
def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
                            [(add (decimate SGPR_32, 4)),
                             (add (decimate (shl SGPR_32, 1), 4)),
                             (add (decimate (shl SGPR_32, 2), 4)),
                             (add (decimate (shl SGPR_32, 3), 4))]>;

// SGPR 256-bit registers. Note: only 4-aligned (decimate by 4).
def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
                            [(add (decimate SGPR_32, 4)),
                             (add (decimate (shl SGPR_32, 1), 4)),
                             (add (decimate (shl SGPR_32, 2), 4)),
                             (add (decimate (shl SGPR_32, 3), 4)),
                             (add (decimate (shl SGPR_32, 4), 4)),
                             (add (decimate (shl SGPR_32, 5), 4)),
                             (add (decimate (shl SGPR_32, 6), 4)),
                             (add (decimate (shl SGPR_32, 7), 4))]>;

// SGPR 512-bit registers. Note: only 4-aligned (decimate by 4).
def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
                            [(add (decimate SGPR_32, 4)),
                             (add (decimate (shl SGPR_32, 1), 4)),
                             (add (decimate (shl SGPR_32, 2), 4)),
                             (add (decimate (shl SGPR_32, 3), 4)),
                             (add (decimate (shl SGPR_32, 4), 4)),
                             (add (decimate (shl SGPR_32, 5), 4)),
                             (add (decimate (shl SGPR_32, 6), 4)),
                             (add (decimate (shl SGPR_32, 7), 4)),
                             (add (decimate (shl SGPR_32, 8), 4)),
                             (add (decimate (shl SGPR_32, 9), 4)),
                             (add (decimate (shl SGPR_32, 10), 4)),
                             (add (decimate (shl SGPR_32, 11), 4)),
                             (add (decimate (shl SGPR_32, 12), 4)),
                             (add (decimate (shl SGPR_32, 13), 4)),
                             (add (decimate (shl SGPR_32, 14), 4)),
                             (add (decimate (shl SGPR_32, 15), 4))]>;

// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
                            (add (sequence "TTMP%u", 0, 15))> {
  let isAllocatable = 0;
}

// Trap handler TMP 64-bit registers
def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
                            [(add (decimate TTMP_32, 2)),
                             (add (decimate (shl TTMP_32, 1), 2))]>;
// Trap handler TMP 128-bit registers (4-aligned).
def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
                            [(add (decimate TTMP_32, 4)),
                             (add (decimate (shl TTMP_32, 1), 4)),
                             (add (decimate (shl TTMP_32, 2), 4)),
                             (add (decimate (shl TTMP_32, 3), 4))]>;

// Trap handler TMP 256-bit registers (4-aligned).
def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
                            [(add (decimate TTMP_32, 4)),
                             (add (decimate (shl TTMP_32, 1), 4)),
                             (add (decimate (shl TTMP_32, 2), 4)),
                             (add (decimate (shl TTMP_32, 3), 4)),
                             (add (decimate (shl TTMP_32, 4), 4)),
                             (add (decimate (shl TTMP_32, 5), 4)),
                             (add (decimate (shl TTMP_32, 6), 4)),
                             (add (decimate (shl TTMP_32, 7), 4))]>;

// Trap handler TMP 512-bit registers (4-aligned).
def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
                            [(add (decimate TTMP_32, 4)),
                             (add (decimate (shl TTMP_32, 1), 4)),
                             (add (decimate (shl TTMP_32, 2), 4)),
                             (add (decimate (shl TTMP_32, 3), 4)),
                             (add (decimate (shl TTMP_32, 4), 4)),
                             (add (decimate (shl TTMP_32, 5), 4)),
                             (add (decimate (shl TTMP_32, 6), 4)),
                             (add (decimate (shl TTMP_32, 7), 4)),
                             (add (decimate (shl TTMP_32, 8), 4)),
                             (add (decimate (shl TTMP_32, 9), 4)),
                             (add (decimate (shl TTMP_32, 10), 4)),
                             (add (decimate (shl TTMP_32, 11), 4)),
                             (add (decimate (shl TTMP_32, 12), 4)),
                             (add (decimate (shl TTMP_32, 13), 4)),
                             (add (decimate (shl TTMP_32, 14), 4)),
                             (add (decimate (shl TTMP_32, 15), 4))]>;

// Base for subtarget-specific ttmp tuples: derives the "ttmp[lo:hi]" asm
// name, takes the hardware encoding from the first element, and attaches
// the sub-register indices for <size> dwords.
class TmpRegTuplesBase<int index, int size,
                       list<Register> subRegs,
                       list<SubRegIndex> indices = getSubRegs<size>.ret,
                       int index1 = !add(index, !add(size, -1)),
                       string name = "ttmp["#index#":"#index1#"]"> :
  RegisterWithSubRegs<name, subRegs> {
  let HWEncoding = subRegs[0].HWEncoding;
  let SubRegIndices = indices;
}

// Builds a 2/4/8-dword ttmp tuple for one subtarget suffix (tgt is "_vi"
// or "_gfx9"). Indices beyond the tuple size default to index0+1 so every
// !cast below still resolves to an existing TTMP def; the unused registers
// are simply not included in the sub-register list.
class TmpRegTuples<string tgt,
                   int size,
                   int index0,
                   int index1 = !add(index0, 1),
                   int index2 = !add(index0, !if(!eq(size, 2), 1, 2)),
                   int index3 = !add(index0, !if(!eq(size, 2), 1, 3)),
                   int index4 = !add(index0, !if(!eq(size, 8), 4, 1)),
                   int index5 = !add(index0, !if(!eq(size, 8), 5, 1)),
                   int index6 = !add(index0, !if(!eq(size, 8), 6, 1)),
                   int index7 = !add(index0, !if(!eq(size, 8), 7, 1)),
                   Register r0 = !cast<Register>("TTMP"#index0#tgt),
                   Register r1 = !cast<Register>("TTMP"#index1#tgt),
                   Register r2 = !cast<Register>("TTMP"#index2#tgt),
                   Register r3 = !cast<Register>("TTMP"#index3#tgt),
                   Register r4 = !cast<Register>("TTMP"#index4#tgt),
                   Register r5 = !cast<Register>("TTMP"#index5#tgt),
                   Register r6 = !cast<Register>("TTMP"#index6#tgt),
                   Register r7 = !cast<Register>("TTMP"#index7#tgt)> :
  TmpRegTuplesBase<index0, size,
                   !if(!eq(size, 2), [r0, r1],
                       !if(!eq(size, 4), [r0, r1, r2, r3],
                           [r0, r1, r2, r3, r4, r5, r6, r7])),
                   getSubRegs<size>.ret>;

// 64-bit ttmp pairs, 2-aligned.
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
  def TTMP#Index#_TTMP#!add(Index,1)#_vi   : TmpRegTuples<"_vi",   2, Index>;
  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
}

// 128-bit ttmp quads, 4-aligned.
foreach Index = {0, 4, 8, 12} in {
  def TTMP#Index#_TTMP#!add(Index,1)#
           _TTMP#!add(Index,2)#
           _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
  def TTMP#Index#_TTMP#!add(Index,1)#
           _TTMP#!add(Index,2)#
           _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
}

// 256-bit ttmp tuples, 4-aligned.
foreach Index = {0, 4, 8} in {
  def TTMP#Index#_TTMP#!add(Index,1)#
           _TTMP#!add(Index,2)#
           _TTMP#!add(Index,3)#
           _TTMP#!add(Index,4)#
           _TTMP#!add(Index,5)#
           _TTMP#!add(Index,6)#
           _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi", 8, Index>;
  def TTMP#Index#_TTMP#!add(Index,1)#
           _TTMP#!add(Index,2)#
           _TTMP#!add(Index,3)#
           _TTMP#!add(Index,4)#
           _TTMP#!add(Index,5)#
           _TTMP#!add(Index,6)#
           _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
}

// The single 512-bit ttmp tuple (all 16 ttmps), defined directly for each
// subtarget since TmpRegTuples only handles sizes 2, 4 and 8.
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
  TmpRegTuplesBase<0, 16,
                   [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi,
                    TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi,
                    TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
                    TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;

def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
  TmpRegTuplesBase<0, 16,
                   [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
                    TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
                    TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
                    TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;


// VGPR 32-bit registers
// i16/f16 only on VI+
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                            (add (sequence "VGPR%u", 0, 255))> {
  let AllocationPriority = 1;
  let Size = 32;
}

// VGPR 64-bit registers. VGPR tuples are 1-aligned; (trunc VGPR_32, N)
// drops the tail registers that cannot start a full tuple.
def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
                             [(add (trunc VGPR_32, 255)),
                              (add (shl VGPR_32, 1))]>;

// VGPR 96-bit registers
def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
                             [(add (trunc VGPR_32, 254)),
                              (add (shl VGPR_32, 1)),
                              (add (shl VGPR_32, 2))]>;

// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
                              [(add (trunc VGPR_32, 253)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3))]>;

// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
                              [(add (trunc VGPR_32, 249)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7))]>;

// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
                              [(add (trunc VGPR_32, 241)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7)),
                               (add (shl VGPR_32, 8)),
                               (add (shl VGPR_32, 9)),
                               (add (shl VGPR_32, 10)),
                               (add (shl VGPR_32, 11)),
                               (add (shl VGPR_32, 12)),
                               (add (shl VGPR_32, 13)),
                               (add (shl VGPR_32, 14)),
                               (add (shl VGPR_32, 15))]>;

//===----------------------------------------------------------------------===//
//  Register classes used as source and destination
//===----------------------------------------------------------------------===//

// Non-allocatable, uncopyable classes holding only the isel placeholder
// pseudo-registers defined above.
def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
  let isAllocatable = 0;
  let CopyCost = -1;
}

def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
  (add PRIVATE_RSRC_REG)> {
  let isAllocatable = 0;
  let CopyCost = -1;
}

// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
  let AllocationPriority = 7;
}

// As above plus EXEC_LO and M0, still excluding EXEC_HI.
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
  let AllocationPriority = 7;
}

// As SReg_32_XM0_XEXEC plus both exec halves, still excluding M0.
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
  let AllocationPriority = 7;
}

// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
  let AllocationPriority = 7;
}

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
  let CopyCost = 1;
  let AllocationPriority = 8;
}

def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
  let isAllocatable = 0;
}

def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
  let CopyCost = 1;
  let AllocationPriority = 8;
}

def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
  (add SReg_64_XEXEC, EXEC)> {
  let CopyCost = 1;
  let AllocationPriority = 8;
}

// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {

def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
  let AllocationPriority = 10;
}

def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
  let isAllocatable = 0;
}

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64, v2f64], 32,
  (add SGPR_128, TTMP_128)> {
  let AllocationPriority = 10;
}

} // End CopyCost = 2

def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
  let AllocationPriority = 11;
}

def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
  let isAllocatable = 0;
}

def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
  (add SGPR_256, TTMP_256)> {
  // Requires 4 s_mov_b64 to copy
  let CopyCost = 4;
  let AllocationPriority = 11;
}

def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
  let AllocationPriority = 12;
}

def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
  let isAllocatable = 0;
}

def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
  (add SGPR_512, TTMP_512)> {
  // Requires 8 s_mov_b64 to copy
  let CopyCost = 8;
  let AllocationPriority = 12;
}

// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> {
  let Size = 64;

  // Requires 2 v_mov_b32 to copy
  let CopyCost = 2;
  let AllocationPriority = 2;
}

def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
  let Size = 96;

  // Requires 3 v_mov_b32 to copy
  let CopyCost = 3;
  let AllocationPriority = 3;
}

def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
  let Size = 128;

  // Requires 4 v_mov_b32 to copy
  let CopyCost = 4;
  let AllocationPriority = 4;
}

def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
  let Size = 256;
  let CopyCost = 8;
  let AllocationPriority = 5;
}

def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
  let Size = 512;
  let CopyCost = 16;
  let AllocationPriority = 6;
}

// One VGPR holding an i1 per lane.
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
  let Size = 32;
}

// Union classes used only as operand constraints (never allocated).
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  (add VGPR_32, SReg_32)> {
  let isAllocatable = 0;
}

def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
  let isAllocatable = 0;
}

//===----------------------------------------------------------------------===//
//  Register operands
//===----------------------------------------------------------------------===//

// AsmOperandClass that accepts either a register or an immediate.
class RegImmMatcher<string name> : AsmOperandClass {
  let Name = name;
  let RenderMethod = "addRegOrImmOperands";
}

// Defines the per-type (_b16/_f16/_b32/_f32/_b64/_f64/_v2b16/_v2f16)
// register operands for class prefix <rc>, with operand types built from
// <opType> (e.g. "OPERAND_REG_IMM"#"_INT16").
multiclass SIRegOperand <string rc, string MatchName, string opType> {
  let OperandNamespace = "AMDGPU" in {
    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_INT16";
      let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
      let DecoderMethod = "decodeOperand_VSrc16";
    }

    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_FP16";
      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
      let DecoderMethod = "decodeOperand_VSrc16";
    }

    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_INT32";
      let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
    }

    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_FP32";
      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
    }

    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
      let OperandType = opType#"_INT64";
      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
    }

    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
      let OperandType = opType#"_FP64";
      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
    }

    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_V2INT16";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
      let DecoderMethod = "decodeOperand_VSrcV216";
    }

    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
      let OperandType = opType#"_V2FP16";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
      let DecoderMethod = "decodeOperand_VSrcV216";
    }
  }
}

// FIXME: 64-bit sources can sometimes use 32-bit constants.
// Operands that accept a register or any 32-bit immediate.
multiclass RegImmOperand <string rc, string MatchName>
  : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;

// Operands that accept a register or only an inline constant.
multiclass RegInlineOperand <string rc, string MatchName>
  : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;

//===----------------------------------------------------------------------===//
//  SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

defm SSrc : RegImmOperand<"SReg", "SSrc">;

//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//

defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;

def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;

//===----------------------------------------------------------------------===//
//  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

defm VSrc : RegImmOperand<"VS", "VSrc">;

def VSrc_128 : RegisterOperand<VReg_128> {
  let DecoderMethod = "DecodeVS_128RegisterClass";
}

//===----------------------------------------------------------------------===//
//  VSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//

// This is for operands with the enum(9), VSrc encoding restriction,
// but only allows VGPRs.
def VRegSrc_32 : RegisterOperand<VGPR_32> {
  //let ParserMatchClass = RegImmMatcher<"VRegSrc32">;
  let DecoderMethod = "DecodeVS_32RegisterClass";
}

//===----------------------------------------------------------------------===//
//  VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//

defm VCSrc : RegInlineOperand<"VS", "VCSrc">;