//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but TableGen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
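
// Example (sketch): the predicates above gate patterns on subtarget features
// or TargetMachine options in the usual TableGen way, e.g.:
//   let Predicates = [FP32Denormals] in {
//     // patterns that are only correct when fp32 denormals are enabled
//   }
// This illustrates the mechanism; no such block is defined at this point in
// the file.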
// 32-bit VALU immediate operand that uses the constant bus.
def u32kimm : Operand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM32";
  let PrintMethod = "printU32ImmOperand";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;


def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
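
// Example (sketch): a target compare pattern can use one of these leaves in
// place of a fixed condition code, so one definition covers every
// ISD::CondCode the leaf accepts, e.g.:
//   def : Pat <(i1 (setcc f32:$lhs, f32:$rhs, COND_OEQ)),
//              (CMP_EQ $lhs, $rhs)>;
// CMP_EQ is a placeholder opcode, not something defined in this file.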
// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

// A condition leaf that never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;


//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Global address space loads
class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def global_load : GlobalLoad <load>;

// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def global_store : GlobalStore <store>;
def global_store_atomic : GlobalStore<atomic_store>;


class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

// Constant address space loads
class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def constant_load : ConstantLoad<load>;
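
// Example (sketch): these fragments substitute for the generic load/store
// nodes in target patterns, so the same operation can select to a different
// instruction per address space, e.g.:
//   def : Pat <(i32 (global_load i64:$ptr)), (LOAD_DWORD_GLOBAL $ptr)>;
// LOAD_DWORD_GLOBAL is a placeholder opcode; the real instructions and
// operand shapes are defined in the per-target files included at the end of
// this file.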
class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

// Local address space loads
class LocalLoad <SDPatternOperator op> : LocalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class LocalStore <SDPatternOperator op> : LocalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class FlatLoad <SDPatternOperator op> : FlatMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                 (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi8_global : GlobalLoad <sextloadi8>;

def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;

def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def sextloadi16_global : GlobalLoad <sextloadi16>;

def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;

def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : GlobalLoad <az_extloadi32>;

def az_extloadi32_flat : FlatLoad <az_extloadi32>;

def az_extloadi32_constant : ConstantLoad <az_extloadi32>;

def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;

def local_store : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;

def local_load : LocalLoad <load>;

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;
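
// Note (sketch): the aligned-8-byte fragments above let a target select a
// single 64-bit local access (e.g. a ds_read_b64 / ds_write_b64 style
// instruction) only when the access is known to be 8-byte aligned; unaligned
// accesses fall back to the plain local_load / local_store fragments.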
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;


def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {

  def _32_local : PatFrag <
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i32 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;

  def _64_local : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i64 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;
}

defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;

def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
                         (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
>;

class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;

def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_cmp_swap_global_nortn : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_cmp_swap_global node:$ptr, node:$value),
  [{ return SDValue(N, 0).use_empty(); }]
>;
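
// Note (sketch): atomic_cmp_swap_global_nortn matches only when the value
// returned by the compare-and-swap is unused (SDValue(N, 0).use_empty()),
// letting a target select a form that does not write back a result, e.g. a
// buffer atomic with GLC = 0 on SI-style targets; the actual instruction
// choice is made in the per-target files.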
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
int FP64_ONE = 0x3ff0000000000000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                              ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;
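
// Example (sketch): targets instantiate these element/bitcast helpers per
// register class, e.g.:
//   def : Extract_Element <f32, v2f32, 0, sub0>;
//   def : Insert_Element <f32, v2f32, 1, sub1>;
//   def : BitConvert <i32, f32, R600_Reg32>;
// The register class and subregister names are illustrative; the real
// instantiations live in the per-target .td files included below.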
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : Pat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : Pat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  def : Pat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 0x7fffffff),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;
}

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
// This holds because Ma is the bitwise majority function: where x and y
// differ (x ^ y set) the result is z, and where they agree it is y, which is
// exactly BFI_INT with select mask x ^ y, given
// BFI_INT (s, a, b) = (a & s) | (b & ~s).
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;

// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;

class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse> : Pat<
  (max (min_oneuse i32:$src0, i32:$src1),
       (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
  (med3Inst $src0, $src1, $src2)
>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
} // Properties = [SDNPCommutative, SDNPAssociative]


// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class UMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
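
// Example (sketch): targets supply their own reciprocal opcodes to these
// helpers, e.g.:
//   def : RcpPat <V_RCP_F32_e32, f32>;
//   def : RsqPat <V_RSQ_F32_e32, f32>;
// The V_* names are SI-style opcodes used here purely as an illustration;
// the actual instantiations are in the included target files below.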

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"