//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {
  namespace PPCISD {
    // PPC-specific SelectionDAG node kinds produced during lowering and
    // matched during instruction selection.
    enum NodeType {
      // Start the numbering where the builtin ops and target ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// FSEL - Traditional three-operand fsel node.
      ///
      FSEL,

      /// FCFID - The FCFID instruction, taking an f64 operand and producing
      /// and f64 value containing the FP representation of the integer that
      /// was temporarily in the f64 operand.
      FCFID,

      /// Newer FCFID[US] integer-to-floating-point conversion instructions for
      /// unsigned integers and single-precision outputs.
      FCFIDU, FCFIDS, FCFIDUS,

      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
      /// operand, producing an f64 value containing the integer representation
      /// of that FP value.
      FCTIDZ, FCTIWZ,

      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions
      /// for unsigned integers.
      FCTIDUZ, FCTIWUZ,

      /// Reciprocal estimate instructions (unary FP ops).
      FRE, FRSQRTE,

      // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
      // three v4f32 operands and producing a v4f32 result.
      VMADDFP, VNMSUBFP,

      /// VPERM - The PPC VPERM Instruction.
      ///
      VPERM,

      /// The CMPB instruction (takes two operands of i32 or i64).
      CMPB,

      /// Hi/Lo - These represent the high and low 16-bit parts of a global
      /// address respectively.  These nodes have two operands, the first of
      /// which must be a TargetGlobalAddress, and the second of which must be a
      /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
      /// though these are usually folded into other nodes.
      Hi, Lo,

      /// The following two target-specific nodes are used for calls through
      /// function pointers in the 64-bit SVR4 ABI.

      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
      /// compute an allocation on the stack.
      DYNALLOC,

      /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
      /// shift amounts.  These nodes are generated by the multi-precision shift
      /// code.
      SRL, SRA, SHL,

      /// The combination of sra[wd]i and addze used to implemented signed
      /// integer division by a power of 2. The first operand is the dividend,
      /// and the second is the constant shift amount (representing the
      /// divisor).
      SRA_ADDZE,

      /// CALL - A direct function call.
      /// CALL_NOP is a call with the special NOP which follows 64-bit
      /// SVR4 calls.
      CALL, CALL_NOP,

      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
      /// MTCTR instruction.
      MTCTR,

      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
      /// BCTRL instruction.
      BCTRL,

      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
      /// instruction and the TOC reload required on SVR4 PPC64.
      BCTRL_LOAD_TOC,

      /// Return with a flag operand, matched by 'blr'
      RET_FLAG,

      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
      /// This copies the bits corresponding to the specified CRREG into the
      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
      MFOCRF,

      /// Direct move from a VSX register to a GPR
      MFVSR,

      /// Direct move from a GPR to a VSX register (algebraic)
      MTVSRA,

      /// Direct move from a GPR to a VSX register (zero)
      MTVSRZ,

      // FIXME: Remove these once the ANDI glue bug is fixed:
      /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
      /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
      /// implement truncation of i32 or i64 to i1.
      ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,

      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
      // target (returns (Lo, Hi)). It takes a chain operand.
      READ_TIME_BASE,

      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
      /// instructions.  For lack of better number, we use the opcode number
      /// encoding for the OPC field to identify the compare.  For example, 838
      /// is VCMPGTSH.
      VCMP,

      /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
      /// altivec VCMP*o instructions.  For lack of better number, we use the
      /// opcode number encoding for the OPC field to identify the compare.  For
      /// example, 838 is VCMPGTSH.
      VCMPo,

      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
      /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
      /// condition register to branch on, OPC is the branch opcode to use (e.g.
      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
      /// an optional input flag argument.
      COND_BRANCH,

      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
      /// loops.
      BDNZ, BDZ,

      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
      /// towards zero.  Used only as part of the long double-to-int
      /// conversion sequence.
      FADDRTZ,

      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
      MFFS,

      /// TC_RETURN - A tail call return.
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
      ///   operand #2 stack adjustment
      ///   operand #3 optional in flag
      TC_RETURN,

      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
      CR6SET,
      CR6UNSET,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
      /// on PPC32.
      PPC32_GOT,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
      /// local dynamic TLS on PPC32.
      PPC32_PICGOT,

      /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
      /// base to sym\@got\@tprel\@ha.
      ADDIS_GOT_TPREL_HA,

      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
      /// TLS model, produces a LD instruction with base register G8RReg
      /// and offset sym\@got\@tprel\@l.  This completes the addition that
      /// finds the offset of "sym" relative to the thread pointer.
      LD_GOT_TPREL_L,

      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
      /// model, produces an ADD instruction that adds the contents of
      /// G8RReg to the thread pointer.  Symbol contains a relocation
      /// sym\@tls which is to be replaced by the thread pointer and
      /// identifies to the linker that the instruction is part of a
      /// TLS sequence.
      ADD_TLS,

      /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsgd\@ha.
      ADDIS_TLSGD_HA,

      /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      ADDI_TLSGD_L,

      /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      GET_TLS_ADDR,

      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
      /// register assignment.
      ADDI_TLSGD_L_ADDR,

      /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsld\@ha.
      ADDIS_TLSLD_HA,

      /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      ADDI_TLSLD_L,

      /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      GET_TLSLD_ADDR,

      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
      /// following register assignment.
      ADDI_TLSLD_L_ADDR,

      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds X3 to
      /// sym\@dtprel\@ha.
      ADDIS_DTPREL_HA,

      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@dtprel\@l.
      ADDI_DTPREL_L,

      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
      /// during instruction selection to optimize a BUILD_VECTOR into
      /// operations on splats.  This is necessary to avoid losing these
      /// optimizations due to constant folding.
      VADD_SPLAT,

      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
      /// operand identifies the operating system entry point.
      SC,

      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
      /// or stxvd2x instruction.  The chain is necessary because the
      /// sequence replaces a load and needs to provide the same number
      /// of outputs.
      XXSWAPD,

      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
      QVFPERM,

      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
      QVGPCI,

      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
      QVALIGNI,

      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
      QVESPLATI,

      /// QBFLT = Access the underlying QPX floating-point boolean
      /// representation.
      QBFLT,

      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
      /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
      /// i32.
      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,

      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
      /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
      /// or i32.
      LBRX,

      /// STFIWX - The STFIWX instruction.  The first operand is an input token
      /// chain, then an f64 value to store, then an address to store it to.
      STFIWX,

      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
      /// load which sign-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWAX,

      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
      /// load which zero-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWZX,

      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
      /// Maps directly to an lxvd2x instruction that will be followed by
      /// an xxswapd.
      LXVD2X,

      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
      /// Maps directly to an stxvd2x instruction that will be preceded by
      /// an xxswapd.
      STXVD2X,

      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
      /// The 4xf32 load used for v4i1 constants.
      QVLFSb,

      /// GPRC = TOC_ENTRY GA, TOC
      /// Loads the entry for GA from the TOC, where the TOC base is given by
      /// the last operand.
      TOC_ENTRY
    };
  }

  /// Define some predicates that are used for node matching.
  namespace PPC {
    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUHUM instruction.
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUWUM instruction.
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
    /// shift amount, otherwise return -1.
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                            SelectionDAG &DAG);

    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a splat of a single element that is suitable for input to
    /// VSPLTB/VSPLTH/VSPLTW.
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);

    /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
    /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
    unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);

    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
    /// formed by using a vspltis[bhw] instruction of the specified element
    /// size, return the constant being splatted.  The ByteSize field indicates
    /// the number of bytes of each element [124] -> [bhw].
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);

    /// If this is a qvaligni shuffle mask, return the shift
    /// amount, otherwise return -1.
    int isQVALIGNIShuffleMask(SDNode *N);
  }

  class PPCTargetLowering : public TargetLowering {
    // Subtarget queried throughout lowering for feature availability
    // (VSX, QPX, 64-bit, Darwin vs. SVR4 ABI, etc.).
    const PPCSubtarget &Subtarget;

  public:
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
                               const PPCSubtarget &STI);

    /// getTargetNodeName() - This method returns the name of a target specific
    /// DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    // Shift amounts are always materialized as i32 on PPC.
    MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }

    bool isCheapToSpeculateCttz() const override {
      return true;
    }

    bool isCheapToSpeculateCtlz() const override {
      return true;
    }

    /// getSetCCResultType - Return the ISD::SETCC ValueType
    EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;

    /// Return true if target always beneficiates from combining into FMA for a
    /// given value type. This must typically return false on targets where FMA
    /// takes more cycles to execute than FADD.
    bool enableAggressiveFMAFusion(EVT VT) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                   SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// SelectAddressRegReg - Given the specified addressed, check to see if it
    /// can be represented as an indexed [r+r] operation.  Returns false if it
    /// can be more efficiently represented with [r+imm].
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                             SelectionDAG &DAG) const;

    /// SelectAddressRegImm - Returns true if the address N can be represented
    /// by a base register plus a signed 16-bit displacement [r+imm], and if it
    /// is not better represented as reg+reg.  If Aligned is true, only accept
    /// displacements suitable for STD and friends, i.e. multiples of 4.
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG, bool Aligned) const;

    /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                 SelectionDAG &DAG) const;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;

    // Rewrite little-endian VSX loads/stores to use lxvd2x/stxvd2x plus an
    // explicit xxswapd (see LXVD2X/STXVD2X/XXSWAPD node kinds above).
    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          std::vector<SDNode *> *Created) const override;

    unsigned getRegisterByName(const char* RegName, EVT VT) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;

    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                  bool IsStore, bool IsLoad) const override;
    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                   bool IsStore, bool IsLoad) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI,
                                MachineBasicBlock *MBB) const override;
    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
                                        MachineBasicBlock *MBB,
                                        unsigned AtomicSize,
                                        unsigned BinOpcode) const;
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
                                                MachineBasicBlock *MBB,
                                                bool is8bit,
                                                unsigned Opcode) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
                                         MachineBasicBlock *MBB) const;

    ConstraintType
    getConstraintType(const std::string &Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 const std::string &Constraint,
                                 MVT VT) const override;

    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area.  This is the actual
    /// alignment, not its logarithm.
    unsigned getByValTypeAlignment(Type *Ty) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector.  If it is invalid, don't add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned getInlineAsmMemConstraint(
        const std::string &ConstraintCode) const override {
      if (ConstraintCode == "es")
        return InlineAsm::Constraint_es;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "Q")
        return InlineAsm::Constraint_Q;
      else if (ConstraintCode == "Z")
        return InlineAsm::Constraint_Z;
      else if (ConstraintCode == "Zy")
        return InlineAsm::Constraint_Zy;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;

    /// isLegalICmpImmediate - Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// isTruncateFree - Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in
    /// register X1 to i32 by referencing its sub-register R1.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool isFPExtFree(EVT VT) const override;

    /// \brief Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            unsigned Intrinsic) const override;

    /// getOptimalMemOpType - Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero that means it's safe to destination
    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
    /// means there isn't a need to check it against alignment requirement,
    /// probably because the source does not need to be loaded. If 'IsMemset' is
    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
    /// source is constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT
    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                        MachineFunction &MF) const override;

    /// Is unaligned memory access allowed for the given type, and is it fast
    /// relative to software emulation.
    bool allowsMisalignedMemoryAccesses(EVT VT,
                                        unsigned AddrSpace,
                                        unsigned Align = 1,
                                        bool *Fast = nullptr) const override;

    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
    /// expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Should we expand the build vector with shuffles?
    bool
    shouldExpandBuildVectorWithShuffles(EVT VT,
                                        unsigned DefinedValues) const override;

    /// createFastISel - This method returns a target-specific FastISel object,
    /// or null if the target does not support "fast" instruction selection.
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo) const override;

    /// \brief Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
      // We support any array type as "consecutive" block in the parameter
      // save area.  The element type defines the alignment requirement and
      // whether the argument should go in GPRs, FPRs, or VRs if available.
      //
      // Note that clang uses this capability both to implement the ELFv2
      // homogeneous float/vector aggregate ABI, and to avoid having to use
      // "byval" when passing aggregates that might fully fit in registers.
      return Ty->isArrayTy();
    }

  private:

    // Bookkeeping for re-using the address (and chain) of an existing load
    // when lowering FP<->int conversions; filled in by canReuseLoadAddress.
    struct ReuseLoadInfo {
      SDValue Ptr;
      SDValue Chain;
      SDValue ResChain;
      MachinePointerInfo MPI;
      bool IsInvariant;
      unsigned Alignment;
      AAMDNodes AAInfo;
      const MDNode *Ranges;

      ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
    };

    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                             SelectionDAG &DAG,
                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
                         SelectionDAG &DAG) const;

    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                SelectionDAG &DAG, SDLoc dl) const;
    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                     SDLoc dl) const;
    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                     SDLoc dl) const;

    SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
    SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;

    bool
    IsEligibleForTailCallOptimization(SDValue Callee,
                                      CallingConv::ID CalleeCC,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SelectionDAG& DAG) const;

    SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                         int SPDiff,
                                         SDValue Chain,
                                         SDValue &LROpOut,
                                         SDValue &FPOpOut,
                                         bool isDarwinABI,
                                         SDLoc dl) const;

    // Custom lowering hooks dispatched from LowerOperation, one per opcode.
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
                         const PPCSubtarget &Subtarget) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
                       const PPCSubtarget &Subtarget) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG,
                        const PPCSubtarget &Subtarget) const;
    SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
                                    const PPCSubtarget &Subtarget) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            SDLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
    SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
                       bool isVarArg, bool IsPatchPoint,
                       SelectionDAG &DAG,
                       SmallVector<std::pair<unsigned, SDValue>, 8>
                         &RegsToPass,
                       SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
                       SDValue &Callee,
                       int SPDiff, unsigned NumBytes,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SmallVectorImpl<SDValue> &InVals,
                       ImmutableCallSite *CS) const;

    SDValue
      LowerFormalArguments(SDValue Chain,
                           CallingConv::ID CallConv, bool isVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           SDLoc dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const override;

    SDValue
      LowerCall(TargetLowering::CallLoweringInfo &CLI,
                SmallVectorImpl<SDValue> &InVals) const override;

    bool
      CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                     bool isVarArg,
                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                     LLVMContext &Context) const override;

    SDValue
      LowerReturn(SDValue Chain,
                  CallingConv::ID CallConv, bool isVarArg,
                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                  const SmallVectorImpl<SDValue> &OutVals,
                  SDLoc dl, SelectionDAG &DAG) const override;

    SDValue
      extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
                        SDValue ArgVal, SDLoc dl) const;

    // ABI-specific argument lowering, selected by subtarget/triple.
    SDValue
      LowerFormalArguments_Darwin(SDValue Chain,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  SDLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const;
    SDValue
      LowerFormalArguments_64SVR4(SDValue Chain,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  SDLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const;
    SDValue
      LowerFormalArguments_32SVR4(SDValue Chain,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  SDLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const;

    SDValue
      createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                 SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
                                 SelectionDAG &DAG, SDLoc dl) const;

    // ABI-specific call lowering, selected by subtarget/triple.
    SDValue
      LowerCall_Darwin(SDValue Chain, SDValue Callee,
                       CallingConv::ID CallConv,
                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SDLoc dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals,
                       ImmutableCallSite *CS) const;
    SDValue
      LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                       CallingConv::ID CallConv,
                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SDLoc dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals,
                       ImmutableCallSite *CS) const;
    SDValue
      LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SDLoc dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals,
                       ImmutableCallSite *CS) const;

    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;

    SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps,
                             bool &UseOneConstNR) const override;
    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps) const override;
    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;

    // Returns the calling-convention assignment function used by FastISel
    // (argument vs. return handling selected by Flag).
    CCAssignFn *useFastISelCCs(unsigned Flag) const;
  };

  namespace PPC {
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo);
  }

  // Custom CCAssignFn helpers for the 32-bit SVR4 calling convention,
  // referenced from the generated calling-convention tables.
  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                  CCValAssign::LocInfo &LocInfo,
                                  ISD::ArgFlagsTy &ArgFlags,
                                  CCState &State);

  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                         MVT &LocVT,
                                         CCValAssign::LocInfo &LocInfo,
                                         ISD::ArgFlagsTy &ArgFlags,
                                         CCState &State);

  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                           MVT &LocVT,
                                           CCValAssign::LocInfo &LocInfo,
                                           ISD::ArgFlagsTy &ArgFlags,
                                           CCState &State);
}

#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H