//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// Bitwise logical right shift of floating point values. This
      /// corresponds to X86::PSRLDQ.
      FSRL,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular the
      /// operands of these nodes are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select.
      SELECT,

      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
      /// result in an integer GPR. Needs masking for scalar result.
      FGETSIGNx86,
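      // For illustration: FGETSIGNx86 is the MOVMSK itself, so a scalar
      // fgetsign additionally needs the low bit masked off afterwards,
      // roughly (a sketch, not a normative pattern):
      //   (i32 (and (X86ISD::FGETSIGNx86 f64:$src), 1))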
      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool,
      /// TargetExternalSymbol, and TargetGlobalAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector. If you think this is too close to the previous
      /// mnemonic, so do I; blame Intel.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of a MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of a MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRW.
      PINSRW, MMX_PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Copy integer sign.
      PSIGN,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Blend where the condition has been shrunk.
      /// This is used to emphasize that the condition mask is
      /// no longer valid for generic VSELECT optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,
      // FP vector ops with rounding mode.
      FADD_RND,
      FSUB_RND,
      FMUL_RND,
      FDIV_RND,

      // Integer sub with unsigned saturation.
      SUBUS,

      /// Integer horizontal add.
      HADD,

      /// Integer horizontal sub.
      HSUB,

      /// Floating point horizontal add.
      FHADD,

      /// Floating point horizontal sub.
      FHSUB,

      /// Unsigned integer max and min.
      UMAX, UMIN,

      /// Signed integer max and min.
      SMAX, SMIN,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,
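      // For reference: the hardware estimates are only good to roughly 12
      // bits, so a Newton-Raphson step is usually applied afterwards, e.g.
      // for rsqrt (a sketch):  est' = est * (1.5 - 0.5 * x * est * est).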
      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. A call to an OS-provided thunk at
      // the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,

      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,

      // Vector integer truncate with mask.
      VTRUNCM,

      // Vector FP extend.
      VFPEXT,

      // Vector FP round.
      VFPROUND,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,
      // Vector integer comparisons, the result is in a mask vector.
      PCMPEQM, PCMPGTM,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      CMPMU,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      BEXTR,  // Bit field extract.

      UMUL,   // LOW, HI, FLAGS = umul LHS, RHS

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
      SMUL8, UMUL8,

      // 8-bit divrem that zero-/sign-extend the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // Vector "test" in AVX-512, the result is in a mask vector.
      TESTM,
      TESTNM,

      // OR/AND test for masks.
      KORTEST,

      // Several flavors of instructions with vector shuffle behaviors.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVLHPD,
      MOVHLPS,
      MOVLPS,
      MOVLPD,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMV,
      VPERMV3,
      VPERMIV3,
      VPERMI,
      VPERM2X128,
      VBROADCAST,
      // Masked broadcast.
      VBROADCASTM,
      // Insert/Extract vector element.
      VINSERT,
      VEXTRACT,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,

      // FMA nodes.
      FMADD,
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,
      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,
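      // For reference, the per-element arithmetic of the flavors above
      // (mirroring the VFMADD* instruction family):
      //   FMADD:   (a * b) + c      FMSUB:   (a * b) - c
      //   FNMADD: -(a * b) + c      FNMSUB: -(a * b) - c
      // FMADDSUB subtracts c in even lanes and adds it in odd lanes;
      // FMSUBADD does the opposite.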
      RNDSCALE,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Save xmm argument registers to the stack, according to %al. An
      // operator is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows' _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space,
      // and falls back to heap allocation if not.
      SEG_ALLOCA,

      // Windows' _ftol2 runtime routine to do fptoui.
      WIN_FTOL,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,
      SFENCE,
      LFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      PCMPISTRI,
      PCMPESTRI,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RCP28, EXP2,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff.
      /// It has two inputs (token chain and address) and two outputs (int
      /// value and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from LCMPXCHG_DAG all opcodes will be
      // treated as target memory ops!
    };
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Return true if the specified
    /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
    /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
    bool isVEXTRACT128Index(SDNode *N);

    /// Return true if the specified
    /// INSERT_SUBVECTOR operand specifies a subvector insert that is
    /// suitable for input to VINSERTF128, VINSERTI128 instructions.
    bool isVINSERT128Index(SDNode *N);
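    // For illustration: with a v8i32 source, an EXTRACT_SUBVECTOR starting at
    // element 4 covers the upper 128-bit half, so the matching
    // VEXTRACTF128/VEXTRACTI128 immediate is 1 (element 0 would give 0).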
    /// Return true if the specified
    /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
    /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
    bool isVEXTRACT256Index(SDNode *N);

    /// Return true if the specified
    /// INSERT_SUBVECTOR operand specifies a subvector insert that is
    /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
    bool isVINSERT256Index(SDNode *N);

    /// Return the appropriate
    /// immediate to extract the specified EXTRACT_SUBVECTOR index
    /// with VEXTRACTF128, VEXTRACTI128 instructions.
    unsigned getExtractVEXTRACT128Immediate(SDNode *N);

    /// Return the appropriate
    /// immediate to insert at the specified INSERT_SUBVECTOR index
    /// with VINSERTF128, VINSERTI128 instructions.
    unsigned getInsertVINSERT128Immediate(SDNode *N);

    /// Return the appropriate
    /// immediate to extract the specified EXTRACT_SUBVECTOR index
    /// with VEXTRACTF64X4, VEXTRACTI64X4 instructions.
    unsigned getExtractVEXTRACT256Immediate(SDNode *N);

    /// Return the appropriate
    /// immediate to insert at the specified INSERT_SUBVECTOR index
    /// with VINSERTF64X4, VINSERTI64X4 instructions.
    unsigned getInsertVINSERT256Immediate(SDNode *N);

    /// Returns true if Elt is a constant zero or floating point constant
    /// +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool TailCallOpt);

    /// AVX512 static rounding constants. These need to match the values in
    /// avx512fintrin.h.
    enum STATIC_ROUNDING {
      TO_NEAREST_INT = 0,
      TO_NEG_INF = 1,
      TO_POS_INF = 2,
      TO_ZERO = 3,
      CUR_DIRECTION = 4
    };
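    // For reference (illustrative, not a definition in this file): these
    // values line up with the SSE rounding-mode macros, e.g.
    // _MM_FROUND_TO_NEAREST_INT == 0 through _MM_FROUND_CUR_DIRECTION == 4,
    // which is why they must stay in sync with avx512fintrin.h.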
  }

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;

    MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means it's safe to assume the
    /// destination alignment can satisfy any constraint. Similarly, if
    /// SrcAlign is zero, there is no need to check it against the alignment
    /// requirement, probably because the source does not need to be loaded.
    /// If 'IsMemset' is true, that means it's expanding a memset. If
    /// 'ZeroMemset' is true, that means it's a memset of zero. 'MemcpyStrSrc'
    /// indicates whether the memcpy source is constant so it does not need to
    /// be loaded. It returns MVT::Other if the type should be determined
    /// using generic target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Whether the access is "fast" is returned by reference
    /// in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On
    /// x86 i16 is legal, but undesirable since i16 instruction encodings are
    /// longer and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
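    // For illustration: an i16 add such as "addw $1, %ax" needs a 0x66
    // operand-size prefix and can incur partial-register stalls, so even
    // though i16 is legal it is usually better to promote the operation to
    // i32 (a sketch of the rationale behind the two hooks above).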
    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType
    getConstraintType(const std::string &Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value. The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(const std::string &ConstraintCode) const override {
      // FIXME: Map different constraints differently.
      return InlineAsm::Constraint_m;
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for
    /// the register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 const std::string &Constraint,
                                 MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified
    /// type.
    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;
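    // For illustration: x86 cmp/add encodings take at most a sign-extended
    // 32-bit immediate, so on x86-64 an immediate like 0x7fffffff is legal
    // for the hooks above, while 0x100000000 is not and must be materialized
    // into a register first.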
    /// \brief Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value
    /// in register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed
    /// to be legal.
    bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
                            EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
    bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                EVT VT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is
      // more expensive than a straight movsd. On the other hand, it's
      // important to shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;
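    // For illustration of what ShouldShrinkFPConstant enables: with only x87
    // available, an f80 constant whose value is exactly representable at
    // lower precision (e.g. 1.5) can be placed in the constant pool as a
    // 4-byte f32 and reloaded with an extending load, avoiding the much
    // slower 10-byte fldt.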
    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal with SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is legal with SSE1
    }

    /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
    bool isTargetFTOL() const;

    /// Return true if the MSVC _ftol2 routine should be used for fptoui to
    /// the given type.
    bool isIntegerTypeFTOL(EVT VT) const {
      return isTargetFTOL() && VT == MVT::i64;
    }

    /// \brief Returns true if it is beneficial to convert a load of a
    /// constant to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // Nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT) const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// Return true if the target stores stack protector cookies at a fixed
    /// offset in some non-standard address space, and populates the address
    /// space and offset as appropriate.
    bool getStackCookieLocation(unsigned &AddressSpace,
                                unsigned &Offset) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    bool useLoadStackGuardNode() const override;

    /// \brief Customize the preferred legalization strategy for certain
    /// types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;
    const DataLayout *TD;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }
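    // For illustration: the target constructor typically registers the
    // immediates the hardware can materialize directly, e.g. (a sketch):
    //   addLegalFPImmediate(APFloat(+0.0)); // x87 fldz / SSE xorps
    //   addLegalFPImmediate(APFloat(+1.0)); // x87 fld1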
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            SDLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerMemArgument(SDValue Chain,
                             CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             SDLoc dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo *MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             SDLoc dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins,
        SelectionDAG &DAG) const;
    bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff, SDLoc dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    std::pair<SDValue, SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned,
                                                bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
    SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                      SDLoc dl, SelectionDAG &DAG) const;
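    // For illustration: LowerToBT matches bit-test idioms such as
    // "(x & (1 << n)) != 0" and lowers them to a BT instruction plus a use of
    // the carry flag (JC/SETC), instead of a shift, an AND, and a compare.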
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain,
                         CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         SDLoc dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain,
                        CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        SDLoc dl, SelectionDAG &DAG) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(CallInst *CI) const override;

    EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                                 ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicRMWExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(const Type *MemType) const;

    /// Utility function to emit atomic-load-arith operations (and, or, xor,
    /// nand, max, min, umax, umin). It takes the corresponding instruction to
    /// expand, the associated machine basic block, and the associated X86
    /// opcodes for reg/reg.
    MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
                                           MachineBasicBlock *MBB) const;

    /// Utility function to emit atomic-load-arith operations (and, or, xor,
    /// nand, add, sub, swap) for 64-bit operands on 32-bit targets.
    MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
                                               MachineBasicBlock *MBB) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *EmitVAARG64WithCustomInserter(
                       MachineInstr *MI,
                       MachineBasicBlock *MBB) const;
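    // For illustration: on x86-64 System V targets the emitted code consults
    // the four-field va_list (gp_offset, fp_offset, overflow_arg_area,
    // reg_save_area), loading from the register save area while the offsets
    // are in range and falling back to the overflow area otherwise.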
    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                   MachineInstr *BInstr,
                                                   MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps,
                             bool &UseOneConstNR) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  }
}

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H