//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call instruction,
      /// which includes a bunch of information. In particular the
      /// operands of this node are:
      ///
      ///   #0 - The incoming token chain
      ///   #1 - The callee
      ///   #2 - The number of arg bytes the caller pushes on the stack.
      ///   #3 - The number of arg bytes the callee pops off the stack.
      ///   #4 - The value to pass in AL/AX/EAX (optional)
      ///   #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///   #0 - The outgoing token chain
      ///   #1 - The first register result value (optional)
      ///   #2 - The second register result value (optional)
      ///
      CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECT,

      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 MOVMSK{pd|ps}, extracts the sign bits of two or four FP values,
      /// placing the result in an integer GPR. Needs masking for scalar
      /// result.
      FGETSIGNx86,

      /// X86 conditional moves.
      /// Operand 0 and operand 1 are the two values to select from.
      /// Operand 2 is the condition code, and operand 3 is the flag operand
      /// produced by a CMP or TEST instruction. It also writes a flag
      /// result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is
      /// the condition code, and operand 3 is the flag operand produced by a
      /// CMP or TEST instruction.
      BRCOND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool,
      /// TargetExternalSymbol, and TargetGlobalAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector. If you think this is too close to the previous
      /// mnemonic, so do I; blame Intel.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of an MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRW.
      PINSRW, MMX_PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences.
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Copy integer sign.
      PSIGN,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Blend where the condition has been shrunk.
      /// This is used to emphasize that the condition mask is
      /// no longer valid for generic VSELECT optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND,
      FSUB_RND,
      FMUL_RND,
      FDIV_RND,
      FMAX_RND,
      FMIN_RND,
      FSQRT_RND,

      // FP vector get exponent.
      FGETEXP_RND,
      // Extract Normalized Mantissas.
      VGETMANT,
      // FP Scale.
      SCALEF,
      // Integer add/sub with unsigned saturation.
      ADDUS,
      SUBUS,
      // Integer add/sub with signed saturation.
      ADDS,
      SUBS,
      // Unsigned Integer average.
      AVG,
      /// Integer horizontal add.
      HADD,

      /// Integer horizontal sub.
      HSUB,

      /// Floating point horizontal add.
      FHADD,

      /// Floating point horizontal sub.
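      /// For example, a horizontal sub of v4f32 operands <a0,a1,a2,a3> and
      /// <b0,b1,b2,b3> produces <a0-a1, a2-a3, b0-b1, b2-b3>, matching the
      /// element order of HSUBPS.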
      FHSUB,

      // Integer absolute value.
      ABS,

      // Detect Conflicts Within a Vector.
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. A call to an OS-provided thunk at an
      // address obtained from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,

      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Vector FP extend.
      VFPEXT,

      // Vector FP round.
      VFPROUND,

      // Vector signed/unsigned integer to double.
      CVTDQ2PD, CVTUDQ2PD,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,
      // Vector integer comparisons, the result is in a mask vector.
      PCMPEQM, PCMPGTM,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      CMPMU,
      // Vector comparison with rounding mode for FP values.
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      BEXTR, // Bit field extract.

      UMUL, // LOW, HI, FLAGS = umul LHS, RHS

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
      SMUL8, UMUL8,

      // 8-bit divrem that zero-/sign-extend the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // Vector "test" in AVX-512, the result is in a mask vector.
      TESTM,
      TESTNM,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // Several flavors of instructions with vector shuffle behaviors.
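      // For example, PACKSS narrows two source vectors of wider integers
      // into a single vector of half-width elements using signed saturation,
      // while PACKUS uses unsigned saturation.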
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVLHPD,
      MOVHLPS,
      MOVLPS,
      MOVLPD,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMV,
      VPERMV3,
      VPERMIV3,
      VPERMI,
      VPERM2X128,
      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE,
      // RndScale - Round FP Values To Include A Given Number Of Fraction
      // Bits.
      VRNDSCALE,
      // Tests the types of packed FP values.
      VFPCLASS,
      // Tests the type of a scalar FP value.
      VFPCLASSS,
      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,
      // Insert/Extract vector element.
      VINSERT,
      VEXTRACT,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP variable/immediate rotations.
      VPROT, VPROTI,
      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector multiply packed integers with round and scale.
      MULHRS,
      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,
      // FMA nodes.
      FMADD,
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,
      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Convert signed/unsigned integer to scalar floating-point value
      // with rounding mode.
      SINT_TO_FP_RND,
      UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      FP_TO_SINT_RND, FP_TO_UINT_RND,

      // Save xmm argument registers to the stack, according to %al. An
      // operator is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks whether the current stacklet has enough
      // space, and falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,
      SFENCE,
      LFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      PCMPISTRI,
      PCMPESTRI,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RCP28, EXP2,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
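      // (In the FP_TO_INT*_IN_MEM sequences below, the stored control word
      // is modified to force round-toward-zero and then reloaded.)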
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff.
      /// It has two inputs (token chain and address) and two outputs (int
      /// value and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m instructions. It
      /// takes a chain operand, a ptr to load from, and a ValueType node
      /// indicating the type to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m
      /// instructions. It takes a chain operand, a value to store, an
      /// address, and a ValueType to store it as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64

      // WARNING: Do not add anything at the end unless you want the node to
      // have a memop! In fact, starting from ATOMADD64_DAG all opcodes will
      // be treated as target memory ops!
    };
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Return true if the specified EXTRACT_SUBVECTOR operand specifies a
    /// vector extract that is suitable for input to the VEXTRACTF128,
    /// VEXTRACTI128 instructions.
    bool isVEXTRACT128Index(SDNode *N);

    /// Return true if the specified INSERT_SUBVECTOR operand specifies a
    /// subvector insert that is suitable for input to the VINSERTF128,
    /// VINSERTI128 instructions.
    bool isVINSERT128Index(SDNode *N);

    /// Return true if the specified EXTRACT_SUBVECTOR operand specifies a
    /// vector extract that is suitable for input to the VEXTRACTF64X4,
    /// VEXTRACTI64X4 instructions.
    bool isVEXTRACT256Index(SDNode *N);

    /// Return true if the specified INSERT_SUBVECTOR operand specifies a
    /// subvector insert that is suitable for input to the VINSERTF64X4,
    /// VINSERTI64X4 instructions.
    bool isVINSERT256Index(SDNode *N);

    /// Return the appropriate immediate to extract the specified
    /// EXTRACT_SUBVECTOR index with the VEXTRACTF128, VEXTRACTI128
    /// instructions.
    unsigned getExtractVEXTRACT128Immediate(SDNode *N);

    /// Return the appropriate immediate to insert at the specified
    /// INSERT_SUBVECTOR index with the VINSERTF128, VINSERTI128
    /// instructions.
    unsigned getInsertVINSERT128Immediate(SDNode *N);

    /// Return the appropriate immediate to extract the specified
    /// EXTRACT_SUBVECTOR index with the VEXTRACTF64X4, VEXTRACTI64X4
    /// instructions.
    unsigned getExtractVEXTRACT256Immediate(SDNode *N);

    /// Return the appropriate immediate to insert at the specified
    /// INSERT_SUBVECTOR index with the VINSERTF64X4, VINSERTI64X4
    /// instructions.
    unsigned getInsertVINSERT256Immediate(SDNode *N);

    /// Returns true if Elt is a constant zero or floating point constant
    /// +0.0.
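    /// For example, (i32 0) and (f32 +0.0) qualify, but (f32 -0.0) does
    /// not, since the sign bit of -0.0 is set.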
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset fits into the displacement field of
    /// the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool TailCallOpt);

    /// AVX512 static rounding constants. These need to match the values in
    /// avx512fintrin.h.
    enum STATIC_ROUNDING {
      TO_NEAREST_INT = 0,
      TO_NEG_INF = 1,
      TO_POS_INF = 2,
      TO_ZERO = 3,
      CUR_DIRECTION = 4
    };
  }

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means the destination alignment
    /// can satisfy any constraint. Similarly, if SrcAlign is zero it means
    /// there is no need to check it against the alignment requirement,
    /// probably because the source does not need to be loaded. If 'IsMemset'
    /// is true, that means it's expanding a memset. If 'ZeroMemset' is true,
    /// that means it's a memset of zero. 'MemcpyStrSrc' indicates whether
    /// the memcpy source is constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
                            unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
                            bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl
    /// which also does type conversion. Note the specified type doesn't have
    /// to be legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for
    /// the given node type. e.g. On x86 i16 is legal, but undesirable since
    /// i16 instruction encodings are longer and some i16 instructions are
    /// slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On
    /// x86 i16 is legal, but undesirable since i16 instruction encodings are
    /// longer and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value. The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
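    /// For example, the 'N' constraint only accepts immediates in the range
    /// [0, 255] (unsigned 8-bit, as used for in/out port numbers).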
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for
    /// the register. This should only be used for C_Register constraints.
    /// On error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified
    /// type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// \brief Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value
    /// in register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
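    /// For example, "movl %ecx, %eax" also clears the upper 32 bits of
    /// %rax, so a subsequent i32 -> i64 zero extension of that value is
    /// free.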
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or
    /// any extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and
    /// fadd instructions. fmuladd intrinsics will be expanded to FMAs when
    /// this method returns true, otherwise fmuladd is expanded to fmul +
    /// fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if
    /// a target supports the VECTOR_SHUFFLE node, all mask values are
    /// assumed to be legal.
    bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
                            EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a
    /// VAND with a constant pool entry.
    bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                EVT VT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller
    /// type in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is
      // more expensive than a straight movsd. On the other hand, it's
      // important to shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 needs SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 needs SSE1
    }

    /// \brief Returns true if it is beneficial to convert a load of a
    /// constant to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;

    /// Intel processors have a unified instruction and data cache
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// Return true if the target stores stack protector cookies at a fixed
    /// offset in some non-standard address space, and populates the address
    /// space and offset as appropriate.
    bool getStackCookieLocation(unsigned &AddressSpace,
                                unsigned &Offset) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset
    /// in some non-standard address space, and populates the address space
    /// and offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    bool useLoadStackGuardNode() const override;

    /// \brief Customize the preferred legalization strategy for certain
    /// types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;

    void markInRegArguments(SelectionDAG &DAG,
                            TargetLowering::ArgListTy &Args) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            SDLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerMemArgument(SDValue Chain,
                             CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             SDLoc dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo *MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             SDLoc dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.
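    // These are shared by LowerCall and LowerFormalArguments below; e.g.
    // tail-call lowering first checks eligibility, then reloads the return
    // address so it can be stored at its new stack location (FPDiff is the
    // byte offset between the caller's and callee's argument areas).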

    /// Check whether the call is eligible for tail call optimization.
    /// Targets that want to do tail call optimization should implement this
    /// function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff, SDLoc dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    std::pair<SDValue, SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned,
                                                bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
    SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                      SDLoc dl, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain,
                         CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         SDLoc dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain,
                        CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        SDLoc dl, SelectionDAG &DAG) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(CallInst *CI) const override;

    EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                                 ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr *BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
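    /// For example, a compare of a register against zero can usually be
    /// selected as "test %reg, %reg", which sets the same flags as
    /// "cmp $0, %reg" with a shorter encoding.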
    SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps,
                             bool &UseOneConstNR) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  }
}

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H