1  //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  // This file defines the interfaces that X86 uses to lower LLVM code into a
11  // selection DAG.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16  #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17  
18  #include "llvm/CodeGen/CallingConvLower.h"
19  #include "llvm/CodeGen/SelectionDAG.h"
20  #include "llvm/CodeGen/TargetLowering.h"
21  #include "llvm/Target/TargetOptions.h"
22  
23  namespace llvm {
24    class X86Subtarget;
25    class X86TargetMachine;
26  
27    namespace X86ISD {
28      // X86 Specific DAG Nodes
29      enum NodeType : unsigned {
30        // Start the numbering where the builtin ops leave off.
31        FIRST_NUMBER = ISD::BUILTIN_OP_END,
32  
33        /// Bit scan forward.
34        BSF,
35        /// Bit scan reverse.
36        BSR,
37  
38        /// Double shift instructions. These correspond to
39        /// X86::SHLDxx and X86::SHRDxx instructions.
40        SHLD,
41        SHRD,
42  
43        /// Bitwise logical AND of floating point values. This corresponds
44        /// to X86::ANDPS or X86::ANDPD.
45        FAND,
46  
47        /// Bitwise logical OR of floating point values. This corresponds
48        /// to X86::ORPS or X86::ORPD.
49        FOR,
50  
51        /// Bitwise logical XOR of floating point values. This corresponds
52        /// to X86::XORPS or X86::XORPD.
53        FXOR,
54  
55        ///  Bitwise logical ANDNOT of floating point values. This
56        /// corresponds to X86::ANDNPS or X86::ANDNPD.
57        FANDN,
58  
59        /// These operations represent an abstract X86 call
60        /// instruction, which includes a bunch of information.  In particular the
61        /// operands of these nodes are:
62        ///
63        ///     #0 - The incoming token chain
64        ///     #1 - The callee
65        ///     #2 - The number of arg bytes the caller pushes on the stack.
66        ///     #3 - The number of arg bytes the callee pops off the stack.
67        ///     #4 - The value to pass in AL/AX/EAX (optional)
68        ///     #5 - The value to pass in DL/DX/EDX (optional)
69        ///
70        /// The result values of these nodes are:
71        ///
72        ///     #0 - The outgoing token chain
73        ///     #1 - The first register result value (optional)
74        ///     #2 - The second register result value (optional)
75        ///
76        CALL,
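          // Illustrative sketch only, not part of the original header: in
          // X86ISelLowering.cpp a call node of this shape is typically built
          // along the lines of
          //   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
          //   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
          // where Ops[0] is the incoming token chain and Ops[1] is the callee,
          // matching the operand list documented above.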
77  
78        /// Same as call except it adds the NoTrack prefix.
79        NT_CALL,
80  
81        /// This operation implements the lowering for readcyclecounter.
82        RDTSC_DAG,
83  
84        /// X86 Read Time-Stamp Counter and Processor ID.
85        RDTSCP_DAG,
86  
87        /// X86 Read Performance Monitoring Counters.
88        RDPMC_DAG,
89  
90        /// X86 compare and logical compare instructions.
91        CMP, COMI, UCOMI,
92  
93        /// X86 bit-test instructions.
94        BT,
95  
96        /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
97        /// operand, usually produced by a CMP instruction.
98        SETCC,
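          // Illustrative sketch, not part of the original header: a SETCC node
          // usually takes the form
          //   (i8 (X86ISD::SETCC cond, EFLAGS))
          // yielding 0 or 1; the i8 result type is an assumption here (the
          // common case in the X86 lowering), not something stated above.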
99  
100        /// X86 Select
101        SELECT, SELECTS,
102  
103        // Same as SETCC except it's materialized with an SBB and the value is all
104        // ones or all zeros.
105        SETCC_CARRY,  // R = carry_bit ? ~0 : 0
106  
107        /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
108        /// Operands are two FP values to compare; result is a mask of
109        /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
110        FSETCC,
111  
112        /// X86 FP SETCC, similar to above, but with output as an i1 mask and
113        /// with optional rounding mode.
114        FSETCCM, FSETCCM_RND,
115  
116        /// X86 conditional moves. Operand 0 and operand 1 are the two values
117        /// to select from. Operand 2 is the condition code, and operand 3 is the
118        /// flag operand produced by a CMP or TEST instruction. It also writes a
119        /// flag result.
120        CMOV,
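          // Illustrative sketch, not part of the original header: per the
          // operand list above, a CMOV node looks roughly like
          //   (X86ISD::CMOV val0, val1, cond, EFLAGS)
          // selecting between val0 and val1 based on cond and the EFLAGS value.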
121  
122        /// X86 conditional branches. Operand 0 is the chain operand, operand 1
123        /// is the block to branch if condition is true, operand 2 is the
124        /// condition code, and operand 3 is the flag operand produced by a CMP
125        /// or TEST instruction.
126        BRCOND,
127  
128        /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
129        /// operand 1 is the target address.
130        NT_BRIND,
131  
132        /// Return with a flag operand. Operand 0 is the chain operand, operand
133        /// 1 is the number of bytes of stack to pop.
134        RET_FLAG,
135  
136        /// Return from interrupt. Operand 0 is the number of bytes to pop.
137        IRET,
138  
139        /// Repeat fill, corresponds to X86::REP_STOSx.
140        REP_STOS,
141  
142        /// Repeat move, corresponds to X86::REP_MOVSx.
143        REP_MOVS,
144  
145        /// On Darwin, this node represents the result of the popl
146        /// at function entry, used for PIC code.
147        GlobalBaseReg,
148  
149        /// A wrapper node for TargetConstantPool, TargetJumpTable,
150        /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
151        /// MCSymbol and TargetBlockAddress.
152        Wrapper,
153  
154        /// Special wrapper used under X86-64 PIC mode for RIP
155        /// relative displacements.
156        WrapperRIP,
157  
158        /// Copies a 64-bit value from the low word of an XMM vector
159        /// to an MMX vector.
160        MOVDQ2Q,
161  
162        /// Copies a 32-bit value from the low word of an MMX
163        /// vector to a GPR.
164        MMX_MOVD2W,
165  
166        /// Copies a GPR into the low 32-bit word of an MMX vector
167        /// and zeros out the high word.
168        MMX_MOVW2D,
169  
170        /// Extract an 8-bit value from a vector and zero extend it to
171        /// i32, corresponds to X86::PEXTRB.
172        PEXTRB,
173  
174        /// Extract a 16-bit value from a vector and zero extend it to
175        /// i32, corresponds to X86::PEXTRW.
176        PEXTRW,
177  
178        /// Insert any element of a 4 x float vector into any element
179        /// of a destination 4 x float vector.
180        INSERTPS,
181  
182        /// Insert the lower 8 bits of a 32-bit value into a vector,
183        /// corresponds to X86::PINSRB.
184        PINSRB,
185  
186        /// Insert the lower 16 bits of a 32-bit value into a vector,
187        /// corresponds to X86::PINSRW.
188        PINSRW,
189  
190        /// Shuffle 16 8-bit values within a vector.
191        PSHUFB,
192  
193        /// Compute Sum of Absolute Differences.
194        PSADBW,
195        /// Compute Double Block Packed Sum-Absolute-Differences
196        DBPSADBW,
197  
198        /// Bitwise Logical AND NOT of Packed FP values.
199        ANDNP,
200  
201        /// Blend where the selector is an immediate.
202        BLENDI,
203  
204        /// Dynamic (non-constant condition) vector blend where only the sign bits
205        /// of the condition elements are used. This is used to enforce that the
206        /// condition mask is not valid for generic VSELECT optimizations.
207        SHRUNKBLEND,
208  
209        /// Combined add and sub on an FP vector.
210        ADDSUB,
211  
212        //  FP vector ops with rounding mode.
213        FADD_RND, FADDS_RND,
214        FSUB_RND, FSUBS_RND,
215        FMUL_RND, FMULS_RND,
216        FDIV_RND, FDIVS_RND,
217        FMAX_RND, FMAXS_RND,
218        FMIN_RND, FMINS_RND,
219        FSQRT_RND, FSQRTS_RND,
220  
221        // FP vector get exponent.
222        FGETEXP_RND, FGETEXPS_RND,
223        // Extract Normalized Mantissas.
224        VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
225        // FP Scale.
226        SCALEF,
227        SCALEFS,
228  
229        // Integer add/sub with unsigned saturation.
230        ADDUS,
231        SUBUS,
232  
233        // Integer add/sub with signed saturation.
234        ADDS,
235        SUBS,
236  
237        // Unsigned Integer average.
238        AVG,
239  
240        /// Integer horizontal add/sub.
241        HADD,
242        HSUB,
243  
244        /// Floating point horizontal add/sub.
245        FHADD,
246        FHSUB,
247  
248        // Detect Conflicts Within a Vector
249        CONFLICT,
250  
251        /// Floating point max and min.
252        FMAX, FMIN,
253  
254        /// Commutative FMIN and FMAX.
255        FMAXC, FMINC,
256  
257        /// Scalar intrinsic floating point max and min.
258        FMAXS, FMINS,
259  
260        /// Floating point reciprocal-sqrt and reciprocal approximation.
261        /// Note that these typically require refinement
262        /// in order to obtain suitable precision.
263        FRSQRT, FRCP,
264  
265        // AVX-512 reciprocal approximations with a little more precision.
266        RSQRT14, RSQRT14S, RCP14, RCP14S,
267  
268        // Thread Local Storage.
269        TLSADDR,
270  
271        // Thread Local Storage. A call to get the start address
272        // of the TLS block for the current module.
273        TLSBASEADDR,
274  
275        // Thread Local Storage. A call to an OS-provided thunk at the
276        // address from an earlier relocation.
277        TLSCALL,
278  
279        // Exception Handling helpers.
280        EH_RETURN,
281  
282        // SjLj exception handling setjmp.
283        EH_SJLJ_SETJMP,
284  
285        // SjLj exception handling longjmp.
286        EH_SJLJ_LONGJMP,
287  
288        // SjLj exception handling dispatch.
289        EH_SJLJ_SETUP_DISPATCH,
290  
291        /// Tail call return. See X86TargetLowering::LowerCall for
292        /// the list of operands.
293        TC_RETURN,
294  
295        // Vector move to low scalar and zero higher vector elements.
296        VZEXT_MOVL,
297  
298        // Vector integer zero-extend.
299        VZEXT,
300        // Vector integer signed-extend.
301        VSEXT,
302  
303        // Vector integer truncate.
304        VTRUNC,
305        // Vector integer truncate with unsigned/signed saturation.
306        VTRUNCUS, VTRUNCS,
307  
308        // Vector FP extend.
309        VFPEXT, VFPEXT_RND, VFPEXTS_RND,
310  
311        // Vector FP round.
312        VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
313  
314        // 128-bit vector logical left / right shift
315        VSHLDQ, VSRLDQ,
316  
317        // Vector shift elements
318        VSHL, VSRL, VSRA,
319  
320        // Vector variable shift right arithmetic.
321        // Unlike ISD::SRA, if the shift count is greater than the element size,
322        // the sign bit is used to fill the destination data element.
323        VSRAV,
324  
325        // Vector shift elements by immediate
326        VSHLI, VSRLI, VSRAI,
327  
328        // Shifts of mask registers.
329        KSHIFTL, KSHIFTR,
330  
331        // Bit rotate by immediate
332        VROTLI, VROTRI,
333  
334        // Vector packed double/float comparison.
335        CMPP,
336  
337        // Vector integer comparisons.
338        PCMPEQ, PCMPGT,
339  
340        // v8i16 Horizontal minimum and position.
341        PHMINPOS,
342  
343        MULTISHIFT,
344  
345        /// Vector comparison generating mask bits for fp and
346        /// integer signed and unsigned data types.
347        CMPM,
348        // Vector comparison with rounding mode for FP values
349        CMPM_RND,
350  
351        // Arithmetic operations with FLAGS results.
352        ADD, SUB, ADC, SBB, SMUL,
353        INC, DEC, OR, XOR, AND,
354  
355        // Bit field extract.
356        BEXTR,
357  
358        // LOW, HI, FLAGS = umul LHS, RHS.
359        UMUL,
360  
361        // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
362        SMUL8, UMUL8,
363  
364        // 8-bit divrem that zero- or sign-extend the high result (AH).
365        UDIVREM8_ZEXT_HREG,
366        SDIVREM8_SEXT_HREG,
367  
368        // X86-specific multiply by immediate.
369        MUL_IMM,
370  
371        // Vector sign bit extraction.
372        MOVMSK,
373  
374        // Vector bitwise comparisons.
375        PTEST,
376  
377        // Vector packed fp sign bitwise comparisons.
378        TESTP,
379  
380        // OR/AND test for masks.
381        KORTEST,
382        KTEST,
383  
384        // ADD for masks.
385        KADD,
386  
387        // Several flavors of instructions with vector shuffle behaviors.
388        // Saturated signed/unsigned packing.
389        PACKSS,
390        PACKUS,
391        // Intra-lane alignr.
392        PALIGNR,
393        // AVX512 inter-lane alignr.
394        VALIGN,
395        PSHUFD,
396        PSHUFHW,
397        PSHUFLW,
398        SHUFP,
399        // VBMI2 Concat & Shift.
400        VSHLD,
401        VSHRD,
402        VSHLDV,
403        VSHRDV,
404        // Shuffle Packed Values at 128-bit granularity.
405        SHUF128,
406        MOVDDUP,
407        MOVSHDUP,
408        MOVSLDUP,
409        MOVLHPS,
410        MOVHLPS,
411        MOVSD,
412        MOVSS,
413        UNPCKL,
414        UNPCKH,
415        VPERMILPV,
416        VPERMILPI,
417        VPERMI,
418        VPERM2X128,
419  
420        // Variable Permute (VPERM).
421        // Res = VPERMV MaskV, V0
422        VPERMV,
423  
424        // 3-op Variable Permute (VPERMT2).
425        // Res = VPERMV3 V0, MaskV, V1
426        VPERMV3,
427  
428        // Bitwise ternary logic.
429        VPTERNLOG,
430        // Fix Up Special Packed Float32/64 values.
431        VFIXUPIMM,
432        VFIXUPIMMS,
433        // Range Restriction Calculation For Packed Pairs of Float32/64 values.
434        VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
435        // Reduce - Perform Reduction Transformation on scalar/packed FP.
436        VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
437        // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
438        // Also used by the legacy (V)ROUND intrinsics where we mask out the
439        // scaling part of the immediate.
440        VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
441        // Tests types of FP values for packed types.
442        VFPCLASS,
443        // Tests types of FP values for scalar types.
444        VFPCLASSS,
445  
446        // Broadcast scalar to vector.
447        VBROADCAST,
448        // Broadcast mask to vector.
449        VBROADCASTM,
450        // Broadcast subvector to vector.
451        SUBV_BROADCAST,
452  
453        /// SSE4A Extraction and Insertion.
454        EXTRQI, INSERTQI,
455  
456        // XOP arithmetic/logical shifts.
457        VPSHA, VPSHL,
458        // XOP signed/unsigned integer comparisons.
459        VPCOM, VPCOMU,
460        // XOP packed permute bytes.
461        VPPERM,
462        // XOP two source permutation.
463        VPERMIL2,
464  
465        // Vector multiply packed unsigned doubleword integers.
466        PMULUDQ,
467        // Vector multiply packed signed doubleword integers.
468        PMULDQ,
469        // Vector Multiply Packed Unsigned Integers with Round and Scale.
470        MULHRS,
471  
472        // Multiply and Add Packed Integers.
473        VPMADDUBSW, VPMADDWD,
474  
475        // AVX512IFMA multiply and add.
476        // NOTE: These are different from the instruction and perform
477        // op0 x op1 + op2.
478        VPMADD52L, VPMADD52H,
479  
480        // VNNI
481        VPDPBUSD,
482        VPDPBUSDS,
483        VPDPWSSD,
484        VPDPWSSDS,
485  
486        // FMA nodes.
487        // We use the target independent ISD::FMA for the non-inverted case.
488        FNMADD,
489        FMSUB,
490        FNMSUB,
491        FMADDSUB,
492        FMSUBADD,
493  
494        // FMA with rounding mode.
495        FMADD_RND,
496        FNMADD_RND,
497        FMSUB_RND,
498        FNMSUB_RND,
499        FMADDSUB_RND,
500        FMSUBADD_RND,
501  
502        // Compress and expand.
503        COMPRESS,
504        EXPAND,
505  
506        // Bit shuffle.
507        VPSHUFBITQMB,
508  
509        // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
510        SINT_TO_FP_RND, UINT_TO_FP_RND,
511        SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
512  
513        // Vector float/double to signed/unsigned integer.
514        CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
515        // Scalar float/double to signed/unsigned integer.
516        CVTS2SI_RND, CVTS2UI_RND,
517  
518        // Vector float/double to signed/unsigned integer with truncation.
519        CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
520        // Scalar float/double to signed/unsigned integer with truncation.
521        CVTTS2SI_RND, CVTTS2UI_RND,
522  
523        // Vector signed/unsigned integer to float/double.
524        CVTSI2P, CVTUI2P,
525  
526        // Save xmm argument registers to the stack, according to %al. An operator
527        // is needed so that this can be expanded with control flow.
528        VASTART_SAVE_XMM_REGS,
529  
530        // Windows's _chkstk call to do stack probing.
531        WIN_ALLOCA,
532  
533        // For allocating variable amounts of stack space when using
534        // segmented stacks. Checks if the current stacklet has enough space, and
535        // falls back to heap allocation if not.
536        SEG_ALLOCA,
537  
538        // Memory barriers.
539        MEMBARRIER,
540        MFENCE,
541  
542        // Store FP status word into i16 register.
543        FNSTSW16r,
544  
545        // Store contents of %ah into %eflags.
546        SAHF,
547  
548        // Get a random integer and indicate whether it is valid in CF.
549        RDRAND,
550  
551        // Get a NIST SP800-90B & C compliant random integer and
552        // indicate whether it is valid in CF.
553        RDSEED,
554  
555        // SSE42 string comparisons.
556        // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
557        // will emit one or two instructions based on which results are used. If
558        // both the flags and the index/mask results are used, this allows us to use
559        // a single instruction since we won't have to pick an opcode for flags.
560        // Instead we can rely on the DAG to CSE everything and decide at isel.
561        PCMPISTR,
562        PCMPESTR,
563  
564        // Test if in transactional execution.
565        XTEST,
566  
567        // ERI instructions.
568        RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
569  
570        // Conversions between float and half-float.
571        CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
572  
573        // Galois Field Arithmetic Instructions
574        GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
575  
576        // LWP insert record.
577        LWPINS,
578  
579        // User level wait
580        UMWAIT, TPAUSE,
581  
582        // Compare and swap.
583        LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
584        LCMPXCHG8_DAG,
585        LCMPXCHG16_DAG,
586        LCMPXCHG8_SAVE_EBX_DAG,
587        LCMPXCHG16_SAVE_RBX_DAG,
588  
589        /// LOCK-prefixed arithmetic read-modify-write instructions.
590        /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
591        LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
592  
593        // Load, scalar_to_vector, and zero extend.
594        VZEXT_LOAD,
595  
596        // Store FP control word into i16 memory.
597        FNSTCW16m,
598  
599        /// This instruction implements FP_TO_SINT with the
600        /// integer destination in memory and a FP reg source.  This corresponds
601        /// to the X86::FIST*m instructions and the rounding mode change stuff. It
602        /// has two inputs (token chain and address) and two outputs (int value
603        /// and token chain).
604        FP_TO_INT16_IN_MEM,
605        FP_TO_INT32_IN_MEM,
606        FP_TO_INT64_IN_MEM,
607  
608        /// This instruction implements SINT_TO_FP with the
609        /// integer source in memory and FP reg result.  This corresponds to the
610        /// X86::FILD*m instructions. It has three inputs (token chain, address,
611        /// and source type) and two outputs (FP value and token chain). FILD_FLAG
612        /// also produces a flag.
613        FILD,
614        FILD_FLAG,
615  
616        /// This instruction implements an extending load to FP stack slots.
617        /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
618        /// operand, ptr to load from, and a ValueType node indicating the type
619        /// to load to.
620        FLD,
621  
622        /// This instruction implements a truncating store to FP stack
623        /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
624        /// chain operand, value to store, address, and a ValueType to store it
625        /// as.
626        FST,
627  
628        /// This instruction grabs the address of the next argument
629        /// from a va_list. (reads and modifies the va_list in memory)
630        VAARG_64,
631  
632        // Vector truncating store with unsigned/signed saturation
633        VTRUNCSTOREUS, VTRUNCSTORES,
634        // Vector truncating masked store with unsigned/signed saturation
635        VMTRUNCSTOREUS, VMTRUNCSTORES,
636  
637        // X86 specific gather and scatter
638        MGATHER, MSCATTER,
639  
640        // WARNING: Do not add anything at the end unless you want the node to
641        // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
642        // opcodes will be treated as target memory ops!
643      };
644    } // end namespace X86ISD
645  
646    /// Define some predicates that are used for node matching.
647    namespace X86 {
648      /// Returns true if Elt is a constant zero or floating point constant +0.0.
649      bool isZeroNode(SDValue Elt);
650  
651      /// Returns true if the given offset can
652      /// fit into the displacement field of the instruction.
653      bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
654                                        bool hasSymbolicDisplacement = true);
655  
656      /// Determines whether the callee is required to pop its
657      /// own arguments. Callee pop is necessary to support tail calls.
658      bool isCalleePop(CallingConv::ID CallingConv,
659                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
660  
661    } // end namespace X86
662  
663    //===--------------------------------------------------------------------===//
664    //  X86 Implementation of the TargetLowering interface
665    class X86TargetLowering final : public TargetLowering {
666    public:
667      explicit X86TargetLowering(const X86TargetMachine &TM,
668                                 const X86Subtarget &STI);
669  
670      unsigned getJumpTableEncoding() const override;
671      bool useSoftFloat() const override;
672  
673      void markLibCallAttributes(MachineFunction *MF, unsigned CC,
674                                 ArgListTy &Args) const override;
675  
676      MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
677        return MVT::i8;
678      }
679  
680      const MCExpr *
681      LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
682                                const MachineBasicBlock *MBB, unsigned uid,
683                                MCContext &Ctx) const override;
684  
685      /// Returns relocation base for the given PIC jumptable.
686      SDValue getPICJumpTableRelocBase(SDValue Table,
687                                       SelectionDAG &DAG) const override;
688      const MCExpr *
689      getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
690                                   unsigned JTI, MCContext &Ctx) const override;
691  
692      /// Return the desired alignment for ByVal aggregate
693      /// function arguments in the caller parameter area. For X86, aggregates
694      /// that contain SSE vectors are placed at 16-byte boundaries while the
695      /// rest are at 4-byte boundaries.
696      unsigned getByValTypeAlignment(Type *Ty,
697                                     const DataLayout &DL) const override;
698  
699      /// Returns the target specific optimal type for load
700      /// and store operations as a result of memset, memcpy, and memmove
701      /// lowering. If DstAlign is zero, that means the destination alignment can
702      /// satisfy any constraint. Similarly, if SrcAlign is zero it
703      /// means there isn't a need to check it against the alignment requirement,
704      /// probably because the source does not need to be loaded. If 'IsMemset' is
705      /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
706      /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
707      /// source is constant so it does not need to be loaded.
708      /// It returns EVT::Other if the type should be determined using generic
709      /// target-independent logic.
710      EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
711                              bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
712                              MachineFunction &MF) const override;
713  
714      /// Returns true if it's safe to use load / store of the
715      /// specified type to expand memcpy / memset inline. This is mostly true
716      /// for all types except for some special cases. For example, on X86
717      /// targets without SSE2 f64 load / store are done with fldl / fstpl which
718      /// also does type conversion. Note the specified type doesn't have to be
719      /// legal as the hook is used before type legalization.
720      bool isSafeMemOpType(MVT VT) const override;
721  
722      /// Returns true if the target allows unaligned memory accesses of the
723      /// specified type. Returns whether it is "fast" in the last argument.
724      bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
725                                         bool *Fast) const override;
726  
727      /// Provide custom lowering hooks for some operations.
728      ///
729      SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
730  
731      /// Places new result values for the node in Results (their number
732      /// and types must exactly match those of the original return values of
733      /// the node), or leaves Results empty, which indicates that the node is not
734      /// to be custom lowered after all.
735      void LowerOperationWrapper(SDNode *N,
736                                 SmallVectorImpl<SDValue> &Results,
737                                 SelectionDAG &DAG) const override;
738  
739      /// Replace the results of a node with an illegal result
740      /// type with new values built out of custom code.
741      ///
742      void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
743                              SelectionDAG &DAG) const override;
744  
745      SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
746  
747      // Return true if it is profitable to combine a BUILD_VECTOR with a
748      // stride-pattern to a shuffle and a truncate.
749      // Example of such a combine:
750      // v4i32 build_vector((extract_elt V, 1),
751      //                    (extract_elt V, 3),
752      //                    (extract_elt V, 5),
753      //                    (extract_elt V, 7))
754      //  -->
755      // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
756      // v4i64)
757      bool isDesirableToCombineBuildVectorToShuffleTruncate(
758          ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
759  
760      /// Return true if the target has native support for
761      /// the specified value type and it is 'desirable' to use the type for the
762      /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
763      /// instruction encodings are longer and some i16 instructions are slow.
764      bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
765  
766      /// Return true if the target has native support for the
767      /// specified value type and it is 'desirable' to use the type. e.g. On x86
768      /// i16 is legal, but undesirable since i16 instruction encodings are longer
769      /// and some i16 instructions are slow.
770      bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
771  
772      MachineBasicBlock *
773      EmitInstrWithCustomInserter(MachineInstr &MI,
774                                  MachineBasicBlock *MBB) const override;
775  
776      /// This method returns the name of a target specific DAG node.
777      const char *getTargetNodeName(unsigned Opcode) const override;
778  
779      bool mergeStoresAfterLegalization() const override { return true; }
780  
781      bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
782                            const SelectionDAG &DAG) const override;
783  
784      bool isCheapToSpeculateCttz() const override;
785  
786      bool isCheapToSpeculateCtlz() const override;
787  
788      bool isCtlzFast() const override;
789  
790      bool hasBitPreservingFPLogic(EVT VT) const override {
791        return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
792      }
793  
794      bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
795        // If the pair to store is a mixture of float and int values, we will
796        // save two bitwise instructions and one float-to-int instruction and
797        // add one store instruction. There is potentially a more
798        // significant benefit because it avoids the float->int domain switch
799        // for the input value, so it is more likely a win.
800        if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
801            (LTy.isInteger() && HTy.isFloatingPoint()))
802          return true;
803        // If the pair only contains int values, we will save two bitwise
804        // instructions and add one store instruction (costing one more
805        // store buffer). Since the benefit is less clear, we leave
806        // such pairs out until we get a test case proving it is a win.
807        return false;
808      }
809  
810      bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
811  
812      bool hasAndNotCompare(SDValue Y) const override;
813  
814      bool hasAndNot(SDValue Y) const override;
815  
816      bool preferShiftsToClearExtremeBits(SDValue Y) const override;
817  
818      bool
819      shouldTransformSignedTruncationCheck(EVT XVT,
820                                           unsigned KeptBits) const override {
821        // For vectors, we don't have a preference.
822        if (XVT.isVector())
823          return false;
824  
825        auto VTIsOk = [](EVT VT) -> bool {
826          return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
827                 VT == MVT::i64;
828        };
829  
830        // We are OK with KeptBitsVT being byte/word/dword, which is what MOVS
831        // supports. XVT will be larger than KeptBitsVT.
832        MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
833        return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
834      }
835  
836      bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
837        return VT.isScalarInteger();
838      }
839  
840      /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
841      MVT hasFastEqualityCompare(unsigned NumBits) const override;
842  
843      /// Allow multiple load pairs per block for smaller and faster code.
844      unsigned getMemcmpEqZeroLoadsPerBlock() const override {
845        return 2;
846      }
847  
848      /// Return the value type to use for ISD::SETCC.
849      EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
850                             EVT VT) const override;
851  
852      bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
853                                        TargetLoweringOpt &TLO) const override;
854  
855      /// Determine which of the bits specified in Mask are known to be either
856      /// zero or one and return them in the KnownZero/KnownOne bitsets.
857      void computeKnownBitsForTargetNode(const SDValue Op,
858                                         KnownBits &Known,
859                                         const APInt &DemandedElts,
860                                         const SelectionDAG &DAG,
861                                         unsigned Depth = 0) const override;
862  
863      /// Determine the number of bits in the operation that are sign bits.
864      unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
865                                               const APInt &DemandedElts,
866                                               const SelectionDAG &DAG,
867                                               unsigned Depth) const override;
868  
869      SDValue unwrapAddress(SDValue N) const override;
870  
871      bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
872                          int64_t &Offset) const override;
873  
874      SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
875  
876      bool ExpandInlineAsm(CallInst *CI) const override;
877  
878      ConstraintType getConstraintType(StringRef Constraint) const override;
879  
880      /// Examine constraint string and operand type and determine a weight value.
881      /// The operand object must already have been set up with the operand type.
882      ConstraintWeight
883        getSingleConstraintMatchWeight(AsmOperandInfo &info,
884                                       const char *constraint) const override;
885  
886      const char *LowerXConstraint(EVT ConstraintVT) const override;
887  
888      /// Lower the specified operand into the Ops vector. If it is invalid, don't
889      /// add anything to Ops. If hasMemory is true it means one of the asm
890      /// constraints of the inline asm instruction being processed is 'm'.
891      void LowerAsmOperandForConstraint(SDValue Op,
892                                        std::string &Constraint,
893                                        std::vector<SDValue> &Ops,
894                                        SelectionDAG &DAG) const override;
895  
896      unsigned
897      getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
898        if (ConstraintCode == "i")
899          return InlineAsm::Constraint_i;
900        else if (ConstraintCode == "o")
901          return InlineAsm::Constraint_o;
902        else if (ConstraintCode == "v")
903          return InlineAsm::Constraint_v;
904        else if (ConstraintCode == "X")
905          return InlineAsm::Constraint_X;
906        return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
907      }
908  
909      /// Given a physical register constraint
910      /// (e.g. {edx}), return the register number and the register class for the
911      /// register.  This should only be used for C_Register constraints.  On
912      /// error, this returns a register number of 0.
913      std::pair<unsigned, const TargetRegisterClass *>
914      getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
915                                   StringRef Constraint, MVT VT) const override;
916  
917      /// Return true if the addressing mode represented
918      /// by AM is legal for this target, for a load/store of the specified type.
919      bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
920                                 Type *Ty, unsigned AS,
921                                 Instruction *I = nullptr) const override;
922  
923      /// Return true if the specified immediate is a legal
924      /// icmp immediate, that is, the target has icmp instructions which can
925      /// compare a register against the immediate without having to materialize
926      /// the immediate into a register.
927      bool isLegalICmpImmediate(int64_t Imm) const override;
928  
929      /// Return true if the specified immediate is a legal
930      /// add immediate, that is, the target has add instructions which can
931      /// add a register and the immediate without having to materialize
932      /// the immediate into a register.
933      bool isLegalAddImmediate(int64_t Imm) const override;
934  
935      /// Return the cost of the scaling factor used in the addressing
936      /// mode represented by AM for this target, for a load/store
937      /// of the specified type.
938      /// If the AM is supported, the return value must be >= 0.
939      /// If the AM is not supported, it returns a negative value.
940      int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
941                               unsigned AS) const override;
942  
943      bool isVectorShiftByScalarCheap(Type *Ty) const override;
944  
945      /// Return true if it's free to truncate a value of
946      /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
947      /// register EAX to i16 by referencing its sub-register AX.
948      bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
949      bool isTruncateFree(EVT VT1, EVT VT2) const override;
950  
951      bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
952  
953      /// Return true if any actual instruction that defines a
954      /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
955      /// register. This does not necessarily include registers defined in
956      /// unknown ways, such as incoming arguments, or copies from unknown
957      /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
958      /// does not necessarily apply to truncate instructions. e.g. on x86-64,
959      /// all instructions that define 32-bit values implicitly zero-extend the
960      /// result out to 64 bits.
961      bool isZExtFree(Type *Ty1, Type *Ty2) const override;
962      bool isZExtFree(EVT VT1, EVT VT2) const override;
963      bool isZExtFree(SDValue Val, EVT VT2) const override;
964  
965      /// Return true if folding a vector load into ExtVal (a sign, zero, or any
966      /// extend node) is profitable.
967      bool isVectorLoadExtDesirable(SDValue) const override;
968  
969      /// Return true if an FMA operation is faster than a pair of fmul and fadd
970      /// instructions. fmuladd intrinsics will be expanded to FMAs when this
971      /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
972      bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
973  
974      /// Return true if it's profitable to narrow
975      /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
976      /// from i32 to i8 but not from i32 to i16.
977      bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
978  
979      /// Given an intrinsic, checks if on the target the intrinsic will need to map
980      /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
981      /// true and stores the intrinsic information into the IntrinsicInfo that was
982      /// passed to the function.
983      bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
984                              MachineFunction &MF,
985                              unsigned Intrinsic) const override;
986  
987      /// Returns true if the target can instruction select the
988      /// specified FP immediate natively. If false, the legalizer will
989      /// materialize the FP immediate as a load from a constant pool.
990      bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
991  
992      /// Targets can use this to indicate that they only support *some*
993      /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
994      /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
995      /// be legal.
996      bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
997  
998      /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
999      /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1000      /// constant pool entry.
1001      bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1002  
1003      /// Returns true if lowering to a jump table is allowed.
1004      bool areJTsAllowed(const Function *Fn) const override;
1005  
1006      /// If true, then instruction selection should
1007      /// seek to shrink the FP constant of the specified type to a smaller type
1008      /// in order to save space and / or reduce runtime.
1009      bool ShouldShrinkFPConstant(EVT VT) const override {
1010        // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1011        // expensive than a straight movsd. On the other hand, it's important to
1012        // shrink long double fp constant since fldt is very slow.
1013        return !X86ScalarSSEf64 || VT == MVT::f80;
1014      }
1015  
1016      /// Return true if we believe it is correct and profitable to reduce the
1017      /// load node to a smaller type.
1018      bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1019                                 EVT NewVT) const override;
1020  
1021      /// Return true if the specified scalar FP type is computed in an SSE
1022      /// register, not on the X87 floating point stack.
1023      bool isScalarFPTypeInSSEReg(EVT VT) const {
1024        return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1025               (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1026      }
1027  
1028      /// Returns true if it is beneficial to convert a load of a constant
1029      /// to just the constant itself.
1030      bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1031                                             Type *Ty) const override;
1032  
1033      bool convertSelectOfConstantsToMath(EVT VT) const override;
1034  
1035      /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1036      /// with this index.
1037      bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1038                                   unsigned Index) const override;
1039  
1040      bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1041                                        unsigned AddrSpace) const override {
1042        // If we can replace more than 2 scalar stores, there will be a reduction
1043        // in instructions even after we add a vector constant load.
1044        return NumElem > 2;
1045      }
1046  
1047      bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1048  
1049      /// Intel processors have a unified instruction and data cache
1050      const char * getClearCacheBuiltinName() const override {
1051        return nullptr; // nothing to do, move along.
1052      }
1053  
1054      unsigned getRegisterByName(const char* RegName, EVT VT,
1055                                 SelectionDAG &DAG) const override;
1056  
1057      /// If a physical register, this returns the register that receives the
1058      /// exception address on entry to an EH pad.
1059      unsigned
1060      getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1061  
1062      /// If a physical register, this returns the register that receives the
1063      /// exception typeid on entry to a landing pad.
1064      unsigned
1065      getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1066  
1067      virtual bool needsFixedCatchObjects() const override;
1068  
1069      /// This method returns a target specific FastISel object,
1070      /// or null if the target does not support "fast" ISel.
1071      FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1072                               const TargetLibraryInfo *libInfo) const override;
1073  
1074      /// If the target has a standard location for the stack protector cookie,
1075      /// returns the address of that location. Otherwise, returns nullptr.
1076      Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1077  
1078      bool useLoadStackGuardNode() const override;
1079      bool useStackGuardXorFP() const override;
1080      void insertSSPDeclarations(Module &M) const override;
1081      Value *getSDagStackGuard(const Module &M) const override;
1082      Value *getSSPStackGuardCheck(const Module &M) const override;
1083      SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1084                                  const SDLoc &DL) const override;
1085  
1086  
1087      /// If the target stores the SafeStack pointer at a fixed offset in some
1088      /// non-standard address space, return that location; the address space and
1089      /// offset are populated as appropriate.
1090      Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1091  
1092      SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1093                        SelectionDAG &DAG) const;
1094  
1095      bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1096  
1097      /// Customize the preferred legalization strategy for certain types.
1098      LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
1099  
1100      MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1101                                        EVT VT) const override;
1102  
1103      unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1104                                             CallingConv::ID CC,
1105                                             EVT VT) const override;
1106  
1107      bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1108  
1109      bool supportSwiftError() const override;
1110  
1111      StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1112  
1113      bool hasVectorBlend() const override { return true; }
1114  
1115      unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1116  
1117      /// Lower interleaved load(s) into target specific
1118      /// instructions/intrinsics.
1119      bool lowerInterleavedLoad(LoadInst *LI,
1120                                ArrayRef<ShuffleVectorInst *> Shuffles,
1121                                ArrayRef<unsigned> Indices,
1122                                unsigned Factor) const override;
1123  
1124      /// Lower interleaved store(s) into target specific
1125      /// instructions/intrinsics.
1126      bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1127                                 unsigned Factor) const override;
1128  
1129      SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1130                                     SDValue Addr, SelectionDAG &DAG)
1131                                     const override;
1132  
1133    protected:
1134      std::pair<const TargetRegisterClass *, uint8_t>
1135      findRepresentativeClass(const TargetRegisterInfo *TRI,
1136                              MVT VT) const override;
1137  
1138    private:
1139      /// Keep a reference to the X86Subtarget around so that we can
1140      /// make the right decision when generating code for different targets.
1141      const X86Subtarget &Subtarget;
1142  
1143      /// Select between SSE or x87 floating point ops.
1144      /// When SSE is available, use it for f32 operations.
1145      /// When SSE2 is available, use it for f64 operations.
1146      bool X86ScalarSSEf32;
1147      bool X86ScalarSSEf64;
1148  
1149      /// A list of legal FP immediates.
1150      std::vector<APFloat> LegalFPImmediates;
1151  
1152      /// Indicate that this x86 target can instruction
1153      /// select the specified FP immediate natively.
1154      void addLegalFPImmediate(const APFloat& Imm) {
1155        LegalFPImmediates.push_back(Imm);
1156      }
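        // Illustrative usage only, not part of the original header: the X86
        // lowering constructor registers the immediates the target can
        // materialize directly, for example (hypothetical call sites):
        //   addLegalFPImmediate(APFloat(+0.0)); // xorps / FLD0
        //   addLegalFPImmediate(APFloat(+1.0)); // FLD1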
1157  
1158      SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1159                              CallingConv::ID CallConv, bool isVarArg,
1160                              const SmallVectorImpl<ISD::InputArg> &Ins,
1161                              const SDLoc &dl, SelectionDAG &DAG,
1162                              SmallVectorImpl<SDValue> &InVals,
1163                              uint32_t *RegMask) const;
1164      SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1165                               const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1166                               const SDLoc &dl, SelectionDAG &DAG,
1167                               const CCValAssign &VA, MachineFrameInfo &MFI,
1168                               unsigned i) const;
1169      SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1170                               const SDLoc &dl, SelectionDAG &DAG,
1171                               const CCValAssign &VA,
1172                               ISD::ArgFlagsTy Flags) const;
1173  
1174      // Call lowering helpers.
1175  
1176      /// Check whether the call is eligible for tail call optimization. Targets
1177      /// that want to do tail call optimization should implement this function.
1178      bool IsEligibleForTailCallOptimization(SDValue Callee,
1179                                             CallingConv::ID CalleeCC,
1180                                             bool isVarArg,
1181                                             bool isCalleeStructRet,
1182                                             bool isCallerStructRet,
1183                                             Type *RetTy,
1184                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
1185                                      const SmallVectorImpl<SDValue> &OutVals,
1186                                      const SmallVectorImpl<ISD::InputArg> &Ins,
1187                                             SelectionDAG& DAG) const;
1188      SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1189                                      SDValue Chain, bool IsTailCall,
1190                                      bool Is64Bit, int FPDiff,
1191                                      const SDLoc &dl) const;
1192  
1193      unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1194                                           SelectionDAG &DAG) const;
1195  
1196      unsigned getAddressSpace(void) const;
1197  
1198      std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1199                                                 bool isSigned,
1200                                                 bool isReplace) const;
1201  
1202      SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1203      SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1204      SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1205      SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1206  
1207      unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1208                                    const unsigned char OpFlags = 0) const;
1209      SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1210      SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1211      SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1212                                 int64_t Offset, SelectionDAG &DAG) const;
1213      SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1214      SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1215      SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1216  
1217      SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1218      SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1219      SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1220      SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1221      SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1222      SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1223      SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1224      SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1225      SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1226      SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1227      SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1228      SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1229      SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1230      SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1231      SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1232      SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1233      SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1234      SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1235      SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1236      SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1237      SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1238      SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1239      SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1240      SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1241      SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1242      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1243  
1244      SDValue
1245      LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1246                           const SmallVectorImpl<ISD::InputArg> &Ins,
1247                           const SDLoc &dl, SelectionDAG &DAG,
1248                           SmallVectorImpl<SDValue> &InVals) const override;
1249      SDValue LowerCall(CallLoweringInfo &CLI,
1250                        SmallVectorImpl<SDValue> &InVals) const override;
1251  
1252      SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1253                          const SmallVectorImpl<ISD::OutputArg> &Outs,
1254                          const SmallVectorImpl<SDValue> &OutVals,
1255                          const SDLoc &dl, SelectionDAG &DAG) const override;
1256  
1257      bool supportSplitCSR(MachineFunction *MF) const override {
1258        return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1259            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1260      }
1261      void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1262      void insertCopiesSplitCSR(
1263        MachineBasicBlock *Entry,
1264        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1265  
1266      bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1267  
1268      bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1269  
1270      EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1271                              ISD::NodeType ExtendKind) const override;
1272  
1273      bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1274                          bool isVarArg,
1275                          const SmallVectorImpl<ISD::OutputArg> &Outs,
1276                          LLVMContext &Context) const override;
1277  
1278      const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1279  
1280      TargetLoweringBase::AtomicExpansionKind
1281      shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1282      bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1283      TargetLoweringBase::AtomicExpansionKind
1284      shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1285  
1286      LoadInst *
1287      lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1288  
1289      bool needsCmpXchgNb(Type *MemType) const;
1290  
1291      void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1292                                  MachineBasicBlock *DispatchBB, int FI) const;
1293  
1294      // Utility function to emit the low-level va_arg code for X86-64.
1295      MachineBasicBlock *
1296      EmitVAARG64WithCustomInserter(MachineInstr &MI,
1297                                    MachineBasicBlock *MBB) const;
1298  
1299      /// Utility function to emit the xmm reg save portion of va_start.
1300      MachineBasicBlock *
1301      EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1302                                               MachineBasicBlock *BB) const;
1303  
1304      MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1305                                                   MachineInstr &MI2,
1306                                                   MachineBasicBlock *BB) const;
1307  
1308      MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1309                                           MachineBasicBlock *BB) const;
1310  
1311      MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1312                                             MachineBasicBlock *BB) const;
1313  
1314      MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1315                                             MachineBasicBlock *BB) const;
1316  
1317      MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1318                                             MachineBasicBlock *BB) const;
1319  
1320      MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1321                                              MachineBasicBlock *BB) const;
1322  
1323      MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1324                                            MachineBasicBlock *BB) const;
1325  
1326      MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1327                                            MachineBasicBlock *BB) const;
1328  
1329      MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1330                                              MachineBasicBlock *BB) const;
1331  
1332      MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1333                                          MachineBasicBlock *MBB) const;
1334  
1335      void emitSetJmpShadowStackFix(MachineInstr &MI,
1336                                    MachineBasicBlock *MBB) const;
1337  
1338      MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1339                                           MachineBasicBlock *MBB) const;
1340  
1341      MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1342                                                   MachineBasicBlock *MBB) const;
1343  
1344      MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1345                                       MachineBasicBlock *MBB) const;
1346  
1347      MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1348                                               MachineBasicBlock *MBB) const;
1349  
1350      /// Emit nodes that will be selected as "test Op0,Op0", or something
1351      /// equivalent, for use with the given x86 condition code.
1352      SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1353                       SelectionDAG &DAG) const;
1354  
1355      /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1356      /// equivalent, for use with the given x86 condition code.
1357      SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1358                      SelectionDAG &DAG) const;
1359  
1360      /// Convert a comparison if required by the subtarget.
1361      SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1362  
1363      /// Check if replacement of SQRT with RSQRT should be disabled.
1364      bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1365  
1366      /// Use rsqrt* to speed up sqrt calculations.
1367      SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1368                              int &RefinementSteps, bool &UseOneConstNR,
1369                              bool Reciprocal) const override;
1370  
1371      /// Use rcp* to speed up fdiv calculations.
1372      SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1373                               int &RefinementSteps) const override;
1374  
1375      /// Reassociate floating point divisions into multiply by reciprocal.
1376      unsigned combineRepeatedFPDivisors() const override;
1377    };
1378  
1379    namespace X86 {
1380      FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1381                               const TargetLibraryInfo *libInfo);
1382    } // end namespace X86
1383  
1384    // Base class for all X86 non-masked store operations.
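  // As the accessors below reflect, operand 0 is the chain, operand 1 the
  // value being stored and operand 2 the base pointer.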
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
        N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(1); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getValue()   const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
        N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
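  // Per the accessors below, operand 0 is the chain and the remaining
  // operands are, in order, the source/pass-through value, the mask, the
  // base pointer, the index and the scale.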
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getValue()   const { return getOperand(1); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                       MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
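  ///
  /// A worked illustration (derived from the code below, not normative
  /// documentation): for MVT::v4i32 with Unary = false, Lo = true produces
  /// the mask {0, 4, 1, 5} (an unpcklps-style interleave of two inputs) and
  /// Lo = false produces {2, 6, 3, 7} (unpckhps-style); with Unary = true the
  /// mask references only the first input, e.g. {0, 0, 1, 1} for the Lo case.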
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
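  ///
  /// For example (an illustrative case, not exhaustive documentation):
  /// scaling the mask {0, -1, 3} by a factor of 2 produces
  /// {0, 1, -1, -1, 6, 7}; sentinel (negative) entries are simply repeated.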
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H