1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/Target/TargetLowering.h"
21 #include "llvm/Target/TargetOptions.h"
22 
23 namespace llvm {
24   class X86Subtarget;
25   class X86TargetMachine;
26 
27   namespace X86ISD {
28     // X86 Specific DAG Nodes
29     enum NodeType : unsigned {
30       // Start the numbering where the builtin ops leave off.
31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
32 
33       /// Bit scan forward.
34       BSF,
35       /// Bit scan reverse.
36       BSR,
37 
38       /// Double shift instructions. These correspond to
39       /// X86::SHLDxx and X86::SHRDxx instructions.
40       SHLD,
41       SHRD,
42 
43       /// Bitwise logical AND of floating point values. This corresponds
44       /// to X86::ANDPS or X86::ANDPD.
45       FAND,
46 
47       /// Bitwise logical OR of floating point values. This corresponds
48       /// to X86::ORPS or X86::ORPD.
49       FOR,
50 
51       /// Bitwise logical XOR of floating point values. This corresponds
52       /// to X86::XORPS or X86::XORPD.
53       FXOR,
54 
55       /// Bitwise logical ANDNOT of floating point values. This
56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
57       FANDN,
58 
59       /// These operations represent an abstract X86 call
60       /// instruction, which includes a bunch of information.  In particular the
61       /// operands of these nodes are:
62       ///
63       ///     #0 - The incoming token chain
64       ///     #1 - The callee
65       ///     #2 - The number of arg bytes the caller pushes on the stack.
66       ///     #3 - The number of arg bytes the callee pops off the stack.
67       ///     #4 - The value to pass in AL/AX/EAX (optional)
68       ///     #5 - The value to pass in DL/DX/EDX (optional)
69       ///
70       /// The result values of these nodes are:
71       ///
72       ///     #0 - The outgoing token chain
73       ///     #1 - The first register result value (optional)
74       ///     #2 - The second register result value (optional)
75       ///
76       CALL,
77 
78       /// This operation implements the lowering for readcyclecounter.
79       RDTSC_DAG,
80 
81       /// X86 Read Time-Stamp Counter and Processor ID.
82       RDTSCP_DAG,
83 
84       /// X86 Read Performance Monitoring Counters.
85       RDPMC_DAG,
86 
87       /// X86 compare and logical compare instructions.
88       CMP, COMI, UCOMI,
89 
90       /// X86 bit-test instructions.
91       BT,
92 
93       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94       /// operand, usually produced by a CMP instruction.
95       SETCC,
96 
97       /// X86 Select
98       SELECT,
99 
100       // Same as SETCC except it's materialized with a sbb and the value is all
101       // one's or all zero's.
102       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
103 
104       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105       /// Operands are two FP values to compare; result is a mask of
106       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
107       FSETCC,
108 
109       /// X86 conditional moves. Operand 0 and operand 1 are the two values
110       /// to select from. Operand 2 is the condition code, and operand 3 is the
111       /// flag operand produced by a CMP or TEST instruction. It also writes a
112       /// flag result.
113       CMOV,
114 
115       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
116       /// is the block to branch if condition is true, operand 2 is the
117       /// condition code, and operand 3 is the flag operand produced by a CMP
118       /// or TEST instruction.
119       BRCOND,
120 
121       /// Return with a flag operand. Operand 0 is the chain operand, operand
122       /// 1 is the number of bytes of stack to pop.
123       RET_FLAG,
124 
125       /// Return from interrupt. Operand 0 is the number of bytes to pop.
126       IRET,
127 
128       /// Repeat fill, corresponds to X86::REP_STOSx.
129       REP_STOS,
130 
131       /// Repeat move, corresponds to X86::REP_MOVSx.
132       REP_MOVS,
133 
134       /// On Darwin, this node represents the result of the popl
135       /// at function entry, used for PIC code.
136       GlobalBaseReg,
137 
138       /// A wrapper node for TargetConstantPool,
139       /// TargetExternalSymbol, and TargetGlobalAddress.
140       Wrapper,
141 
142       /// Special wrapper used under X86-64 PIC mode for RIP
143       /// relative displacements.
144       WrapperRIP,
145 
146       /// Copies a 64-bit value from the low word of an XMM vector
147       /// to an MMX vector.  If you think this is too close to the previous
148       /// mnemonic, so do I; blame Intel.
149       MOVDQ2Q,
150 
151       /// Copies a 32-bit value from the low word of a MMX
152       /// vector to a GPR.
153       MMX_MOVD2W,
154 
155       /// Copies a GPR into the low 32-bit word of a MMX vector
156       /// and zero out the high word.
157       MMX_MOVW2D,
158 
159       /// Extract an 8-bit value from a vector and zero extend it to
160       /// i32, corresponds to X86::PEXTRB.
161       PEXTRB,
162 
163       /// Extract a 16-bit value from a vector and zero extend it to
164       /// i32, corresponds to X86::PEXTRW.
165       PEXTRW,
166 
167       /// Insert any element of a 4 x float vector into any element
168       /// of a destination 4 x float vector.
169       INSERTPS,
170 
171       /// Insert the lower 8-bits of a 32-bit value to a vector,
172       /// corresponds to X86::PINSRB.
173       PINSRB,
174 
175       /// Insert the lower 16-bits of a 32-bit value to a vector,
176       /// corresponds to X86::PINSRW.
177       PINSRW, MMX_PINSRW,
178 
179       /// Shuffle 16 8-bit values within a vector.
180       PSHUFB,
181 
182       /// Compute Sum of Absolute Differences.
183       PSADBW,
184       /// Compute Double Block Packed Sum-Absolute-Differences
185       DBPSADBW,
186 
187       /// Bitwise Logical AND NOT of Packed FP values.
188       ANDNP,
189 
190       /// Blend where the selector is an immediate.
191       BLENDI,
192 
193       /// Blend where the condition has been shrunk.
194       /// This is used to emphasize that the condition mask is
195       /// no more valid for generic VSELECT optimizations.
196       SHRUNKBLEND,
197 
198       /// Combined add and sub on an FP vector.
199       ADDSUB,
200 
201       // FP vector ops with rounding mode.
202       FADD_RND,
203       FSUB_RND,
204       FMUL_RND,
205       FDIV_RND,
206       FMAX_RND,
207       FMIN_RND,
208       FSQRT_RND,
209 
210       // FP vector get exponent.
211       FGETEXP_RND,
212       // Extract Normalized Mantissas.
213       VGETMANT,
214       // FP Scale.
215       SCALEF,
216       SCALEFS,
217 
218       // Integer add/sub with unsigned saturation.
219       ADDUS,
220       SUBUS,
221 
222       // Integer add/sub with signed saturation.
223       ADDS,
224       SUBS,
225 
226       // Unsigned Integer average.
227       AVG,
228 
229       /// Integer horizontal add/sub.
230       HADD,
231       HSUB,
232 
233       /// Floating point horizontal add/sub.
234       FHADD,
235       FHSUB,
236 
237       // Integer absolute value
238       ABS,
239 
240       // Detect Conflicts Within a Vector
241       CONFLICT,
242 
243       /// Floating point max and min.
244       FMAX, FMIN,
245 
246       /// Commutative FMIN and FMAX.
247       FMAXC, FMINC,
248 
249       /// Floating point reciprocal-sqrt and reciprocal approximation.
250       /// Note that these typically require refinement
251       /// in order to obtain suitable precision.
252       FRSQRT, FRCP,
253       FRSQRTS, FRCPS,
254 
255       // Thread Local Storage.
256       TLSADDR,
257 
258       // Thread Local Storage. A call to get the start address
259       // of the TLS block for the current module.
260       TLSBASEADDR,
261 
262       // Thread Local Storage.  When calling to an OS provided
263       // thunk at the address from an earlier relocation.
264       TLSCALL,
265 
266       // Exception Handling helpers.
267       EH_RETURN,
268 
269       // SjLj exception handling setjmp.
270       EH_SJLJ_SETJMP,
271 
272       // SjLj exception handling longjmp.
273       EH_SJLJ_LONGJMP,
274 
275       // SjLj exception handling dispatch.
276       EH_SJLJ_SETUP_DISPATCH,
277 
278       /// Tail call return. See X86TargetLowering::LowerCall for
279       /// the list of operands.
280       TC_RETURN,
281 
282       // Vector move to low scalar and zero higher vector elements.
283       VZEXT_MOVL,
284 
285       // Vector integer zero-extend.
286       VZEXT,
287       // Vector integer signed-extend.
288       VSEXT,
289 
290       // Vector integer truncate.
291       VTRUNC,
292       // Vector integer truncate with unsigned/signed saturation.
293       VTRUNCUS, VTRUNCS,
294 
295       // Vector FP extend.
296       VFPEXT,
297 
298       // Vector FP round.
299       VFPROUND,
300 
301       // Vector signed/unsigned integer to double.
302       CVTDQ2PD, CVTUDQ2PD,
303 
304       // Convert a vector to mask, set bits based on MSB.
305       CVT2MASK,
306 
307       // 128-bit vector logical left / right shift
308       VSHLDQ, VSRLDQ,
309 
310       // Vector shift elements
311       VSHL, VSRL, VSRA,
312 
313       // Vector variable shift right arithmetic.
314       // Unlike ISD::SRA, if the shift count is greater than the element size,
315       // the sign bit is used to fill the destination data element.
316       VSRAV,
317 
318       // Vector shift elements by immediate
319       VSHLI, VSRLI, VSRAI,
320 
321       // Bit rotate by immediate
322       VROTLI, VROTRI,
323 
324       // Vector packed double/float comparison.
325       CMPP,
326 
327       // Vector integer comparisons.
328       PCMPEQ, PCMPGT,
329       // Vector integer comparisons, the result is in a mask vector.
330       PCMPEQM, PCMPGTM,
331 
332       MULTISHIFT,
333 
334       /// Vector comparison generating mask bits for fp and
335       /// integer signed and unsigned data types.
336       CMPM,
337       CMPMU,
338       // Vector comparison with rounding mode for FP values
339       CMPM_RND,
340 
341       // Arithmetic operations with FLAGS results.
342       ADD, SUB, ADC, SBB, SMUL,
343       INC, DEC, OR, XOR, AND,
344 
345       // Bit field extract.
346       BEXTR,
347 
348       // LOW, HI, FLAGS = umul LHS, RHS.
349       UMUL,
350 
351       // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
352       SMUL8, UMUL8,
353 
354       // 8-bit divrem that zero-/sign-extend the high result (AH).
355       UDIVREM8_ZEXT_HREG,
356       SDIVREM8_SEXT_HREG,
357 
358       // X86-specific multiply by immediate.
359       MUL_IMM,
360 
361       // Vector sign bit extraction.
362       MOVMSK,
363 
364       // Vector bitwise comparisons.
365       PTEST,
366 
367       // Vector packed fp sign bitwise comparisons.
368       TESTP,
369 
370       // Vector "test" in AVX-512, the result is in a mask vector.
371       TESTM,
372       TESTNM,
373 
374       // OR/AND test for masks.
375       KORTEST,
376       KTEST,
377 
378       // Several flavors of instructions with vector shuffle behaviors.
379       // Saturated signed/unsigned packing.
380       PACKSS,
381       PACKUS,
382       // Intra-lane alignr.
383       PALIGNR,
384       // AVX512 inter-lane alignr.
385       VALIGN,
386       PSHUFD,
387       PSHUFHW,
388       PSHUFLW,
389       SHUFP,
390       // Shuffle Packed Values at 128-bit granularity.
391       SHUF128,
392       MOVDDUP,
393       MOVSHDUP,
394       MOVSLDUP,
395       MOVLHPS,
396       MOVLHPD,
397       MOVHLPS,
398       MOVLPS,
399       MOVLPD,
400       MOVSD,
401       MOVSS,
402       UNPCKL,
403       UNPCKH,
404       VPERMILPV,
405       VPERMILPI,
406       VPERMI,
407       VPERM2X128,
408 
409       // Variable Permute (VPERM).
410       // Res = VPERMV MaskV, V0
411       VPERMV,
412 
413       // 3-op Variable Permute (VPERMT2).
414       // Res = VPERMV3 V0, MaskV, V1
415       VPERMV3,
416 
417       // 3-op Variable Permute overwriting the index (VPERMI2).
418       // Res = VPERMIV3 V0, MaskV, V1
419       VPERMIV3,
420 
421       // Bitwise ternary logic.
422       VPTERNLOG,
423       // Fix Up Special Packed Float32/64 values.
424       VFIXUPIMM,
425       VFIXUPIMMS,
426       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
427       VRANGE,
428       // Reduce - Perform Reduction Transformation on scalar/packed FP.
429       VREDUCE,
430       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
431       VRNDSCALE,
432       // Tests the types of FP values, for packed types.
433       VFPCLASS,
434       // Tests the types of FP values, for scalar types.
435       VFPCLASSS,
436 
437       // Broadcast scalar to vector.
438       VBROADCAST,
439       // Broadcast mask to vector.
440       VBROADCASTM,
441       // Broadcast subvector to vector.
442       SUBV_BROADCAST,
443 
444       // Insert/Extract vector element.
445       VINSERT,
446       VEXTRACT,
447 
448       /// SSE4A Extraction and Insertion.
449       EXTRQI, INSERTQI,
450 
451       // XOP variable/immediate rotations.
452       VPROT, VPROTI,
453       // XOP arithmetic/logical shifts.
454       VPSHA, VPSHL,
455       // XOP signed/unsigned integer comparisons.
456       VPCOM, VPCOMU,
457       // XOP packed permute bytes.
458       VPPERM,
459       // XOP two source permutation.
460       VPERMIL2,
461 
462       // Vector multiply packed unsigned doubleword integers.
463       PMULUDQ,
464       // Vector multiply packed signed doubleword integers.
465       PMULDQ,
466       // Vector Multiply Packed Unsigned Integers with Round and Scale.
467       MULHRS,
468 
469       // Multiply and Add Packed Integers.
470       VPMADDUBSW, VPMADDWD,
471       VPMADD52L, VPMADD52H,
472 
473       // FMA nodes.
474       FMADD,
475       FNMADD,
476       FMSUB,
477       FNMSUB,
478       FMADDSUB,
479       FMSUBADD,
480 
481       // FMA with rounding mode.
482       FMADD_RND,
483       FNMADD_RND,
484       FMSUB_RND,
485       FNMSUB_RND,
486       FMADDSUB_RND,
487       FMSUBADD_RND,
488 
489       // Compress and expand.
490       COMPRESS,
491       EXPAND,
492 
493       // Convert Signed/Unsigned Integer to Scalar Floating-Point Value
494       // with rounding mode.
495       SINT_TO_FP_RND,
496       UINT_TO_FP_RND,
497 
498       // Vector float/double to signed/unsigned integer.
499       FP_TO_SINT_RND, FP_TO_UINT_RND,
500       // Scalar float/double to signed/unsigned integer.
501       SCALAR_FP_TO_SINT_RND, SCALAR_FP_TO_UINT_RND,
502 
503       // Save xmm argument registers to the stack, according to %al. An operator
504       // is needed so that this can be expanded with control flow.
505       VASTART_SAVE_XMM_REGS,
506 
507       // Windows's _chkstk call to do stack probing.
508       WIN_ALLOCA,
509 
510       // For allocating variable amounts of stack space when using
511       // segmented stacks. Check if the current stacklet has enough space, and
512       // falls back to heap allocation if not.
513       SEG_ALLOCA,
514 
515       // Memory barriers.
516       MEMBARRIER,
517       MFENCE,
518 
519       // Store FP status word into i16 register.
520       FNSTSW16r,
521 
522       // Store contents of %ah into %eflags.
523       SAHF,
524 
525       // Get a random integer and indicate whether it is valid in CF.
526       RDRAND,
527 
528       // Get a NIST SP800-90B & C compliant random integer and
529       // indicate whether it is valid in CF.
530       RDSEED,
531 
532       // SSE42 string comparisons.
533       PCMPISTRI,
534       PCMPESTRI,
535 
536       // Test if in transactional execution.
537       XTEST,
538 
539       // ERI instructions.
540       RSQRT28, RCP28, EXP2,
541 
542       // Compare and swap.
543       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
544       LCMPXCHG8_DAG,
545       LCMPXCHG16_DAG,
546       LCMPXCHG8_SAVE_EBX_DAG,
547       LCMPXCHG16_SAVE_RBX_DAG,
548 
549       /// LOCK-prefixed arithmetic read-modify-write instructions.
550       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
551       LADD, LSUB, LOR, LXOR, LAND,
552 
553       // Load, scalar_to_vector, and zero extend.
554       VZEXT_LOAD,
555 
556       // Store FP control word into i16 memory.
557       FNSTCW16m,
558 
559       /// This instruction implements FP_TO_SINT with the
560       /// integer destination in memory and a FP reg source.  This corresponds
561       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
562       /// has two inputs (token chain and address) and two outputs (int value
563       /// and token chain).
564       FP_TO_INT16_IN_MEM,
565       FP_TO_INT32_IN_MEM,
566       FP_TO_INT64_IN_MEM,
567 
568       /// This instruction implements SINT_TO_FP with the
569       /// integer source in memory and FP reg result.  This corresponds to the
570       /// X86::FILD*m instructions. It has three inputs (token chain, address,
571       /// and source type) and two outputs (FP value and token chain). FILD_FLAG
572       /// also produces a flag.
573       FILD,
574       FILD_FLAG,
575 
576       /// This instruction implements an extending load to FP stack slots.
577       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
578       /// operand, ptr to load from, and a ValueType node indicating the type
579       /// to load to.
580       FLD,
581 
582       /// This instruction implements a truncating store to FP stack
583       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
584       /// chain operand, value to store, address, and a ValueType to store it
585       /// as.
586       FST,
587 
588       /// This instruction grabs the address of the next argument
589       /// from a va_list. (reads and modifies the va_list in memory)
590       VAARG_64
591 
592       // WARNING: Do not add anything at the end unless you want the node to
593       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
594       // opcodes will be treated as target memory ops!
595     };
596   } // end namespace X86ISD
597 
598   /// Define some predicates that are used for node matching.
599   namespace X86 {
600     /// Return true if the specified
601     /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
602     /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
603     bool isVEXTRACT128Index(SDNode *N);
604 
605     /// Return true if the specified
606     /// INSERT_SUBVECTOR operand specifies a subvector insert that is
607     /// suitable for input to VINSERTF128, VINSERTI128 instructions.
608     bool isVINSERT128Index(SDNode *N);
609 
610     /// Return true if the specified
611     /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
612     /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
613     bool isVEXTRACT256Index(SDNode *N);
614 
615     /// Return true if the specified
616     /// INSERT_SUBVECTOR operand specifies a subvector insert that is
617     /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
618     bool isVINSERT256Index(SDNode *N);
619 
620     /// Return the appropriate
621     /// immediate to extract the specified EXTRACT_SUBVECTOR index
622     /// with VEXTRACTF128, VEXTRACTI128 instructions.
623     unsigned getExtractVEXTRACT128Immediate(SDNode *N);
624 
625     /// Return the appropriate
626     /// immediate to insert at the specified INSERT_SUBVECTOR index
627     /// with VINSERTF128, VINSERTI128 instructions.
628     unsigned getInsertVINSERT128Immediate(SDNode *N);
629 
630     /// Return the appropriate
631     /// immediate to extract the specified EXTRACT_SUBVECTOR index
632     /// with VEXTRACTF64X4, VEXTRACTI64X4 instructions.
633     unsigned getExtractVEXTRACT256Immediate(SDNode *N);
634 
635     /// Return the appropriate
636     /// immediate to insert at the specified INSERT_SUBVECTOR index
637     /// with VINSERTF64X4, VINSERTI64X4 instructions.
638     unsigned getInsertVINSERT256Immediate(SDNode *N);
639 
640     /// Returns true if Elt is a constant zero or floating point constant +0.0.
641     bool isZeroNode(SDValue Elt);
642 
643     /// Returns true if the given offset can
644     /// fit into the displacement field of the instruction.
645     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
646                                       bool hasSymbolicDisplacement = true);
647 
648     /// Determines whether the callee is required to pop its
649     /// own arguments. Callee pop is necessary to support tail calls.
650     bool isCalleePop(CallingConv::ID CallingConv,
651                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
652 
653   } // end namespace X86
654 
655   //===--------------------------------------------------------------------===//
656   //  X86 Implementation of the TargetLowering interface
657   class X86TargetLowering final : public TargetLowering {
658   public:
659     explicit X86TargetLowering(const X86TargetMachine &TM,
660                                const X86Subtarget &STI);
661 
662     unsigned getJumpTableEncoding() const override;
663     bool useSoftFloat() const override;
664 
getScalarShiftAmountTy(const DataLayout &,EVT)665     MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
666       return MVT::i8;
667     }
668 
669     const MCExpr *
670     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
671                               const MachineBasicBlock *MBB, unsigned uid,
672                               MCContext &Ctx) const override;
673 
674     /// Returns relocation base for the given PIC jumptable.
675     SDValue getPICJumpTableRelocBase(SDValue Table,
676                                      SelectionDAG &DAG) const override;
677     const MCExpr *
678     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
679                                  unsigned JTI, MCContext &Ctx) const override;
680 
681     /// Return the desired alignment for ByVal aggregate
682     /// function arguments in the caller parameter area. For X86, aggregates
683     /// that contain SSE vectors are placed at 16-byte boundaries while the
684     /// rest are at 4-byte boundaries.
685     unsigned getByValTypeAlignment(Type *Ty,
686                                    const DataLayout &DL) const override;
687 
688     /// Returns the target specific optimal type for load
689     /// and store operations as a result of memset, memcpy, and memmove
690     /// lowering. If DstAlign is zero that means the destination
691     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
692     /// means there isn't a need to check it against alignment requirement,
693     /// probably because the source does not need to be loaded. If 'IsMemset' is
694     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
695     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
696     /// source is constant so it does not need to be loaded.
697     /// It returns EVT::Other if the type should be determined using generic
698     /// target-independent logic.
699     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
700                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
701                             MachineFunction &MF) const override;
702 
703     /// Returns true if it's safe to use load / store of the
704     /// specified type to expand memcpy / memset inline. This is mostly true
705     /// for all types except for some special cases. For example, on X86
706     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
707     /// also does type conversion. Note the specified type doesn't have to be
708     /// legal as the hook is used before type legalization.
709     bool isSafeMemOpType(MVT VT) const override;
710 
711     /// Returns true if the target allows unaligned memory accesses of the
712     /// specified type. Returns whether it is "fast" in the last argument.
713     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
714                                        bool *Fast) const override;
715 
716     /// Provide custom lowering hooks for some operations.
717     ///
718     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
719 
720     /// Places new result values for the node in Results (their number
721     /// and types must exactly match those of the original return values of
722     /// the node), or leaves Results empty, which indicates that the node is not
723     /// to be custom lowered after all.
724     void LowerOperationWrapper(SDNode *N,
725                                SmallVectorImpl<SDValue> &Results,
726                                SelectionDAG &DAG) const override;
727 
728     /// Replace the results of node with an illegal result
729     /// type with new values built out of custom code.
730     ///
731     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
732                             SelectionDAG &DAG) const override;
733 
734     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
735 
736     /// Return true if the target has native support for
737     /// the specified value type and it is 'desirable' to use the type for the
738     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
739     /// instruction encodings are longer and some i16 instructions are slow.
740     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
741 
742     /// Return true if the target has native support for the
743     /// specified value type and it is 'desirable' to use the type. e.g. On x86
744     /// i16 is legal, but undesirable since i16 instruction encodings are longer
745     /// and some i16 instructions are slow.
746     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
747 
748     /// Return true if the MachineFunction contains a COPY which would imply
749     /// HasOpaqueSPAdjustment.
750     bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override;
751 
752     MachineBasicBlock *
753     EmitInstrWithCustomInserter(MachineInstr &MI,
754                                 MachineBasicBlock *MBB) const override;
755 
756     /// This method returns the name of a target specific DAG node.
757     const char *getTargetNodeName(unsigned Opcode) const override;
758 
759     bool isCheapToSpeculateCttz() const override;
760 
761     bool isCheapToSpeculateCtlz() const override;
762 
hasBitPreservingFPLogic(EVT VT)763     bool hasBitPreservingFPLogic(EVT VT) const override {
764       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
765     }
766 
767     bool hasAndNotCompare(SDValue Y) const override;
768 
769     /// Return the value type to use for ISD::SETCC.
770     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
771                            EVT VT) const override;
772 
773     /// Determine which of the bits specified in Mask are known to be either
774     /// zero or one and return them in the KnownZero/KnownOne bitsets.
775     void computeKnownBitsForTargetNode(const SDValue Op,
776                                        APInt &KnownZero,
777                                        APInt &KnownOne,
778                                        const SelectionDAG &DAG,
779                                        unsigned Depth = 0) const override;
780 
781     /// Determine the number of bits in the operation that are sign bits.
782     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
783                                              const SelectionDAG &DAG,
784                                              unsigned Depth) const override;
785 
786     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
787                         int64_t &Offset) const override;
788 
789     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
790 
791     bool ExpandInlineAsm(CallInst *CI) const override;
792 
793     ConstraintType getConstraintType(StringRef Constraint) const override;
794 
795     /// Examine constraint string and operand type and determine a weight value.
796     /// The operand object must already have been set up with the operand type.
797     ConstraintWeight
798       getSingleConstraintMatchWeight(AsmOperandInfo &info,
799                                      const char *constraint) const override;
800 
801     const char *LowerXConstraint(EVT ConstraintVT) const override;
802 
803     /// Lower the specified operand into the Ops vector. If it is invalid, don't
804     /// add anything to Ops. If hasMemory is true it means one of the asm
805     /// constraint of the inline asm instruction being processed is 'm'.
806     void LowerAsmOperandForConstraint(SDValue Op,
807                                       std::string &Constraint,
808                                       std::vector<SDValue> &Ops,
809                                       SelectionDAG &DAG) const override;
810 
811     unsigned
812     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
813       if (ConstraintCode == "i") // "i", "o", "v" and "X" are mapped here.
814         return InlineAsm::Constraint_i;
815       if (ConstraintCode == "o")
816         return InlineAsm::Constraint_o;
817       if (ConstraintCode == "v")
818         return InlineAsm::Constraint_v;
819       if (ConstraintCode == "X")
820         return InlineAsm::Constraint_X;
821       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); // All other codes: base class.
822     }
823 
824     /// Given a physical register constraint
825     /// (e.g. {edx}), return the register number and the register class for the
826     /// register.  This should only be used for C_Register constraints.  On
827     /// error, this returns a register number of 0.
828     std::pair<unsigned, const TargetRegisterClass *>
829     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
830                                  StringRef Constraint, MVT VT) const override;
831 
832     /// Return true if the addressing mode represented
833     /// by AM is legal for this target, for a load/store of the specified type.
834     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
835                                Type *Ty, unsigned AS) const override;
836 
837     /// Return true if the specified immediate is legal
838     /// icmp immediate, that is the target has icmp instructions which can
839     /// compare a register against the immediate without having to materialize
840     /// the immediate into a register.
841     bool isLegalICmpImmediate(int64_t Imm) const override;
842 
843     /// Return true if the specified immediate is legal
844     /// add immediate, that is the target has add instructions which can
845     /// add a register and the immediate without having to materialize
846     /// the immediate into a register.
847     bool isLegalAddImmediate(int64_t Imm) const override;
848 
849     /// \brief Return the cost of the scaling factor used in the addressing
850     /// mode represented by AM for this target, for a load/store
851     /// of the specified type.
852     /// If the AM is supported, the return value must be >= 0.
853     /// If the AM is not supported, it returns a negative value.
854     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
855                              unsigned AS) const override;
856 
857     bool isVectorShiftByScalarCheap(Type *Ty) const override;
858 
    /// Return true if it's free to truncate a value of type Ty1 to type Ty2.
    /// e.g. On x86 it's free to truncate an i32 value in register EAX to i16
    /// by referencing its sub-register AX.
862     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
863     bool isTruncateFree(EVT VT1, EVT VT2) const override;
864 
865     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
866 
    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
875     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
876     bool isZExtFree(EVT VT1, EVT VT2) const override;
877     bool isZExtFree(SDValue Val, EVT VT2) const override;
878 
879     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
880     /// extend node) is profitable.
881     bool isVectorLoadExtDesirable(SDValue) const override;
882 
883     /// Return true if an FMA operation is faster than a pair of fmul and fadd
884     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
885     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
886     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
887 
888     /// Return true if it's profitable to narrow
889     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
890     /// from i32 to i8 but not from i32 to i16.
891     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
892 
893     /// Given an intrinsic, checks if on the target the intrinsic will need to map
894     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
895     /// true and stores the intrinsic information into the IntrinsicInfo that was
896     /// passed to the function.
897     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
898                             unsigned Intrinsic) const override;
899 
900     /// Returns true if the target can instruction select the
901     /// specified FP immediate natively. If false, the legalizer will
902     /// materialize the FP immediate as a load from a constant pool.
903     bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
904 
905     /// Targets can use this to indicate that they only support *some*
906     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
907     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
908     /// be legal.
909     bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
910                             EVT VT) const override;
911 
    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
915     bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
916                                 EVT VT) const override;
917 
918     /// If true, then instruction selection should
919     /// seek to shrink the FP constant of the specified type to a smaller type
920     /// in order to save space and / or reduce runtime.
ShouldShrinkFPConstant(EVT VT)921     bool ShouldShrinkFPConstant(EVT VT) const override {
922       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
923       // expensive than a straight movsd. On the other hand, it's important to
924       // shrink long double fp constant since fldt is very slow.
925       return !X86ScalarSSEf64 || VT == MVT::f80;
926     }
927 
928     /// Return true if we believe it is correct and profitable to reduce the
929     /// load node to a smaller type.
930     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
931                                EVT NewVT) const override;
932 
933     /// Return true if the specified scalar FP type is computed in an SSE
934     /// register, not on the X87 floating point stack.
isScalarFPTypeInSSEReg(EVT VT)935     bool isScalarFPTypeInSSEReg(EVT VT) const {
936       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
937              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
938     }
939 
940     /// \brief Returns true if it is beneficial to convert a load of a constant
941     /// to just the constant itself.
942     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
943                                            Type *Ty) const override;
944 
945     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
946     /// with this index.
947     bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
948 
949     /// Intel processors have a unified instruction and data cache
getClearCacheBuiltinName()950     const char * getClearCacheBuiltinName() const override {
951       return nullptr; // nothing to do, move along.
952     }
953 
954     unsigned getRegisterByName(const char* RegName, EVT VT,
955                                SelectionDAG &DAG) const override;
956 
957     /// If a physical register, this returns the register that receives the
958     /// exception address on entry to an EH pad.
959     unsigned
960     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
961 
962     /// If a physical register, this returns the register that receives the
963     /// exception typeid on entry to a landing pad.
964     unsigned
965     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
966 
967     virtual bool needsFixedCatchObjects() const override;
968 
969     /// This method returns a target specific FastISel object,
970     /// or null if the target does not support "fast" ISel.
971     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
972                              const TargetLibraryInfo *libInfo) const override;
973 
974     /// If the target has a standard location for the stack protector cookie,
975     /// returns the address of that location. Otherwise, returns nullptr.
976     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
977 
978     bool useLoadStackGuardNode() const override;
979     void insertSSPDeclarations(Module &M) const override;
980     Value *getSDagStackGuard(const Module &M) const override;
981     Value *getSSPStackGuardCheck(const Module &M) const override;
982 
    /// If the target stores the SafeStack pointer at a fixed offset in some
    /// non-standard address space, return a Value giving that location.
986     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
987 
988     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
989                       SelectionDAG &DAG) const;
990 
991     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
992 
993     /// \brief Customize the preferred legalization strategy for certain types.
994     LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
995 
996     bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
997 
supportSwiftError()998     bool supportSwiftError() const override {
999       return true;
1000     }
1001 
1002   protected:
1003     std::pair<const TargetRegisterClass *, uint8_t>
1004     findRepresentativeClass(const TargetRegisterInfo *TRI,
1005                             MVT VT) const override;
1006 
1007   private:
1008     /// Keep a reference to the X86Subtarget around so that we can
1009     /// make the right decision when generating code for different targets.
1010     const X86Subtarget &Subtarget;
1011 
1012     /// Select between SSE or x87 floating point ops.
1013     /// When SSE is available, use it for f32 operations.
1014     /// When SSE2 is available, use it for f64 operations.
1015     bool X86ScalarSSEf32;
1016     bool X86ScalarSSEf64;
1017 
1018     /// A list of legal FP immediates.
1019     std::vector<APFloat> LegalFPImmediates;
1020 
1021     /// Indicate that this x86 target can instruction
1022     /// select the specified FP immediate natively.
addLegalFPImmediate(const APFloat & Imm)1023     void addLegalFPImmediate(const APFloat& Imm) {
1024       LegalFPImmediates.push_back(Imm);
1025     }
1026 
1027     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1028                             CallingConv::ID CallConv, bool isVarArg,
1029                             const SmallVectorImpl<ISD::InputArg> &Ins,
1030                             const SDLoc &dl, SelectionDAG &DAG,
1031                             SmallVectorImpl<SDValue> &InVals) const;
1032     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1033                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1034                              const SDLoc &dl, SelectionDAG &DAG,
1035                              const CCValAssign &VA, MachineFrameInfo *MFI,
1036                              unsigned i) const;
1037     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1038                              const SDLoc &dl, SelectionDAG &DAG,
1039                              const CCValAssign &VA,
1040                              ISD::ArgFlagsTy Flags) const;
1041 
1042     // Call lowering helpers.
1043 
1044     /// Check whether the call is eligible for tail call optimization. Targets
1045     /// that want to do tail call optimization should implement this function.
1046     bool IsEligibleForTailCallOptimization(SDValue Callee,
1047                                            CallingConv::ID CalleeCC,
1048                                            bool isVarArg,
1049                                            bool isCalleeStructRet,
1050                                            bool isCallerStructRet,
1051                                            Type *RetTy,
1052                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1053                                     const SmallVectorImpl<SDValue> &OutVals,
1054                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1055                                            SelectionDAG& DAG) const;
1056     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1057                                     SDValue Chain, bool IsTailCall,
1058                                     bool Is64Bit, int FPDiff,
1059                                     const SDLoc &dl) const;
1060 
1061     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1062                                          SelectionDAG &DAG) const;
1063 
1064     unsigned getAddressSpace(void) const;
1065 
1066     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1067                                                bool isSigned,
1068                                                bool isReplace) const;
1069 
1070     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1071     SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
1072     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1073     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1074     SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
1075     SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
1076 
1077     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1078     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1079     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1080     SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1081                                int64_t Offset, SelectionDAG &DAG) const;
1082     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1083     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1084     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1085     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1086     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1087     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
1088     SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
1089     SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
1090     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1091     SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
1092     SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
1093     SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
1094                       SelectionDAG &DAG) const;
1095     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1096     SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
1097     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1098     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1099     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1100     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1101     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1102     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1103     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1104     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1105     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1106     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1107     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1108     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1109     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1110     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1111     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1112     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1113     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1114     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1115 
1116     SDValue
1117     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1118                          const SmallVectorImpl<ISD::InputArg> &Ins,
1119                          const SDLoc &dl, SelectionDAG &DAG,
1120                          SmallVectorImpl<SDValue> &InVals) const override;
1121     SDValue LowerCall(CallLoweringInfo &CLI,
1122                       SmallVectorImpl<SDValue> &InVals) const override;
1123 
1124     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1125                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1126                         const SmallVectorImpl<SDValue> &OutVals,
1127                         const SDLoc &dl, SelectionDAG &DAG) const override;
1128 
supportSplitCSR(MachineFunction * MF)1129     bool supportSplitCSR(MachineFunction *MF) const override {
1130       return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
1131           MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
1132     }
1133     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1134     void insertCopiesSplitCSR(
1135       MachineBasicBlock *Entry,
1136       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1137 
1138     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1139 
1140     bool mayBeEmittedAsTailCall(CallInst *CI) const override;
1141 
1142     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1143                             ISD::NodeType ExtendKind) const override;
1144 
1145     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1146                         bool isVarArg,
1147                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1148                         LLVMContext &Context) const override;
1149 
1150     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1151 
1152     TargetLoweringBase::AtomicExpansionKind
1153     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1154     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1155     TargetLoweringBase::AtomicExpansionKind
1156     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1157 
1158     LoadInst *
1159     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1160 
1161     bool needsCmpXchgNb(Type *MemType) const;
1162 
1163     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1164                                 MachineBasicBlock *DispatchBB, int FI) const;
1165 
1166     // Utility function to emit the low-level va_arg code for X86-64.
1167     MachineBasicBlock *
1168     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1169                                   MachineBasicBlock *MBB) const;
1170 
1171     /// Utility function to emit the xmm reg save portion of va_start.
1172     MachineBasicBlock *
1173     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1174                                              MachineBasicBlock *BB) const;
1175 
1176     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1177                                          MachineBasicBlock *BB) const;
1178 
1179     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1180                                            MachineBasicBlock *BB) const;
1181 
1182     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1183                                            MachineBasicBlock *BB) const;
1184 
1185     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1186                                            MachineBasicBlock *BB) const;
1187 
1188     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1189                                             MachineBasicBlock *BB) const;
1190 
1191     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1192                                           MachineBasicBlock *BB) const;
1193 
1194     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1195                                           MachineBasicBlock *BB) const;
1196 
1197     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1198                                         MachineBasicBlock *MBB) const;
1199 
1200     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1201                                          MachineBasicBlock *MBB) const;
1202 
1203     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1204                                      MachineBasicBlock *MBB) const;
1205 
1206     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1207                                              MachineBasicBlock *MBB) const;
1208 
1209     /// Emit nodes that will be selected as "test Op0,Op0", or something
1210     /// equivalent, for use with the given x86 condition code.
1211     SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1212                      SelectionDAG &DAG) const;
1213 
1214     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1215     /// equivalent, for use with the given x86 condition code.
1216     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1217                     SelectionDAG &DAG) const;
1218 
1219     /// Convert a comparison if required by the subtarget.
1220     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1221 
1222     /// Use rsqrt* to speed up sqrt calculations.
1223     SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
1224                              unsigned &RefinementSteps,
1225                              bool &UseOneConstNR) const override;
1226 
1227     /// Use rcp* to speed up fdiv calculations.
1228     SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
1229                              unsigned &RefinementSteps) const override;
1230 
1231     /// Reassociate floating point divisions into multiply by reciprocal.
1232     unsigned combineRepeatedFPDivisors() const override;
1233   };
1234 
1235   namespace X86 {
1236     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1237                              const TargetLibraryInfo *libInfo);
1238   } // end namespace X86
1239 } // end namespace llvm
1240 
1241 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1242