• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  // This file defines the interfaces that NVPTX uses to lower LLVM code into a
11  // selection DAG.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16  #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17  
18  #include "NVPTX.h"
19  #include "llvm/CodeGen/SelectionDAG.h"
20  #include "llvm/Target/TargetLowering.h"
21  
22  namespace llvm {
23  namespace NVPTXISD {
    // NVPTX-specific node opcodes. The enumerators fall into two numbered
    // ranges: the first continues from ISD::BUILTIN_OP_END, the second starts
    // at ISD::FIRST_TARGET_MEMORY_OPCODE. Enumerator order determines the
    // numeric values, so new entries must not be inserted carelessly.
24  enum NodeType {
25    // Start the numbering from where ISD NodeType finishes.
26    FIRST_NUMBER = ISD::BUILTIN_OP_END,
27    Wrapper,
28    CALL,
29    RET_FLAG,
30    LOAD_PARAM,
31    DeclareParam,
32    DeclareScalarParam,
33    DeclareRetParam,
34    DeclareRet,
35    DeclareScalarRet,
36    PrintCall,
37    PrintCallUni,
38    CallArgBegin,
39    CallArg,
40    LastCallArg,
41    CallArgEnd,
42    CallVoid,
43    CallVal,
44    CallSymbol,
45    Prototype,
46    MoveParam,
47    PseudoUseParam,
48    RETURN,
49    CallSeqBegin,
50    CallSeqEnd,
51    CallPrototype,
52    FUN_SHFL_CLAMP,
53    FUN_SHFR_CLAMP,
54    MUL_WIDE_SIGNED,
55    MUL_WIDE_UNSIGNED,
56    IMAD,
57    Dummy,
58  
      // The opcodes from here on are numbered starting at
      // ISD::FIRST_TARGET_MEMORY_OPCODE.
      // NOTE(review): presumably this range marks them as memory-accessing
      // nodes -- confirm against the ISD opcode documentation.
59    LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
60    LoadV4,
61    LDGV2, // LDG.v2
62    LDGV4, // LDG.v4
63    LDUV2, // LDU.v2
64    LDUV4, // LDU.v4
65    StoreV2,
66    StoreV4,
67    LoadParam,
68    LoadParamV2,
69    LoadParamV4,
70    StoreParam,
71    StoreParamV2,
72    StoreParamV4,
73    StoreParamS32, // to sext and store a <32bit value, not used currently
74    StoreParamU32, // to zext and store a <32bit value, not used currently
75    StoreRetval,
76    StoreRetvalV2,
77    StoreRetvalV4,
78  
79    // Texture intrinsics
      // NOTE(review): the names appear to encode geometry, result element type
      // and coordinate type, with optional Level/Grad suffixes -- confirm
      // against the intrinsic definitions.
80    Tex1DFloatS32,
81    Tex1DFloatFloat,
82    Tex1DFloatFloatLevel,
83    Tex1DFloatFloatGrad,
84    Tex1DS32S32,
85    Tex1DS32Float,
86    Tex1DS32FloatLevel,
87    Tex1DS32FloatGrad,
88    Tex1DU32S32,
89    Tex1DU32Float,
90    Tex1DU32FloatLevel,
91    Tex1DU32FloatGrad,
92    Tex1DArrayFloatS32,
93    Tex1DArrayFloatFloat,
94    Tex1DArrayFloatFloatLevel,
95    Tex1DArrayFloatFloatGrad,
96    Tex1DArrayS32S32,
97    Tex1DArrayS32Float,
98    Tex1DArrayS32FloatLevel,
99    Tex1DArrayS32FloatGrad,
100    Tex1DArrayU32S32,
101    Tex1DArrayU32Float,
102    Tex1DArrayU32FloatLevel,
103    Tex1DArrayU32FloatGrad,
104    Tex2DFloatS32,
105    Tex2DFloatFloat,
106    Tex2DFloatFloatLevel,
107    Tex2DFloatFloatGrad,
108    Tex2DS32S32,
109    Tex2DS32Float,
110    Tex2DS32FloatLevel,
111    Tex2DS32FloatGrad,
112    Tex2DU32S32,
113    Tex2DU32Float,
114    Tex2DU32FloatLevel,
115    Tex2DU32FloatGrad,
116    Tex2DArrayFloatS32,
117    Tex2DArrayFloatFloat,
118    Tex2DArrayFloatFloatLevel,
119    Tex2DArrayFloatFloatGrad,
120    Tex2DArrayS32S32,
121    Tex2DArrayS32Float,
122    Tex2DArrayS32FloatLevel,
123    Tex2DArrayS32FloatGrad,
124    Tex2DArrayU32S32,
125    Tex2DArrayU32Float,
126    Tex2DArrayU32FloatLevel,
127    Tex2DArrayU32FloatGrad,
128    Tex3DFloatS32,
129    Tex3DFloatFloat,
130    Tex3DFloatFloatLevel,
131    Tex3DFloatFloatGrad,
132    Tex3DS32S32,
133    Tex3DS32Float,
134    Tex3DS32FloatLevel,
135    Tex3DS32FloatGrad,
136    Tex3DU32S32,
137    Tex3DU32Float,
138    Tex3DU32FloatLevel,
139    Tex3DU32FloatGrad,
140    TexCubeFloatFloat,
141    TexCubeFloatFloatLevel,
142    TexCubeS32Float,
143    TexCubeS32FloatLevel,
144    TexCubeU32Float,
145    TexCubeU32FloatLevel,
146    TexCubeArrayFloatFloat,
147    TexCubeArrayFloatFloatLevel,
148    TexCubeArrayS32Float,
149    TexCubeArrayS32FloatLevel,
150    TexCubeArrayU32Float,
151    TexCubeArrayU32FloatLevel,
152    Tld4R2DFloatFloat,
153    Tld4G2DFloatFloat,
154    Tld4B2DFloatFloat,
155    Tld4A2DFloatFloat,
156    Tld4R2DS64Float,
157    Tld4G2DS64Float,
158    Tld4B2DS64Float,
159    Tld4A2DS64Float,
160    Tld4R2DU64Float,
161    Tld4G2DU64Float,
162    Tld4B2DU64Float,
163    Tld4A2DU64Float,
       // "Unified" counterparts: one entry for each Tex*/Tld4* opcode listed
       // above, in the same order.
164    TexUnified1DFloatS32,
165    TexUnified1DFloatFloat,
166    TexUnified1DFloatFloatLevel,
167    TexUnified1DFloatFloatGrad,
168    TexUnified1DS32S32,
169    TexUnified1DS32Float,
170    TexUnified1DS32FloatLevel,
171    TexUnified1DS32FloatGrad,
172    TexUnified1DU32S32,
173    TexUnified1DU32Float,
174    TexUnified1DU32FloatLevel,
175    TexUnified1DU32FloatGrad,
176    TexUnified1DArrayFloatS32,
177    TexUnified1DArrayFloatFloat,
178    TexUnified1DArrayFloatFloatLevel,
179    TexUnified1DArrayFloatFloatGrad,
180    TexUnified1DArrayS32S32,
181    TexUnified1DArrayS32Float,
182    TexUnified1DArrayS32FloatLevel,
183    TexUnified1DArrayS32FloatGrad,
184    TexUnified1DArrayU32S32,
185    TexUnified1DArrayU32Float,
186    TexUnified1DArrayU32FloatLevel,
187    TexUnified1DArrayU32FloatGrad,
188    TexUnified2DFloatS32,
189    TexUnified2DFloatFloat,
190    TexUnified2DFloatFloatLevel,
191    TexUnified2DFloatFloatGrad,
192    TexUnified2DS32S32,
193    TexUnified2DS32Float,
194    TexUnified2DS32FloatLevel,
195    TexUnified2DS32FloatGrad,
196    TexUnified2DU32S32,
197    TexUnified2DU32Float,
198    TexUnified2DU32FloatLevel,
199    TexUnified2DU32FloatGrad,
200    TexUnified2DArrayFloatS32,
201    TexUnified2DArrayFloatFloat,
202    TexUnified2DArrayFloatFloatLevel,
203    TexUnified2DArrayFloatFloatGrad,
204    TexUnified2DArrayS32S32,
205    TexUnified2DArrayS32Float,
206    TexUnified2DArrayS32FloatLevel,
207    TexUnified2DArrayS32FloatGrad,
208    TexUnified2DArrayU32S32,
209    TexUnified2DArrayU32Float,
210    TexUnified2DArrayU32FloatLevel,
211    TexUnified2DArrayU32FloatGrad,
212    TexUnified3DFloatS32,
213    TexUnified3DFloatFloat,
214    TexUnified3DFloatFloatLevel,
215    TexUnified3DFloatFloatGrad,
216    TexUnified3DS32S32,
217    TexUnified3DS32Float,
218    TexUnified3DS32FloatLevel,
219    TexUnified3DS32FloatGrad,
220    TexUnified3DU32S32,
221    TexUnified3DU32Float,
222    TexUnified3DU32FloatLevel,
223    TexUnified3DU32FloatGrad,
224    TexUnifiedCubeFloatFloat,
225    TexUnifiedCubeFloatFloatLevel,
226    TexUnifiedCubeS32Float,
227    TexUnifiedCubeS32FloatLevel,
228    TexUnifiedCubeU32Float,
229    TexUnifiedCubeU32FloatLevel,
230    TexUnifiedCubeArrayFloatFloat,
231    TexUnifiedCubeArrayFloatFloatLevel,
232    TexUnifiedCubeArrayS32Float,
233    TexUnifiedCubeArrayS32FloatLevel,
234    TexUnifiedCubeArrayU32Float,
235    TexUnifiedCubeArrayU32FloatLevel,
236    Tld4UnifiedR2DFloatFloat,
237    Tld4UnifiedG2DFloatFloat,
238    Tld4UnifiedB2DFloatFloat,
239    Tld4UnifiedA2DFloatFloat,
240    Tld4UnifiedR2DS64Float,
241    Tld4UnifiedG2DS64Float,
242    Tld4UnifiedB2DS64Float,
243    Tld4UnifiedA2DS64Float,
244    Tld4UnifiedR2DU64Float,
245    Tld4UnifiedG2DU64Float,
246    Tld4UnifiedB2DU64Float,
247    Tld4UnifiedA2DU64Float,
248  
249    // Surface intrinsics
       // Listed in three suffix groups (Clamp, Trap, Zero); within each group
       // the entries are ordered by geometry (1D, 1DArray, 2D, 2DArray, 3D) and
       // then by vector width and element type.
       // NOTE(review): the suffix presumably selects how out-of-range
       // coordinates are handled -- confirm against the PTX suld documentation.
       //
       // Clamp variants.
250    Suld1DI8Clamp,
251    Suld1DI16Clamp,
252    Suld1DI32Clamp,
253    Suld1DI64Clamp,
254    Suld1DV2I8Clamp,
255    Suld1DV2I16Clamp,
256    Suld1DV2I32Clamp,
257    Suld1DV2I64Clamp,
258    Suld1DV4I8Clamp,
259    Suld1DV4I16Clamp,
260    Suld1DV4I32Clamp,
261  
262    Suld1DArrayI8Clamp,
263    Suld1DArrayI16Clamp,
264    Suld1DArrayI32Clamp,
265    Suld1DArrayI64Clamp,
266    Suld1DArrayV2I8Clamp,
267    Suld1DArrayV2I16Clamp,
268    Suld1DArrayV2I32Clamp,
269    Suld1DArrayV2I64Clamp,
270    Suld1DArrayV4I8Clamp,
271    Suld1DArrayV4I16Clamp,
272    Suld1DArrayV4I32Clamp,
273  
274    Suld2DI8Clamp,
275    Suld2DI16Clamp,
276    Suld2DI32Clamp,
277    Suld2DI64Clamp,
278    Suld2DV2I8Clamp,
279    Suld2DV2I16Clamp,
280    Suld2DV2I32Clamp,
281    Suld2DV2I64Clamp,
282    Suld2DV4I8Clamp,
283    Suld2DV4I16Clamp,
284    Suld2DV4I32Clamp,
285  
286    Suld2DArrayI8Clamp,
287    Suld2DArrayI16Clamp,
288    Suld2DArrayI32Clamp,
289    Suld2DArrayI64Clamp,
290    Suld2DArrayV2I8Clamp,
291    Suld2DArrayV2I16Clamp,
292    Suld2DArrayV2I32Clamp,
293    Suld2DArrayV2I64Clamp,
294    Suld2DArrayV4I8Clamp,
295    Suld2DArrayV4I16Clamp,
296    Suld2DArrayV4I32Clamp,
297  
298    Suld3DI8Clamp,
299    Suld3DI16Clamp,
300    Suld3DI32Clamp,
301    Suld3DI64Clamp,
302    Suld3DV2I8Clamp,
303    Suld3DV2I16Clamp,
304    Suld3DV2I32Clamp,
305    Suld3DV2I64Clamp,
306    Suld3DV4I8Clamp,
307    Suld3DV4I16Clamp,
308    Suld3DV4I32Clamp,
309  
       // Trap variants.
310    Suld1DI8Trap,
311    Suld1DI16Trap,
312    Suld1DI32Trap,
313    Suld1DI64Trap,
314    Suld1DV2I8Trap,
315    Suld1DV2I16Trap,
316    Suld1DV2I32Trap,
317    Suld1DV2I64Trap,
318    Suld1DV4I8Trap,
319    Suld1DV4I16Trap,
320    Suld1DV4I32Trap,
321  
322    Suld1DArrayI8Trap,
323    Suld1DArrayI16Trap,
324    Suld1DArrayI32Trap,
325    Suld1DArrayI64Trap,
326    Suld1DArrayV2I8Trap,
327    Suld1DArrayV2I16Trap,
328    Suld1DArrayV2I32Trap,
329    Suld1DArrayV2I64Trap,
330    Suld1DArrayV4I8Trap,
331    Suld1DArrayV4I16Trap,
332    Suld1DArrayV4I32Trap,
333  
334    Suld2DI8Trap,
335    Suld2DI16Trap,
336    Suld2DI32Trap,
337    Suld2DI64Trap,
338    Suld2DV2I8Trap,
339    Suld2DV2I16Trap,
340    Suld2DV2I32Trap,
341    Suld2DV2I64Trap,
342    Suld2DV4I8Trap,
343    Suld2DV4I16Trap,
344    Suld2DV4I32Trap,
345  
346    Suld2DArrayI8Trap,
347    Suld2DArrayI16Trap,
348    Suld2DArrayI32Trap,
349    Suld2DArrayI64Trap,
350    Suld2DArrayV2I8Trap,
351    Suld2DArrayV2I16Trap,
352    Suld2DArrayV2I32Trap,
353    Suld2DArrayV2I64Trap,
354    Suld2DArrayV4I8Trap,
355    Suld2DArrayV4I16Trap,
356    Suld2DArrayV4I32Trap,
357  
358    Suld3DI8Trap,
359    Suld3DI16Trap,
360    Suld3DI32Trap,
361    Suld3DI64Trap,
362    Suld3DV2I8Trap,
363    Suld3DV2I16Trap,
364    Suld3DV2I32Trap,
365    Suld3DV2I64Trap,
366    Suld3DV4I8Trap,
367    Suld3DV4I16Trap,
368    Suld3DV4I32Trap,
369  
       // Zero variants.
370    Suld1DI8Zero,
371    Suld1DI16Zero,
372    Suld1DI32Zero,
373    Suld1DI64Zero,
374    Suld1DV2I8Zero,
375    Suld1DV2I16Zero,
376    Suld1DV2I32Zero,
377    Suld1DV2I64Zero,
378    Suld1DV4I8Zero,
379    Suld1DV4I16Zero,
380    Suld1DV4I32Zero,
381  
382    Suld1DArrayI8Zero,
383    Suld1DArrayI16Zero,
384    Suld1DArrayI32Zero,
385    Suld1DArrayI64Zero,
386    Suld1DArrayV2I8Zero,
387    Suld1DArrayV2I16Zero,
388    Suld1DArrayV2I32Zero,
389    Suld1DArrayV2I64Zero,
390    Suld1DArrayV4I8Zero,
391    Suld1DArrayV4I16Zero,
392    Suld1DArrayV4I32Zero,
393  
394    Suld2DI8Zero,
395    Suld2DI16Zero,
396    Suld2DI32Zero,
397    Suld2DI64Zero,
398    Suld2DV2I8Zero,
399    Suld2DV2I16Zero,
400    Suld2DV2I32Zero,
401    Suld2DV2I64Zero,
402    Suld2DV4I8Zero,
403    Suld2DV4I16Zero,
404    Suld2DV4I32Zero,
405  
406    Suld2DArrayI8Zero,
407    Suld2DArrayI16Zero,
408    Suld2DArrayI32Zero,
409    Suld2DArrayI64Zero,
410    Suld2DArrayV2I8Zero,
411    Suld2DArrayV2I16Zero,
412    Suld2DArrayV2I32Zero,
413    Suld2DArrayV2I64Zero,
414    Suld2DArrayV4I8Zero,
415    Suld2DArrayV4I16Zero,
416    Suld2DArrayV4I32Zero,
417  
418    Suld3DI8Zero,
419    Suld3DI16Zero,
420    Suld3DI32Zero,
421    Suld3DI64Zero,
422    Suld3DV2I8Zero,
423    Suld3DV2I16Zero,
424    Suld3DV2I32Zero,
425    Suld3DV2I64Zero,
426    Suld3DV4I8Zero,
427    Suld3DV4I16Zero,
428    Suld3DV4I32Zero
429  };
430  } // namespace NVPTXISD
431  
432  class NVPTXSubtarget;
433  
434  //===--------------------------------------------------------------------===//
435  // TargetLowering Implementation
       //
       // NVPTXTargetLowering derives from TargetLowering. It declares the hooks
       // NVPTX overrides (marked `override` below), a few NVPTX-only public
       // queries, and private per-operation lowering helpers. Only the small
       // inline members are defined here; everything else is declared only.
436  //===--------------------------------------------------------------------===//
437  class NVPTXTargetLowering : public TargetLowering {
438  public:
       /// Construct the lowering object from the target machine and subtarget.
439    explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
440                                 const NVPTXSubtarget &STI);
441    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
442  
       /// Two overloads: one takes the DAG node to lower, the other takes the
       /// global value and offset directly.
443    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
444    SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
445                               SelectionDAG &DAG) const;
446  
447    const char *getTargetNodeName(unsigned Opcode) const override;
448  
449    bool isTypeSupportedInIntrinsic(MVT VT) const;
450  
451    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
452                            unsigned Intrinsic) const override;
453  
454    /// isLegalAddressingMode - Return true if the addressing mode represented
455    /// by AM is legal for this target, for a load/store of the specified type.
456    /// Used to guide target specific optimizations, like loop strength
457    /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458    /// address mode (CodeGenPrepare.cpp)
459    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
460  
461    /// getFunctionAlignment - Return the Log2 alignment of this function.
       /// NOTE(review): not marked `override`, unlike the other hooks in this
       /// class -- confirm whether the base class declares it.
462    unsigned getFunctionAlignment(const Function *F) const;
463  
       /// Result type of a SETCC: for a vector VT, a vector of i1 with the same
       /// number of elements; otherwise scalar i1.
getSetCCResultType(LLVMContext & Ctx,EVT VT)464    EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
465      if (VT.isVector())
466        return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
467      return MVT::i1;
468    }
469  
470    ConstraintType
471    getConstraintType(const std::string &Constraint) const override;
472    std::pair<unsigned, const TargetRegisterClass *>
473    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
474                                 const std::string &Constraint,
475                                 MVT VT) const override;
476  
477    SDValue LowerFormalArguments(
478        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
479        const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
480        SmallVectorImpl<SDValue> &InVals) const override;
481  
482    SDValue LowerCall(CallLoweringInfo &CLI,
483                      SmallVectorImpl<SDValue> &InVals) const override;
484  
       /// Build a prototype string from the return type, argument list, output
       /// arguments, return alignment and call site.
       /// NOTE(review): the format of the returned string is defined in the
       /// implementation file -- confirm there.
485    std::string getPrototype(Type *, const ArgListTy &,
486                             const SmallVectorImpl<ISD::OutputArg> &,
487                             unsigned retAlignment,
488                             const ImmutableCallSite *CS) const;
489  
490    SDValue
491    LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
492                const SmallVectorImpl<ISD::OutputArg> &Outs,
493                const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
494                SelectionDAG &DAG) const override;
495  
496    void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
497                                      std::vector<SDValue> &Ops,
498                                      SelectionDAG &DAG) const override;
499  
       /// Every constraint code currently maps to InlineAsm::Constraint_m;
       /// ConstraintCode is not inspected.
getInlineAsmMemConstraint(const std::string & ConstraintCode)500    unsigned getInlineAsmMemConstraint(
501        const std::string &ConstraintCode) const override {
502      // FIXME: Map different constraints differently.
503      return InlineAsm::Constraint_m;
504    }
505  
       // Public pointer to the NVPTX target machine.
       // NOTE(review): presumably initialised by the constructor from TM --
       // confirm in the implementation file.
506    const NVPTXTargetMachine *nvTM;
507  
508    // PTX always uses 32-bit shift amounts
       // (the LHSTy argument is ignored).
getScalarShiftAmountTy(EVT LHSTy)509    MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
510  
511    TargetLoweringBase::LegalizeTypeAction
512    getPreferredVectorAction(EVT VT) const override;
513  
514    bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
515  
       // Unconditionally true, for every type.
isFMAFasterThanFMulAndFAdd(EVT)516    bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
517  
       // Unconditionally true; VT is not inspected.
enableAggressiveFMAFusion(EVT VT)518    bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
519  
520  private:
521    const NVPTXSubtarget &STI; // cache the subtarget here
522  
       // Symbol helpers. The unnamed EVT parameter of getExtSymb defaults to
       // MVT::i32.
523    SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
524                       EVT = MVT::i32) const;
525    SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
       // NOTE(review): the only helper here that is not const-qualified --
       // confirm whether that is intentional.
526    SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
527  
       // Per-operation lowering helpers.
       // NOTE(review): presumably dispatched from LowerOperation -- confirm in
       // the implementation file.
528    SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
529  
530    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
531    SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
532  
533    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
534    SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
535    SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
536  
537    SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
538    SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
539  
540    SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
541  
       // TargetLowering overrides kept private in this class.
542    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
543                            SelectionDAG &DAG) const override;
544    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
545  
       // Alignment query for argument Idx of type Ty, given the callee and call
       // site.
546    unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
547                                  Type *Ty, unsigned Idx) const;
548  };
549  } // namespace llvm
550  
551  #endif
552