1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17 
18 #include "NVPTX.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/Target/TargetLowering.h"
21 
22 namespace llvm {
23 namespace NVPTXISD {
24 enum NodeType : unsigned {
25   // Start the numbering from where ISD NodeType finishes.
26   FIRST_NUMBER = ISD::BUILTIN_OP_END,
27   Wrapper,
28   CALL,
29   RET_FLAG,
30   LOAD_PARAM,
31   DeclareParam,
32   DeclareScalarParam,
33   DeclareRetParam,
34   DeclareRet,
35   DeclareScalarRet,
36   PrintCall,
37   PrintConvergentCall,
38   PrintCallUni,
39   PrintConvergentCallUni,
40   CallArgBegin,
41   CallArg,
42   LastCallArg,
43   CallArgEnd,
44   CallVoid,
45   CallVal,
46   CallSymbol,
47   Prototype,
48   MoveParam,
49   PseudoUseParam,
50   RETURN,
51   CallSeqBegin,
52   CallSeqEnd,
53   CallPrototype,
54   FUN_SHFL_CLAMP,
55   FUN_SHFR_CLAMP,
56   MUL_WIDE_SIGNED,
57   MUL_WIDE_UNSIGNED,
58   IMAD,
59   Dummy,
60 
61   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
62   LoadV4,
63   LDGV2, // LDG.v2
64   LDGV4, // LDG.v4
65   LDUV2, // LDU.v2
66   LDUV4, // LDU.v4
67   StoreV2,
68   StoreV4,
69   LoadParam,
70   LoadParamV2,
71   LoadParamV4,
72   StoreParam,
73   StoreParamV2,
74   StoreParamV4,
75   StoreParamS32, // to sext and store a <32bit value, not used currently
76   StoreParamU32, // to zext and store a <32bit value, not used currently
77   StoreRetval,
78   StoreRetvalV2,
79   StoreRetvalV4,
80 
81   // Texture intrinsics
82   Tex1DFloatS32,
83   Tex1DFloatFloat,
84   Tex1DFloatFloatLevel,
85   Tex1DFloatFloatGrad,
86   Tex1DS32S32,
87   Tex1DS32Float,
88   Tex1DS32FloatLevel,
89   Tex1DS32FloatGrad,
90   Tex1DU32S32,
91   Tex1DU32Float,
92   Tex1DU32FloatLevel,
93   Tex1DU32FloatGrad,
94   Tex1DArrayFloatS32,
95   Tex1DArrayFloatFloat,
96   Tex1DArrayFloatFloatLevel,
97   Tex1DArrayFloatFloatGrad,
98   Tex1DArrayS32S32,
99   Tex1DArrayS32Float,
100   Tex1DArrayS32FloatLevel,
101   Tex1DArrayS32FloatGrad,
102   Tex1DArrayU32S32,
103   Tex1DArrayU32Float,
104   Tex1DArrayU32FloatLevel,
105   Tex1DArrayU32FloatGrad,
106   Tex2DFloatS32,
107   Tex2DFloatFloat,
108   Tex2DFloatFloatLevel,
109   Tex2DFloatFloatGrad,
110   Tex2DS32S32,
111   Tex2DS32Float,
112   Tex2DS32FloatLevel,
113   Tex2DS32FloatGrad,
114   Tex2DU32S32,
115   Tex2DU32Float,
116   Tex2DU32FloatLevel,
117   Tex2DU32FloatGrad,
118   Tex2DArrayFloatS32,
119   Tex2DArrayFloatFloat,
120   Tex2DArrayFloatFloatLevel,
121   Tex2DArrayFloatFloatGrad,
122   Tex2DArrayS32S32,
123   Tex2DArrayS32Float,
124   Tex2DArrayS32FloatLevel,
125   Tex2DArrayS32FloatGrad,
126   Tex2DArrayU32S32,
127   Tex2DArrayU32Float,
128   Tex2DArrayU32FloatLevel,
129   Tex2DArrayU32FloatGrad,
130   Tex3DFloatS32,
131   Tex3DFloatFloat,
132   Tex3DFloatFloatLevel,
133   Tex3DFloatFloatGrad,
134   Tex3DS32S32,
135   Tex3DS32Float,
136   Tex3DS32FloatLevel,
137   Tex3DS32FloatGrad,
138   Tex3DU32S32,
139   Tex3DU32Float,
140   Tex3DU32FloatLevel,
141   Tex3DU32FloatGrad,
142   TexCubeFloatFloat,
143   TexCubeFloatFloatLevel,
144   TexCubeS32Float,
145   TexCubeS32FloatLevel,
146   TexCubeU32Float,
147   TexCubeU32FloatLevel,
148   TexCubeArrayFloatFloat,
149   TexCubeArrayFloatFloatLevel,
150   TexCubeArrayS32Float,
151   TexCubeArrayS32FloatLevel,
152   TexCubeArrayU32Float,
153   TexCubeArrayU32FloatLevel,
154   Tld4R2DFloatFloat,
155   Tld4G2DFloatFloat,
156   Tld4B2DFloatFloat,
157   Tld4A2DFloatFloat,
158   Tld4R2DS64Float,
159   Tld4G2DS64Float,
160   Tld4B2DS64Float,
161   Tld4A2DS64Float,
162   Tld4R2DU64Float,
163   Tld4G2DU64Float,
164   Tld4B2DU64Float,
165   Tld4A2DU64Float,
166   TexUnified1DFloatS32,
167   TexUnified1DFloatFloat,
168   TexUnified1DFloatFloatLevel,
169   TexUnified1DFloatFloatGrad,
170   TexUnified1DS32S32,
171   TexUnified1DS32Float,
172   TexUnified1DS32FloatLevel,
173   TexUnified1DS32FloatGrad,
174   TexUnified1DU32S32,
175   TexUnified1DU32Float,
176   TexUnified1DU32FloatLevel,
177   TexUnified1DU32FloatGrad,
178   TexUnified1DArrayFloatS32,
179   TexUnified1DArrayFloatFloat,
180   TexUnified1DArrayFloatFloatLevel,
181   TexUnified1DArrayFloatFloatGrad,
182   TexUnified1DArrayS32S32,
183   TexUnified1DArrayS32Float,
184   TexUnified1DArrayS32FloatLevel,
185   TexUnified1DArrayS32FloatGrad,
186   TexUnified1DArrayU32S32,
187   TexUnified1DArrayU32Float,
188   TexUnified1DArrayU32FloatLevel,
189   TexUnified1DArrayU32FloatGrad,
190   TexUnified2DFloatS32,
191   TexUnified2DFloatFloat,
192   TexUnified2DFloatFloatLevel,
193   TexUnified2DFloatFloatGrad,
194   TexUnified2DS32S32,
195   TexUnified2DS32Float,
196   TexUnified2DS32FloatLevel,
197   TexUnified2DS32FloatGrad,
198   TexUnified2DU32S32,
199   TexUnified2DU32Float,
200   TexUnified2DU32FloatLevel,
201   TexUnified2DU32FloatGrad,
202   TexUnified2DArrayFloatS32,
203   TexUnified2DArrayFloatFloat,
204   TexUnified2DArrayFloatFloatLevel,
205   TexUnified2DArrayFloatFloatGrad,
206   TexUnified2DArrayS32S32,
207   TexUnified2DArrayS32Float,
208   TexUnified2DArrayS32FloatLevel,
209   TexUnified2DArrayS32FloatGrad,
210   TexUnified2DArrayU32S32,
211   TexUnified2DArrayU32Float,
212   TexUnified2DArrayU32FloatLevel,
213   TexUnified2DArrayU32FloatGrad,
214   TexUnified3DFloatS32,
215   TexUnified3DFloatFloat,
216   TexUnified3DFloatFloatLevel,
217   TexUnified3DFloatFloatGrad,
218   TexUnified3DS32S32,
219   TexUnified3DS32Float,
220   TexUnified3DS32FloatLevel,
221   TexUnified3DS32FloatGrad,
222   TexUnified3DU32S32,
223   TexUnified3DU32Float,
224   TexUnified3DU32FloatLevel,
225   TexUnified3DU32FloatGrad,
226   TexUnifiedCubeFloatFloat,
227   TexUnifiedCubeFloatFloatLevel,
228   TexUnifiedCubeS32Float,
229   TexUnifiedCubeS32FloatLevel,
230   TexUnifiedCubeU32Float,
231   TexUnifiedCubeU32FloatLevel,
232   TexUnifiedCubeArrayFloatFloat,
233   TexUnifiedCubeArrayFloatFloatLevel,
234   TexUnifiedCubeArrayS32Float,
235   TexUnifiedCubeArrayS32FloatLevel,
236   TexUnifiedCubeArrayU32Float,
237   TexUnifiedCubeArrayU32FloatLevel,
238   Tld4UnifiedR2DFloatFloat,
239   Tld4UnifiedG2DFloatFloat,
240   Tld4UnifiedB2DFloatFloat,
241   Tld4UnifiedA2DFloatFloat,
242   Tld4UnifiedR2DS64Float,
243   Tld4UnifiedG2DS64Float,
244   Tld4UnifiedB2DS64Float,
245   Tld4UnifiedA2DS64Float,
246   Tld4UnifiedR2DU64Float,
247   Tld4UnifiedG2DU64Float,
248   Tld4UnifiedB2DU64Float,
249   Tld4UnifiedA2DU64Float,
250 
251   // Surface intrinsics
252   Suld1DI8Clamp,
253   Suld1DI16Clamp,
254   Suld1DI32Clamp,
255   Suld1DI64Clamp,
256   Suld1DV2I8Clamp,
257   Suld1DV2I16Clamp,
258   Suld1DV2I32Clamp,
259   Suld1DV2I64Clamp,
260   Suld1DV4I8Clamp,
261   Suld1DV4I16Clamp,
262   Suld1DV4I32Clamp,
263 
264   Suld1DArrayI8Clamp,
265   Suld1DArrayI16Clamp,
266   Suld1DArrayI32Clamp,
267   Suld1DArrayI64Clamp,
268   Suld1DArrayV2I8Clamp,
269   Suld1DArrayV2I16Clamp,
270   Suld1DArrayV2I32Clamp,
271   Suld1DArrayV2I64Clamp,
272   Suld1DArrayV4I8Clamp,
273   Suld1DArrayV4I16Clamp,
274   Suld1DArrayV4I32Clamp,
275 
276   Suld2DI8Clamp,
277   Suld2DI16Clamp,
278   Suld2DI32Clamp,
279   Suld2DI64Clamp,
280   Suld2DV2I8Clamp,
281   Suld2DV2I16Clamp,
282   Suld2DV2I32Clamp,
283   Suld2DV2I64Clamp,
284   Suld2DV4I8Clamp,
285   Suld2DV4I16Clamp,
286   Suld2DV4I32Clamp,
287 
288   Suld2DArrayI8Clamp,
289   Suld2DArrayI16Clamp,
290   Suld2DArrayI32Clamp,
291   Suld2DArrayI64Clamp,
292   Suld2DArrayV2I8Clamp,
293   Suld2DArrayV2I16Clamp,
294   Suld2DArrayV2I32Clamp,
295   Suld2DArrayV2I64Clamp,
296   Suld2DArrayV4I8Clamp,
297   Suld2DArrayV4I16Clamp,
298   Suld2DArrayV4I32Clamp,
299 
300   Suld3DI8Clamp,
301   Suld3DI16Clamp,
302   Suld3DI32Clamp,
303   Suld3DI64Clamp,
304   Suld3DV2I8Clamp,
305   Suld3DV2I16Clamp,
306   Suld3DV2I32Clamp,
307   Suld3DV2I64Clamp,
308   Suld3DV4I8Clamp,
309   Suld3DV4I16Clamp,
310   Suld3DV4I32Clamp,
311 
312   Suld1DI8Trap,
313   Suld1DI16Trap,
314   Suld1DI32Trap,
315   Suld1DI64Trap,
316   Suld1DV2I8Trap,
317   Suld1DV2I16Trap,
318   Suld1DV2I32Trap,
319   Suld1DV2I64Trap,
320   Suld1DV4I8Trap,
321   Suld1DV4I16Trap,
322   Suld1DV4I32Trap,
323 
324   Suld1DArrayI8Trap,
325   Suld1DArrayI16Trap,
326   Suld1DArrayI32Trap,
327   Suld1DArrayI64Trap,
328   Suld1DArrayV2I8Trap,
329   Suld1DArrayV2I16Trap,
330   Suld1DArrayV2I32Trap,
331   Suld1DArrayV2I64Trap,
332   Suld1DArrayV4I8Trap,
333   Suld1DArrayV4I16Trap,
334   Suld1DArrayV4I32Trap,
335 
336   Suld2DI8Trap,
337   Suld2DI16Trap,
338   Suld2DI32Trap,
339   Suld2DI64Trap,
340   Suld2DV2I8Trap,
341   Suld2DV2I16Trap,
342   Suld2DV2I32Trap,
343   Suld2DV2I64Trap,
344   Suld2DV4I8Trap,
345   Suld2DV4I16Trap,
346   Suld2DV4I32Trap,
347 
348   Suld2DArrayI8Trap,
349   Suld2DArrayI16Trap,
350   Suld2DArrayI32Trap,
351   Suld2DArrayI64Trap,
352   Suld2DArrayV2I8Trap,
353   Suld2DArrayV2I16Trap,
354   Suld2DArrayV2I32Trap,
355   Suld2DArrayV2I64Trap,
356   Suld2DArrayV4I8Trap,
357   Suld2DArrayV4I16Trap,
358   Suld2DArrayV4I32Trap,
359 
360   Suld3DI8Trap,
361   Suld3DI16Trap,
362   Suld3DI32Trap,
363   Suld3DI64Trap,
364   Suld3DV2I8Trap,
365   Suld3DV2I16Trap,
366   Suld3DV2I32Trap,
367   Suld3DV2I64Trap,
368   Suld3DV4I8Trap,
369   Suld3DV4I16Trap,
370   Suld3DV4I32Trap,
371 
372   Suld1DI8Zero,
373   Suld1DI16Zero,
374   Suld1DI32Zero,
375   Suld1DI64Zero,
376   Suld1DV2I8Zero,
377   Suld1DV2I16Zero,
378   Suld1DV2I32Zero,
379   Suld1DV2I64Zero,
380   Suld1DV4I8Zero,
381   Suld1DV4I16Zero,
382   Suld1DV4I32Zero,
383 
384   Suld1DArrayI8Zero,
385   Suld1DArrayI16Zero,
386   Suld1DArrayI32Zero,
387   Suld1DArrayI64Zero,
388   Suld1DArrayV2I8Zero,
389   Suld1DArrayV2I16Zero,
390   Suld1DArrayV2I32Zero,
391   Suld1DArrayV2I64Zero,
392   Suld1DArrayV4I8Zero,
393   Suld1DArrayV4I16Zero,
394   Suld1DArrayV4I32Zero,
395 
396   Suld2DI8Zero,
397   Suld2DI16Zero,
398   Suld2DI32Zero,
399   Suld2DI64Zero,
400   Suld2DV2I8Zero,
401   Suld2DV2I16Zero,
402   Suld2DV2I32Zero,
403   Suld2DV2I64Zero,
404   Suld2DV4I8Zero,
405   Suld2DV4I16Zero,
406   Suld2DV4I32Zero,
407 
408   Suld2DArrayI8Zero,
409   Suld2DArrayI16Zero,
410   Suld2DArrayI32Zero,
411   Suld2DArrayI64Zero,
412   Suld2DArrayV2I8Zero,
413   Suld2DArrayV2I16Zero,
414   Suld2DArrayV2I32Zero,
415   Suld2DArrayV2I64Zero,
416   Suld2DArrayV4I8Zero,
417   Suld2DArrayV4I16Zero,
418   Suld2DArrayV4I32Zero,
419 
420   Suld3DI8Zero,
421   Suld3DI16Zero,
422   Suld3DI32Zero,
423   Suld3DI64Zero,
424   Suld3DV2I8Zero,
425   Suld3DV2I16Zero,
426   Suld3DV2I32Zero,
427   Suld3DV2I64Zero,
428   Suld3DV4I8Zero,
429   Suld3DV4I16Zero,
430   Suld3DV4I32Zero
431 };
432 }
433 
434 class NVPTXSubtarget;
435 
436 //===--------------------------------------------------------------------===//
437 // TargetLowering Implementation
438 //===--------------------------------------------------------------------===//
439 class NVPTXTargetLowering : public TargetLowering {
440 public:
441   explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
442                                const NVPTXSubtarget &STI);
443   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
444 
445   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
446 
447   const char *getTargetNodeName(unsigned Opcode) const override;
448 
449   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
450                           unsigned Intrinsic) const override;
451 
452   /// isLegalAddressingMode - Return true if the addressing mode represented
453   /// by AM is legal for this target, for a load/store of the specified type
454   /// Used to guide target specific optimizations, like loop strength
455   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
456   /// address mode (CodeGenPrepare.cpp)
457   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
458                              unsigned AS) const override;
459 
isTruncateFree(Type * SrcTy,Type * DstTy)460   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
461     // Truncating 64-bit to 32-bit is free in SASS.
462     if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
463       return false;
464     return SrcTy->getPrimitiveSizeInBits() == 64 &&
465            DstTy->getPrimitiveSizeInBits() == 32;
466   }
467 
getSetCCResultType(const DataLayout & DL,LLVMContext & Ctx,EVT VT)468   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
469                          EVT VT) const override {
470     if (VT.isVector())
471       return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
472     return MVT::i1;
473   }
474 
475   ConstraintType getConstraintType(StringRef Constraint) const override;
476   std::pair<unsigned, const TargetRegisterClass *>
477   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
478                                StringRef Constraint, MVT VT) const override;
479 
480   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
481                                bool isVarArg,
482                                const SmallVectorImpl<ISD::InputArg> &Ins,
483                                const SDLoc &dl, SelectionDAG &DAG,
484                                SmallVectorImpl<SDValue> &InVals) const override;
485 
486   SDValue LowerCall(CallLoweringInfo &CLI,
487                     SmallVectorImpl<SDValue> &InVals) const override;
488 
489   std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
490                            const SmallVectorImpl<ISD::OutputArg> &,
491                            unsigned retAlignment,
492                            const ImmutableCallSite *CS) const;
493 
494   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
495                       const SmallVectorImpl<ISD::OutputArg> &Outs,
496                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
497                       SelectionDAG &DAG) const override;
498 
499   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
500                                     std::vector<SDValue> &Ops,
501                                     SelectionDAG &DAG) const override;
502 
503   const NVPTXTargetMachine *nvTM;
504 
505   // PTX always uses 32-bit shift amounts
getScalarShiftAmountTy(const DataLayout &,EVT)506   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
507     return MVT::i32;
508   }
509 
510   TargetLoweringBase::LegalizeTypeAction
511   getPreferredVectorAction(EVT VT) const override;
512 
513   bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
514 
isFMAFasterThanFMulAndFAdd(EVT)515   bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
516 
enableAggressiveFMAFusion(EVT VT)517   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
518 
519 private:
520   const NVPTXSubtarget &STI; // cache the subtarget here
521   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
522 
523   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
524 
525   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
526   SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
527 
528   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
529   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
530   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
531 
532   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
533   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
534 
535   SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
536 
537   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
538                           SelectionDAG &DAG) const override;
539   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
540 
541   unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
542                                 Type *Ty, unsigned Idx) const;
543 };
544 } // namespace llvm
545 
546 #endif
547