1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17 
18 #include "NVPTX.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/Target/TargetLowering.h"
21 
22 namespace llvm {
// NVPTX-specific SelectionDAG node opcodes.
//
// The enumerators rely on implicit sequential numbering, so their declaration
// order determines their values. Two explicit anchors exist:
//   * FIRST_NUMBER is pinned to ISD::BUILTIN_OP_END.
//   * LoadV2 is pinned to ISD::FIRST_TARGET_MEMORY_OPCODE; every enumerator
//     after it continues counting from that value.
namespace NVPTXISD {
enum NodeType {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_FLAG,
  LOAD_PARAM,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  PrintCall,
  PrintCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  CallPrototype,
  FUN_SHFL_CLAMP,
  FUN_SHFR_CLAMP,
  MUL_WIDE_SIGNED,
  MUL_WIDE_UNSIGNED,
  IMAD,
  Dummy,

  // Numbering restarts here at ISD::FIRST_TARGET_MEMORY_OPCODE; all remaining
  // enumerators (vector loads/stores, param/retval accesses, texture and
  // surface nodes) follow on from that value.
  // NOTE(review): presumably these are the nodes that carry a memory operand
  // -- confirm against the documentation of ISD::FIRST_TARGET_MEMORY_OPCODE.
  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LoadV4,
  LDGV2, // LDG.v2
  LDGV4, // LDG.v4
  LDUV2, // LDU.v2
  LDUV4, // LDU.v4
  StoreV2,
  StoreV4,
  LoadParam,
  LoadParamV2,
  LoadParamV4,
  StoreParam,
  StoreParamV2,
  StoreParamV4,
  StoreParamS32, // to sext and store a <32bit value, not used currently
  StoreParamU32, // to zext and store a <32bit value, not used currently
  StoreRetval,
  StoreRetvalV2,
  StoreRetvalV4,

  // Texture intrinsics
  // NOTE(review): the names appear to follow
  //   Tex[Unified]<geometry><result type><coordinate type>[Level|Grad]
  // and Tld4[Unified]<component>2D<result type>Float -- confirm the exact
  // meaning of each field against the PTX tex/tld4 instruction description.
  Tex1DFloatS32,
  Tex1DFloatFloat,
  Tex1DFloatFloatLevel,
  Tex1DFloatFloatGrad,
  Tex1DS32S32,
  Tex1DS32Float,
  Tex1DS32FloatLevel,
  Tex1DS32FloatGrad,
  Tex1DU32S32,
  Tex1DU32Float,
  Tex1DU32FloatLevel,
  Tex1DU32FloatGrad,
  Tex1DArrayFloatS32,
  Tex1DArrayFloatFloat,
  Tex1DArrayFloatFloatLevel,
  Tex1DArrayFloatFloatGrad,
  Tex1DArrayS32S32,
  Tex1DArrayS32Float,
  Tex1DArrayS32FloatLevel,
  Tex1DArrayS32FloatGrad,
  Tex1DArrayU32S32,
  Tex1DArrayU32Float,
  Tex1DArrayU32FloatLevel,
  Tex1DArrayU32FloatGrad,
  Tex2DFloatS32,
  Tex2DFloatFloat,
  Tex2DFloatFloatLevel,
  Tex2DFloatFloatGrad,
  Tex2DS32S32,
  Tex2DS32Float,
  Tex2DS32FloatLevel,
  Tex2DS32FloatGrad,
  Tex2DU32S32,
  Tex2DU32Float,
  Tex2DU32FloatLevel,
  Tex2DU32FloatGrad,
  Tex2DArrayFloatS32,
  Tex2DArrayFloatFloat,
  Tex2DArrayFloatFloatLevel,
  Tex2DArrayFloatFloatGrad,
  Tex2DArrayS32S32,
  Tex2DArrayS32Float,
  Tex2DArrayS32FloatLevel,
  Tex2DArrayS32FloatGrad,
  Tex2DArrayU32S32,
  Tex2DArrayU32Float,
  Tex2DArrayU32FloatLevel,
  Tex2DArrayU32FloatGrad,
  Tex3DFloatS32,
  Tex3DFloatFloat,
  Tex3DFloatFloatLevel,
  Tex3DFloatFloatGrad,
  Tex3DS32S32,
  Tex3DS32Float,
  Tex3DS32FloatLevel,
  Tex3DS32FloatGrad,
  Tex3DU32S32,
  Tex3DU32Float,
  Tex3DU32FloatLevel,
  Tex3DU32FloatGrad,
  TexCubeFloatFloat,
  TexCubeFloatFloatLevel,
  TexCubeS32Float,
  TexCubeS32FloatLevel,
  TexCubeU32Float,
  TexCubeU32FloatLevel,
  TexCubeArrayFloatFloat,
  TexCubeArrayFloatFloatLevel,
  TexCubeArrayS32Float,
  TexCubeArrayS32FloatLevel,
  TexCubeArrayU32Float,
  TexCubeArrayU32FloatLevel,
  Tld4R2DFloatFloat,
  Tld4G2DFloatFloat,
  Tld4B2DFloatFloat,
  Tld4A2DFloatFloat,
  Tld4R2DS64Float,
  Tld4G2DS64Float,
  Tld4B2DS64Float,
  Tld4A2DS64Float,
  Tld4R2DU64Float,
  Tld4G2DU64Float,
  Tld4B2DU64Float,
  Tld4A2DU64Float,
  TexUnified1DFloatS32,
  TexUnified1DFloatFloat,
  TexUnified1DFloatFloatLevel,
  TexUnified1DFloatFloatGrad,
  TexUnified1DS32S32,
  TexUnified1DS32Float,
  TexUnified1DS32FloatLevel,
  TexUnified1DS32FloatGrad,
  TexUnified1DU32S32,
  TexUnified1DU32Float,
  TexUnified1DU32FloatLevel,
  TexUnified1DU32FloatGrad,
  TexUnified1DArrayFloatS32,
  TexUnified1DArrayFloatFloat,
  TexUnified1DArrayFloatFloatLevel,
  TexUnified1DArrayFloatFloatGrad,
  TexUnified1DArrayS32S32,
  TexUnified1DArrayS32Float,
  TexUnified1DArrayS32FloatLevel,
  TexUnified1DArrayS32FloatGrad,
  TexUnified1DArrayU32S32,
  TexUnified1DArrayU32Float,
  TexUnified1DArrayU32FloatLevel,
  TexUnified1DArrayU32FloatGrad,
  TexUnified2DFloatS32,
  TexUnified2DFloatFloat,
  TexUnified2DFloatFloatLevel,
  TexUnified2DFloatFloatGrad,
  TexUnified2DS32S32,
  TexUnified2DS32Float,
  TexUnified2DS32FloatLevel,
  TexUnified2DS32FloatGrad,
  TexUnified2DU32S32,
  TexUnified2DU32Float,
  TexUnified2DU32FloatLevel,
  TexUnified2DU32FloatGrad,
  TexUnified2DArrayFloatS32,
  TexUnified2DArrayFloatFloat,
  TexUnified2DArrayFloatFloatLevel,
  TexUnified2DArrayFloatFloatGrad,
  TexUnified2DArrayS32S32,
  TexUnified2DArrayS32Float,
  TexUnified2DArrayS32FloatLevel,
  TexUnified2DArrayS32FloatGrad,
  TexUnified2DArrayU32S32,
  TexUnified2DArrayU32Float,
  TexUnified2DArrayU32FloatLevel,
  TexUnified2DArrayU32FloatGrad,
  TexUnified3DFloatS32,
  TexUnified3DFloatFloat,
  TexUnified3DFloatFloatLevel,
  TexUnified3DFloatFloatGrad,
  TexUnified3DS32S32,
  TexUnified3DS32Float,
  TexUnified3DS32FloatLevel,
  TexUnified3DS32FloatGrad,
  TexUnified3DU32S32,
  TexUnified3DU32Float,
  TexUnified3DU32FloatLevel,
  TexUnified3DU32FloatGrad,
  TexUnifiedCubeFloatFloat,
  TexUnifiedCubeFloatFloatLevel,
  TexUnifiedCubeS32Float,
  TexUnifiedCubeS32FloatLevel,
  TexUnifiedCubeU32Float,
  TexUnifiedCubeU32FloatLevel,
  TexUnifiedCubeArrayFloatFloat,
  TexUnifiedCubeArrayFloatFloatLevel,
  TexUnifiedCubeArrayS32Float,
  TexUnifiedCubeArrayS32FloatLevel,
  TexUnifiedCubeArrayU32Float,
  TexUnifiedCubeArrayU32FloatLevel,
  Tld4UnifiedR2DFloatFloat,
  Tld4UnifiedG2DFloatFloat,
  Tld4UnifiedB2DFloatFloat,
  Tld4UnifiedA2DFloatFloat,
  Tld4UnifiedR2DS64Float,
  Tld4UnifiedG2DS64Float,
  Tld4UnifiedB2DS64Float,
  Tld4UnifiedA2DS64Float,
  Tld4UnifiedR2DU64Float,
  Tld4UnifiedG2DU64Float,
  Tld4UnifiedB2DU64Float,
  Tld4UnifiedA2DU64Float,

  // Surface intrinsics
  // The enumerators below are grouped by suffix (Clamp, then Trap, then Zero)
  // and, inside each suffix, by geometry (1D, 1DArray, 2D, 2DArray, 3D). Each
  // geometry lists scalar, V2 and V4 variants; there is no V4I64 variant.
  // NOTE(review): the suffix presumably selects the out-of-range handling
  // mode of the surface load -- confirm against the PTX suld instruction.
  Suld1DI8Clamp,
  Suld1DI16Clamp,
  Suld1DI32Clamp,
  Suld1DI64Clamp,
  Suld1DV2I8Clamp,
  Suld1DV2I16Clamp,
  Suld1DV2I32Clamp,
  Suld1DV2I64Clamp,
  Suld1DV4I8Clamp,
  Suld1DV4I16Clamp,
  Suld1DV4I32Clamp,

  Suld1DArrayI8Clamp,
  Suld1DArrayI16Clamp,
  Suld1DArrayI32Clamp,
  Suld1DArrayI64Clamp,
  Suld1DArrayV2I8Clamp,
  Suld1DArrayV2I16Clamp,
  Suld1DArrayV2I32Clamp,
  Suld1DArrayV2I64Clamp,
  Suld1DArrayV4I8Clamp,
  Suld1DArrayV4I16Clamp,
  Suld1DArrayV4I32Clamp,

  Suld2DI8Clamp,
  Suld2DI16Clamp,
  Suld2DI32Clamp,
  Suld2DI64Clamp,
  Suld2DV2I8Clamp,
  Suld2DV2I16Clamp,
  Suld2DV2I32Clamp,
  Suld2DV2I64Clamp,
  Suld2DV4I8Clamp,
  Suld2DV4I16Clamp,
  Suld2DV4I32Clamp,

  Suld2DArrayI8Clamp,
  Suld2DArrayI16Clamp,
  Suld2DArrayI32Clamp,
  Suld2DArrayI64Clamp,
  Suld2DArrayV2I8Clamp,
  Suld2DArrayV2I16Clamp,
  Suld2DArrayV2I32Clamp,
  Suld2DArrayV2I64Clamp,
  Suld2DArrayV4I8Clamp,
  Suld2DArrayV4I16Clamp,
  Suld2DArrayV4I32Clamp,

  Suld3DI8Clamp,
  Suld3DI16Clamp,
  Suld3DI32Clamp,
  Suld3DI64Clamp,
  Suld3DV2I8Clamp,
  Suld3DV2I16Clamp,
  Suld3DV2I32Clamp,
  Suld3DV2I64Clamp,
  Suld3DV4I8Clamp,
  Suld3DV4I16Clamp,
  Suld3DV4I32Clamp,

  Suld1DI8Trap,
  Suld1DI16Trap,
  Suld1DI32Trap,
  Suld1DI64Trap,
  Suld1DV2I8Trap,
  Suld1DV2I16Trap,
  Suld1DV2I32Trap,
  Suld1DV2I64Trap,
  Suld1DV4I8Trap,
  Suld1DV4I16Trap,
  Suld1DV4I32Trap,

  Suld1DArrayI8Trap,
  Suld1DArrayI16Trap,
  Suld1DArrayI32Trap,
  Suld1DArrayI64Trap,
  Suld1DArrayV2I8Trap,
  Suld1DArrayV2I16Trap,
  Suld1DArrayV2I32Trap,
  Suld1DArrayV2I64Trap,
  Suld1DArrayV4I8Trap,
  Suld1DArrayV4I16Trap,
  Suld1DArrayV4I32Trap,

  Suld2DI8Trap,
  Suld2DI16Trap,
  Suld2DI32Trap,
  Suld2DI64Trap,
  Suld2DV2I8Trap,
  Suld2DV2I16Trap,
  Suld2DV2I32Trap,
  Suld2DV2I64Trap,
  Suld2DV4I8Trap,
  Suld2DV4I16Trap,
  Suld2DV4I32Trap,

  Suld2DArrayI8Trap,
  Suld2DArrayI16Trap,
  Suld2DArrayI32Trap,
  Suld2DArrayI64Trap,
  Suld2DArrayV2I8Trap,
  Suld2DArrayV2I16Trap,
  Suld2DArrayV2I32Trap,
  Suld2DArrayV2I64Trap,
  Suld2DArrayV4I8Trap,
  Suld2DArrayV4I16Trap,
  Suld2DArrayV4I32Trap,

  Suld3DI8Trap,
  Suld3DI16Trap,
  Suld3DI32Trap,
  Suld3DI64Trap,
  Suld3DV2I8Trap,
  Suld3DV2I16Trap,
  Suld3DV2I32Trap,
  Suld3DV2I64Trap,
  Suld3DV4I8Trap,
  Suld3DV4I16Trap,
  Suld3DV4I32Trap,

  Suld1DI8Zero,
  Suld1DI16Zero,
  Suld1DI32Zero,
  Suld1DI64Zero,
  Suld1DV2I8Zero,
  Suld1DV2I16Zero,
  Suld1DV2I32Zero,
  Suld1DV2I64Zero,
  Suld1DV4I8Zero,
  Suld1DV4I16Zero,
  Suld1DV4I32Zero,

  Suld1DArrayI8Zero,
  Suld1DArrayI16Zero,
  Suld1DArrayI32Zero,
  Suld1DArrayI64Zero,
  Suld1DArrayV2I8Zero,
  Suld1DArrayV2I16Zero,
  Suld1DArrayV2I32Zero,
  Suld1DArrayV2I64Zero,
  Suld1DArrayV4I8Zero,
  Suld1DArrayV4I16Zero,
  Suld1DArrayV4I32Zero,

  Suld2DI8Zero,
  Suld2DI16Zero,
  Suld2DI32Zero,
  Suld2DI64Zero,
  Suld2DV2I8Zero,
  Suld2DV2I16Zero,
  Suld2DV2I32Zero,
  Suld2DV2I64Zero,
  Suld2DV4I8Zero,
  Suld2DV4I16Zero,
  Suld2DV4I32Zero,

  Suld2DArrayI8Zero,
  Suld2DArrayI16Zero,
  Suld2DArrayI32Zero,
  Suld2DArrayI64Zero,
  Suld2DArrayV2I8Zero,
  Suld2DArrayV2I16Zero,
  Suld2DArrayV2I32Zero,
  Suld2DArrayV2I64Zero,
  Suld2DArrayV4I8Zero,
  Suld2DArrayV4I16Zero,
  Suld2DArrayV4I32Zero,

  Suld3DI8Zero,
  Suld3DI16Zero,
  Suld3DI32Zero,
  Suld3DI64Zero,
  Suld3DV2I8Zero,
  Suld3DV2I16Zero,
  Suld3DV2I32Zero,
  Suld3DV2I64Zero,
  Suld3DV4I8Zero,
  Suld3DV4I16Zero,
  Suld3DV4I32Zero
};
} // namespace NVPTXISD
431 
432 class NVPTXSubtarget;
433 
434 //===--------------------------------------------------------------------===//
435 // TargetLowering Implementation
436 //===--------------------------------------------------------------------===//
437 class NVPTXTargetLowering : public TargetLowering {
438 public:
439   explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
440                                const NVPTXSubtarget &STI);
441   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
442 
443   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
444   SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
445                              SelectionDAG &DAG) const;
446 
447   const char *getTargetNodeName(unsigned Opcode) const override;
448 
449   bool isTypeSupportedInIntrinsic(MVT VT) const;
450 
451   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
452                           unsigned Intrinsic) const override;
453 
454   /// isLegalAddressingMode - Return true if the addressing mode represented
455   /// by AM is legal for this target, for a load/store of the specified type
456   /// Used to guide target specific optimizations, like loop strength
457   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458   /// address mode (CodeGenPrepare.cpp)
459   bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
460 
461   /// getFunctionAlignment - Return the Log2 alignment of this function.
462   unsigned getFunctionAlignment(const Function *F) const;
463 
getSetCCResultType(LLVMContext & Ctx,EVT VT)464   EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
465     if (VT.isVector())
466       return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
467     return MVT::i1;
468   }
469 
470   ConstraintType
471   getConstraintType(const std::string &Constraint) const override;
472   std::pair<unsigned, const TargetRegisterClass *>
473   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
474                                const std::string &Constraint,
475                                MVT VT) const override;
476 
477   SDValue LowerFormalArguments(
478       SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
479       const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
480       SmallVectorImpl<SDValue> &InVals) const override;
481 
482   SDValue LowerCall(CallLoweringInfo &CLI,
483                     SmallVectorImpl<SDValue> &InVals) const override;
484 
485   std::string getPrototype(Type *, const ArgListTy &,
486                            const SmallVectorImpl<ISD::OutputArg> &,
487                            unsigned retAlignment,
488                            const ImmutableCallSite *CS) const;
489 
490   SDValue
491   LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
492               const SmallVectorImpl<ISD::OutputArg> &Outs,
493               const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
494               SelectionDAG &DAG) const override;
495 
496   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
497                                     std::vector<SDValue> &Ops,
498                                     SelectionDAG &DAG) const override;
499 
getInlineAsmMemConstraint(const std::string & ConstraintCode)500   unsigned getInlineAsmMemConstraint(
501       const std::string &ConstraintCode) const override {
502     // FIXME: Map different constraints differently.
503     return InlineAsm::Constraint_m;
504   }
505 
506   const NVPTXTargetMachine *nvTM;
507 
508   // PTX always uses 32-bit shift amounts
getScalarShiftAmountTy(EVT LHSTy)509   MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
510 
511   TargetLoweringBase::LegalizeTypeAction
512   getPreferredVectorAction(EVT VT) const override;
513 
514   bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
515 
isFMAFasterThanFMulAndFAdd(EVT)516   bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
517 
enableAggressiveFMAFusion(EVT VT)518   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
519 
520 private:
521   const NVPTXSubtarget &STI; // cache the subtarget here
522 
523   SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
524                      EVT = MVT::i32) const;
525   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
526   SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
527 
528   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
529 
530   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
531   SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
532 
533   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
534   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
535   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
536 
537   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
538   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
539 
540   SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
541 
542   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
543                           SelectionDAG &DAG) const override;
544   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
545 
546   unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
547                                 Type *Ty, unsigned Idx) const;
548 };
549 } // namespace llvm
550 
551 #endif
552