1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetIntrinsicInfo.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "nvptx-isel"
28 
29 static cl::opt<int> UsePrecDivF32(
30     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32              " IEEE Compliant F32 div.rnd if available."),
33     cl::init(2));
34 
35 static cl::opt<bool>
36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38           cl::init(true));
39 
40 static cl::opt<bool>
41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43            cl::init(false));
44 
45 
46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
47 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49                                        llvm::CodeGenOpt::Level OptLevel) {
50   return new NVPTXDAGToDAGISel(TM, OptLevel);
51 }
52 
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54                                      CodeGenOpt::Level OptLevel)
55     : SelectionDAGISel(tm, OptLevel), TM(tm) {
56   doMulWide = (OptLevel > 0);
57 }
58 
runOnMachineFunction(MachineFunction & MF)59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61     return SelectionDAGISel::runOnMachineFunction(MF);
62 }
63 
getDivF32Level() const64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65   if (UsePrecDivF32.getNumOccurrences() > 0) {
66     // If nvptx-prec-div32=N is used on the command-line, always honor it
67     return UsePrecDivF32;
68   } else {
69     // Otherwise, use div.approx if fast math is enabled
70     if (TM.Options.UnsafeFPMath)
71       return 0;
72     else
73       return 2;
74   }
75 }
76 
usePrecSqrtF32() const77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80     return UsePrecSqrtF32;
81   } else {
82     // Otherwise, use sqrt.approx if fast math is enabled
83     return !TM.Options.UnsafeFPMath;
84   }
85 }
86 
useF32FTZ() const87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88   if (FtzEnabled.getNumOccurrences() > 0) {
89     // If nvptx-f32ftz is used on the command-line, always honor it
90     return FtzEnabled;
91   } else {
92     const Function *F = MF->getFunction();
93     // Otherwise, check for an nvptx-f32ftz attribute on the function
94     if (F->hasFnAttribute("nvptx-f32ftz"))
95       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96     else
97       return false;
98   }
99 }
100 
allowFMA() const101 bool NVPTXDAGToDAGISel::allowFMA() const {
102   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103   return TL->allowFMA(*MF, OptLevel);
104 }
105 
106 /// Select - Select instructions not customized! Used for
107 /// expanded, promoted and normal instructions.
Select(SDNode * N)108 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
109 
110   if (N->isMachineOpcode()) {
111     N->setNodeId(-1);
112     return nullptr; // Already selected.
113   }
114 
115   SDNode *ResNode = nullptr;
116   switch (N->getOpcode()) {
117   case ISD::LOAD:
118     ResNode = SelectLoad(N);
119     break;
120   case ISD::STORE:
121     ResNode = SelectStore(N);
122     break;
123   case NVPTXISD::LoadV2:
124   case NVPTXISD::LoadV4:
125     ResNode = SelectLoadVector(N);
126     break;
127   case NVPTXISD::LDGV2:
128   case NVPTXISD::LDGV4:
129   case NVPTXISD::LDUV2:
130   case NVPTXISD::LDUV4:
131     ResNode = SelectLDGLDU(N);
132     break;
133   case NVPTXISD::StoreV2:
134   case NVPTXISD::StoreV4:
135     ResNode = SelectStoreVector(N);
136     break;
137   case NVPTXISD::LoadParam:
138   case NVPTXISD::LoadParamV2:
139   case NVPTXISD::LoadParamV4:
140     ResNode = SelectLoadParam(N);
141     break;
142   case NVPTXISD::StoreRetval:
143   case NVPTXISD::StoreRetvalV2:
144   case NVPTXISD::StoreRetvalV4:
145     ResNode = SelectStoreRetval(N);
146     break;
147   case NVPTXISD::StoreParam:
148   case NVPTXISD::StoreParamV2:
149   case NVPTXISD::StoreParamV4:
150   case NVPTXISD::StoreParamS32:
151   case NVPTXISD::StoreParamU32:
152     ResNode = SelectStoreParam(N);
153     break;
154   case ISD::INTRINSIC_WO_CHAIN:
155     ResNode = SelectIntrinsicNoChain(N);
156     break;
157   case ISD::INTRINSIC_W_CHAIN:
158     ResNode = SelectIntrinsicChain(N);
159     break;
160   case NVPTXISD::Tex1DFloatS32:
161   case NVPTXISD::Tex1DFloatFloat:
162   case NVPTXISD::Tex1DFloatFloatLevel:
163   case NVPTXISD::Tex1DFloatFloatGrad:
164   case NVPTXISD::Tex1DS32S32:
165   case NVPTXISD::Tex1DS32Float:
166   case NVPTXISD::Tex1DS32FloatLevel:
167   case NVPTXISD::Tex1DS32FloatGrad:
168   case NVPTXISD::Tex1DU32S32:
169   case NVPTXISD::Tex1DU32Float:
170   case NVPTXISD::Tex1DU32FloatLevel:
171   case NVPTXISD::Tex1DU32FloatGrad:
172   case NVPTXISD::Tex1DArrayFloatS32:
173   case NVPTXISD::Tex1DArrayFloatFloat:
174   case NVPTXISD::Tex1DArrayFloatFloatLevel:
175   case NVPTXISD::Tex1DArrayFloatFloatGrad:
176   case NVPTXISD::Tex1DArrayS32S32:
177   case NVPTXISD::Tex1DArrayS32Float:
178   case NVPTXISD::Tex1DArrayS32FloatLevel:
179   case NVPTXISD::Tex1DArrayS32FloatGrad:
180   case NVPTXISD::Tex1DArrayU32S32:
181   case NVPTXISD::Tex1DArrayU32Float:
182   case NVPTXISD::Tex1DArrayU32FloatLevel:
183   case NVPTXISD::Tex1DArrayU32FloatGrad:
184   case NVPTXISD::Tex2DFloatS32:
185   case NVPTXISD::Tex2DFloatFloat:
186   case NVPTXISD::Tex2DFloatFloatLevel:
187   case NVPTXISD::Tex2DFloatFloatGrad:
188   case NVPTXISD::Tex2DS32S32:
189   case NVPTXISD::Tex2DS32Float:
190   case NVPTXISD::Tex2DS32FloatLevel:
191   case NVPTXISD::Tex2DS32FloatGrad:
192   case NVPTXISD::Tex2DU32S32:
193   case NVPTXISD::Tex2DU32Float:
194   case NVPTXISD::Tex2DU32FloatLevel:
195   case NVPTXISD::Tex2DU32FloatGrad:
196   case NVPTXISD::Tex2DArrayFloatS32:
197   case NVPTXISD::Tex2DArrayFloatFloat:
198   case NVPTXISD::Tex2DArrayFloatFloatLevel:
199   case NVPTXISD::Tex2DArrayFloatFloatGrad:
200   case NVPTXISD::Tex2DArrayS32S32:
201   case NVPTXISD::Tex2DArrayS32Float:
202   case NVPTXISD::Tex2DArrayS32FloatLevel:
203   case NVPTXISD::Tex2DArrayS32FloatGrad:
204   case NVPTXISD::Tex2DArrayU32S32:
205   case NVPTXISD::Tex2DArrayU32Float:
206   case NVPTXISD::Tex2DArrayU32FloatLevel:
207   case NVPTXISD::Tex2DArrayU32FloatGrad:
208   case NVPTXISD::Tex3DFloatS32:
209   case NVPTXISD::Tex3DFloatFloat:
210   case NVPTXISD::Tex3DFloatFloatLevel:
211   case NVPTXISD::Tex3DFloatFloatGrad:
212   case NVPTXISD::Tex3DS32S32:
213   case NVPTXISD::Tex3DS32Float:
214   case NVPTXISD::Tex3DS32FloatLevel:
215   case NVPTXISD::Tex3DS32FloatGrad:
216   case NVPTXISD::Tex3DU32S32:
217   case NVPTXISD::Tex3DU32Float:
218   case NVPTXISD::Tex3DU32FloatLevel:
219   case NVPTXISD::Tex3DU32FloatGrad:
220   case NVPTXISD::TexCubeFloatFloat:
221   case NVPTXISD::TexCubeFloatFloatLevel:
222   case NVPTXISD::TexCubeS32Float:
223   case NVPTXISD::TexCubeS32FloatLevel:
224   case NVPTXISD::TexCubeU32Float:
225   case NVPTXISD::TexCubeU32FloatLevel:
226   case NVPTXISD::TexCubeArrayFloatFloat:
227   case NVPTXISD::TexCubeArrayFloatFloatLevel:
228   case NVPTXISD::TexCubeArrayS32Float:
229   case NVPTXISD::TexCubeArrayS32FloatLevel:
230   case NVPTXISD::TexCubeArrayU32Float:
231   case NVPTXISD::TexCubeArrayU32FloatLevel:
232   case NVPTXISD::Tld4R2DFloatFloat:
233   case NVPTXISD::Tld4G2DFloatFloat:
234   case NVPTXISD::Tld4B2DFloatFloat:
235   case NVPTXISD::Tld4A2DFloatFloat:
236   case NVPTXISD::Tld4R2DS64Float:
237   case NVPTXISD::Tld4G2DS64Float:
238   case NVPTXISD::Tld4B2DS64Float:
239   case NVPTXISD::Tld4A2DS64Float:
240   case NVPTXISD::Tld4R2DU64Float:
241   case NVPTXISD::Tld4G2DU64Float:
242   case NVPTXISD::Tld4B2DU64Float:
243   case NVPTXISD::Tld4A2DU64Float:
244   case NVPTXISD::TexUnified1DFloatS32:
245   case NVPTXISD::TexUnified1DFloatFloat:
246   case NVPTXISD::TexUnified1DFloatFloatLevel:
247   case NVPTXISD::TexUnified1DFloatFloatGrad:
248   case NVPTXISD::TexUnified1DS32S32:
249   case NVPTXISD::TexUnified1DS32Float:
250   case NVPTXISD::TexUnified1DS32FloatLevel:
251   case NVPTXISD::TexUnified1DS32FloatGrad:
252   case NVPTXISD::TexUnified1DU32S32:
253   case NVPTXISD::TexUnified1DU32Float:
254   case NVPTXISD::TexUnified1DU32FloatLevel:
255   case NVPTXISD::TexUnified1DU32FloatGrad:
256   case NVPTXISD::TexUnified1DArrayFloatS32:
257   case NVPTXISD::TexUnified1DArrayFloatFloat:
258   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
259   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
260   case NVPTXISD::TexUnified1DArrayS32S32:
261   case NVPTXISD::TexUnified1DArrayS32Float:
262   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
263   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
264   case NVPTXISD::TexUnified1DArrayU32S32:
265   case NVPTXISD::TexUnified1DArrayU32Float:
266   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
267   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
268   case NVPTXISD::TexUnified2DFloatS32:
269   case NVPTXISD::TexUnified2DFloatFloat:
270   case NVPTXISD::TexUnified2DFloatFloatLevel:
271   case NVPTXISD::TexUnified2DFloatFloatGrad:
272   case NVPTXISD::TexUnified2DS32S32:
273   case NVPTXISD::TexUnified2DS32Float:
274   case NVPTXISD::TexUnified2DS32FloatLevel:
275   case NVPTXISD::TexUnified2DS32FloatGrad:
276   case NVPTXISD::TexUnified2DU32S32:
277   case NVPTXISD::TexUnified2DU32Float:
278   case NVPTXISD::TexUnified2DU32FloatLevel:
279   case NVPTXISD::TexUnified2DU32FloatGrad:
280   case NVPTXISD::TexUnified2DArrayFloatS32:
281   case NVPTXISD::TexUnified2DArrayFloatFloat:
282   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
283   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
284   case NVPTXISD::TexUnified2DArrayS32S32:
285   case NVPTXISD::TexUnified2DArrayS32Float:
286   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
287   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
288   case NVPTXISD::TexUnified2DArrayU32S32:
289   case NVPTXISD::TexUnified2DArrayU32Float:
290   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
291   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
292   case NVPTXISD::TexUnified3DFloatS32:
293   case NVPTXISD::TexUnified3DFloatFloat:
294   case NVPTXISD::TexUnified3DFloatFloatLevel:
295   case NVPTXISD::TexUnified3DFloatFloatGrad:
296   case NVPTXISD::TexUnified3DS32S32:
297   case NVPTXISD::TexUnified3DS32Float:
298   case NVPTXISD::TexUnified3DS32FloatLevel:
299   case NVPTXISD::TexUnified3DS32FloatGrad:
300   case NVPTXISD::TexUnified3DU32S32:
301   case NVPTXISD::TexUnified3DU32Float:
302   case NVPTXISD::TexUnified3DU32FloatLevel:
303   case NVPTXISD::TexUnified3DU32FloatGrad:
304   case NVPTXISD::TexUnifiedCubeFloatFloat:
305   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
306   case NVPTXISD::TexUnifiedCubeS32Float:
307   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
308   case NVPTXISD::TexUnifiedCubeU32Float:
309   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
310   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
311   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
312   case NVPTXISD::TexUnifiedCubeArrayS32Float:
313   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
314   case NVPTXISD::TexUnifiedCubeArrayU32Float:
315   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
316   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
317   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
318   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
319   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
320   case NVPTXISD::Tld4UnifiedR2DS64Float:
321   case NVPTXISD::Tld4UnifiedG2DS64Float:
322   case NVPTXISD::Tld4UnifiedB2DS64Float:
323   case NVPTXISD::Tld4UnifiedA2DS64Float:
324   case NVPTXISD::Tld4UnifiedR2DU64Float:
325   case NVPTXISD::Tld4UnifiedG2DU64Float:
326   case NVPTXISD::Tld4UnifiedB2DU64Float:
327   case NVPTXISD::Tld4UnifiedA2DU64Float:
328     ResNode = SelectTextureIntrinsic(N);
329     break;
330   case NVPTXISD::Suld1DI8Clamp:
331   case NVPTXISD::Suld1DI16Clamp:
332   case NVPTXISD::Suld1DI32Clamp:
333   case NVPTXISD::Suld1DI64Clamp:
334   case NVPTXISD::Suld1DV2I8Clamp:
335   case NVPTXISD::Suld1DV2I16Clamp:
336   case NVPTXISD::Suld1DV2I32Clamp:
337   case NVPTXISD::Suld1DV2I64Clamp:
338   case NVPTXISD::Suld1DV4I8Clamp:
339   case NVPTXISD::Suld1DV4I16Clamp:
340   case NVPTXISD::Suld1DV4I32Clamp:
341   case NVPTXISD::Suld1DArrayI8Clamp:
342   case NVPTXISD::Suld1DArrayI16Clamp:
343   case NVPTXISD::Suld1DArrayI32Clamp:
344   case NVPTXISD::Suld1DArrayI64Clamp:
345   case NVPTXISD::Suld1DArrayV2I8Clamp:
346   case NVPTXISD::Suld1DArrayV2I16Clamp:
347   case NVPTXISD::Suld1DArrayV2I32Clamp:
348   case NVPTXISD::Suld1DArrayV2I64Clamp:
349   case NVPTXISD::Suld1DArrayV4I8Clamp:
350   case NVPTXISD::Suld1DArrayV4I16Clamp:
351   case NVPTXISD::Suld1DArrayV4I32Clamp:
352   case NVPTXISD::Suld2DI8Clamp:
353   case NVPTXISD::Suld2DI16Clamp:
354   case NVPTXISD::Suld2DI32Clamp:
355   case NVPTXISD::Suld2DI64Clamp:
356   case NVPTXISD::Suld2DV2I8Clamp:
357   case NVPTXISD::Suld2DV2I16Clamp:
358   case NVPTXISD::Suld2DV2I32Clamp:
359   case NVPTXISD::Suld2DV2I64Clamp:
360   case NVPTXISD::Suld2DV4I8Clamp:
361   case NVPTXISD::Suld2DV4I16Clamp:
362   case NVPTXISD::Suld2DV4I32Clamp:
363   case NVPTXISD::Suld2DArrayI8Clamp:
364   case NVPTXISD::Suld2DArrayI16Clamp:
365   case NVPTXISD::Suld2DArrayI32Clamp:
366   case NVPTXISD::Suld2DArrayI64Clamp:
367   case NVPTXISD::Suld2DArrayV2I8Clamp:
368   case NVPTXISD::Suld2DArrayV2I16Clamp:
369   case NVPTXISD::Suld2DArrayV2I32Clamp:
370   case NVPTXISD::Suld2DArrayV2I64Clamp:
371   case NVPTXISD::Suld2DArrayV4I8Clamp:
372   case NVPTXISD::Suld2DArrayV4I16Clamp:
373   case NVPTXISD::Suld2DArrayV4I32Clamp:
374   case NVPTXISD::Suld3DI8Clamp:
375   case NVPTXISD::Suld3DI16Clamp:
376   case NVPTXISD::Suld3DI32Clamp:
377   case NVPTXISD::Suld3DI64Clamp:
378   case NVPTXISD::Suld3DV2I8Clamp:
379   case NVPTXISD::Suld3DV2I16Clamp:
380   case NVPTXISD::Suld3DV2I32Clamp:
381   case NVPTXISD::Suld3DV2I64Clamp:
382   case NVPTXISD::Suld3DV4I8Clamp:
383   case NVPTXISD::Suld3DV4I16Clamp:
384   case NVPTXISD::Suld3DV4I32Clamp:
385   case NVPTXISD::Suld1DI8Trap:
386   case NVPTXISD::Suld1DI16Trap:
387   case NVPTXISD::Suld1DI32Trap:
388   case NVPTXISD::Suld1DI64Trap:
389   case NVPTXISD::Suld1DV2I8Trap:
390   case NVPTXISD::Suld1DV2I16Trap:
391   case NVPTXISD::Suld1DV2I32Trap:
392   case NVPTXISD::Suld1DV2I64Trap:
393   case NVPTXISD::Suld1DV4I8Trap:
394   case NVPTXISD::Suld1DV4I16Trap:
395   case NVPTXISD::Suld1DV4I32Trap:
396   case NVPTXISD::Suld1DArrayI8Trap:
397   case NVPTXISD::Suld1DArrayI16Trap:
398   case NVPTXISD::Suld1DArrayI32Trap:
399   case NVPTXISD::Suld1DArrayI64Trap:
400   case NVPTXISD::Suld1DArrayV2I8Trap:
401   case NVPTXISD::Suld1DArrayV2I16Trap:
402   case NVPTXISD::Suld1DArrayV2I32Trap:
403   case NVPTXISD::Suld1DArrayV2I64Trap:
404   case NVPTXISD::Suld1DArrayV4I8Trap:
405   case NVPTXISD::Suld1DArrayV4I16Trap:
406   case NVPTXISD::Suld1DArrayV4I32Trap:
407   case NVPTXISD::Suld2DI8Trap:
408   case NVPTXISD::Suld2DI16Trap:
409   case NVPTXISD::Suld2DI32Trap:
410   case NVPTXISD::Suld2DI64Trap:
411   case NVPTXISD::Suld2DV2I8Trap:
412   case NVPTXISD::Suld2DV2I16Trap:
413   case NVPTXISD::Suld2DV2I32Trap:
414   case NVPTXISD::Suld2DV2I64Trap:
415   case NVPTXISD::Suld2DV4I8Trap:
416   case NVPTXISD::Suld2DV4I16Trap:
417   case NVPTXISD::Suld2DV4I32Trap:
418   case NVPTXISD::Suld2DArrayI8Trap:
419   case NVPTXISD::Suld2DArrayI16Trap:
420   case NVPTXISD::Suld2DArrayI32Trap:
421   case NVPTXISD::Suld2DArrayI64Trap:
422   case NVPTXISD::Suld2DArrayV2I8Trap:
423   case NVPTXISD::Suld2DArrayV2I16Trap:
424   case NVPTXISD::Suld2DArrayV2I32Trap:
425   case NVPTXISD::Suld2DArrayV2I64Trap:
426   case NVPTXISD::Suld2DArrayV4I8Trap:
427   case NVPTXISD::Suld2DArrayV4I16Trap:
428   case NVPTXISD::Suld2DArrayV4I32Trap:
429   case NVPTXISD::Suld3DI8Trap:
430   case NVPTXISD::Suld3DI16Trap:
431   case NVPTXISD::Suld3DI32Trap:
432   case NVPTXISD::Suld3DI64Trap:
433   case NVPTXISD::Suld3DV2I8Trap:
434   case NVPTXISD::Suld3DV2I16Trap:
435   case NVPTXISD::Suld3DV2I32Trap:
436   case NVPTXISD::Suld3DV2I64Trap:
437   case NVPTXISD::Suld3DV4I8Trap:
438   case NVPTXISD::Suld3DV4I16Trap:
439   case NVPTXISD::Suld3DV4I32Trap:
440   case NVPTXISD::Suld1DI8Zero:
441   case NVPTXISD::Suld1DI16Zero:
442   case NVPTXISD::Suld1DI32Zero:
443   case NVPTXISD::Suld1DI64Zero:
444   case NVPTXISD::Suld1DV2I8Zero:
445   case NVPTXISD::Suld1DV2I16Zero:
446   case NVPTXISD::Suld1DV2I32Zero:
447   case NVPTXISD::Suld1DV2I64Zero:
448   case NVPTXISD::Suld1DV4I8Zero:
449   case NVPTXISD::Suld1DV4I16Zero:
450   case NVPTXISD::Suld1DV4I32Zero:
451   case NVPTXISD::Suld1DArrayI8Zero:
452   case NVPTXISD::Suld1DArrayI16Zero:
453   case NVPTXISD::Suld1DArrayI32Zero:
454   case NVPTXISD::Suld1DArrayI64Zero:
455   case NVPTXISD::Suld1DArrayV2I8Zero:
456   case NVPTXISD::Suld1DArrayV2I16Zero:
457   case NVPTXISD::Suld1DArrayV2I32Zero:
458   case NVPTXISD::Suld1DArrayV2I64Zero:
459   case NVPTXISD::Suld1DArrayV4I8Zero:
460   case NVPTXISD::Suld1DArrayV4I16Zero:
461   case NVPTXISD::Suld1DArrayV4I32Zero:
462   case NVPTXISD::Suld2DI8Zero:
463   case NVPTXISD::Suld2DI16Zero:
464   case NVPTXISD::Suld2DI32Zero:
465   case NVPTXISD::Suld2DI64Zero:
466   case NVPTXISD::Suld2DV2I8Zero:
467   case NVPTXISD::Suld2DV2I16Zero:
468   case NVPTXISD::Suld2DV2I32Zero:
469   case NVPTXISD::Suld2DV2I64Zero:
470   case NVPTXISD::Suld2DV4I8Zero:
471   case NVPTXISD::Suld2DV4I16Zero:
472   case NVPTXISD::Suld2DV4I32Zero:
473   case NVPTXISD::Suld2DArrayI8Zero:
474   case NVPTXISD::Suld2DArrayI16Zero:
475   case NVPTXISD::Suld2DArrayI32Zero:
476   case NVPTXISD::Suld2DArrayI64Zero:
477   case NVPTXISD::Suld2DArrayV2I8Zero:
478   case NVPTXISD::Suld2DArrayV2I16Zero:
479   case NVPTXISD::Suld2DArrayV2I32Zero:
480   case NVPTXISD::Suld2DArrayV2I64Zero:
481   case NVPTXISD::Suld2DArrayV4I8Zero:
482   case NVPTXISD::Suld2DArrayV4I16Zero:
483   case NVPTXISD::Suld2DArrayV4I32Zero:
484   case NVPTXISD::Suld3DI8Zero:
485   case NVPTXISD::Suld3DI16Zero:
486   case NVPTXISD::Suld3DI32Zero:
487   case NVPTXISD::Suld3DI64Zero:
488   case NVPTXISD::Suld3DV2I8Zero:
489   case NVPTXISD::Suld3DV2I16Zero:
490   case NVPTXISD::Suld3DV2I32Zero:
491   case NVPTXISD::Suld3DV2I64Zero:
492   case NVPTXISD::Suld3DV4I8Zero:
493   case NVPTXISD::Suld3DV4I16Zero:
494   case NVPTXISD::Suld3DV4I32Zero:
495     ResNode = SelectSurfaceIntrinsic(N);
496     break;
497   case ISD::AND:
498   case ISD::SRA:
499   case ISD::SRL:
500     // Try to select BFE
501     ResNode = SelectBFE(N);
502     break;
503   case ISD::ADDRSPACECAST:
504     ResNode = SelectAddrSpaceCast(N);
505     break;
506   default:
507     break;
508   }
509   if (ResNode)
510     return ResNode;
511   return SelectCode(N);
512 }
513 
SelectIntrinsicChain(SDNode * N)514 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
515   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
516   switch (IID) {
517   default:
518     return NULL;
519   case Intrinsic::nvvm_ldg_global_f:
520   case Intrinsic::nvvm_ldg_global_i:
521   case Intrinsic::nvvm_ldg_global_p:
522   case Intrinsic::nvvm_ldu_global_f:
523   case Intrinsic::nvvm_ldu_global_i:
524   case Intrinsic::nvvm_ldu_global_p:
525     return SelectLDGLDU(N);
526   }
527 }
528 
getCodeAddrSpace(MemSDNode * N)529 static unsigned int getCodeAddrSpace(MemSDNode *N) {
530   const Value *Src = N->getMemOperand()->getValue();
531 
532   if (!Src)
533     return NVPTX::PTXLdStInstCode::GENERIC;
534 
535   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
536     switch (PT->getAddressSpace()) {
537     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
543     default: break;
544     }
545   }
546   return NVPTX::PTXLdStInstCode::GENERIC;
547 }
548 
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)549 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
550                           unsigned CodeAddrSpace, MachineFunction *F) {
551   // To use non-coherent caching, the load has to be from global
552   // memory and we have to prove that the memory area is not written
553   // to anywhere for the duration of the kernel call, not even after
554   // the load.
555   //
556   // To ensure that there are no writes to the memory, we require the
557   // underlying pointer to be a noalias (__restrict) kernel parameter
558   // that is never used for a write. We can only do this for kernel
559   // functions since from within a device function, we cannot know if
560   // there were or will be writes to the memory from the caller - or we
561   // could, but then we would have to do inter-procedural analysis.
562   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
563       !isKernelFunction(*F->getFunction())) {
564     return false;
565   }
566 
567   // We use GetUnderlyingObjects() here instead of
568   // GetUnderlyingObject() mainly because the former looks through phi
569   // nodes while the latter does not. We need to look through phi
570   // nodes to handle pointer induction variables.
571   SmallVector<Value *, 8> Objs;
572   GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
573                        Objs, F->getDataLayout());
574   for (Value *Obj : Objs) {
575     auto *A = dyn_cast<const Argument>(Obj);
576     if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
577   }
578 
579   return true;
580 }
581 
SelectIntrinsicNoChain(SDNode * N)582 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
583   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
584   switch (IID) {
585   default:
586     return nullptr;
587   case Intrinsic::nvvm_texsurf_handle_internal:
588     return SelectTexSurfHandle(N);
589   }
590 }
591 
SelectTexSurfHandle(SDNode * N)592 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
593   // Op 0 is the intrinsic ID
594   SDValue Wrapper = N->getOperand(1);
595   SDValue GlobalVal = Wrapper.getOperand(0);
596   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
597                                 GlobalVal);
598 }
599 
SelectAddrSpaceCast(SDNode * N)600 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
601   SDValue Src = N->getOperand(0);
602   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
603   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
604   unsigned DstAddrSpace = CastN->getDestAddressSpace();
605 
606   assert(SrcAddrSpace != DstAddrSpace &&
607          "addrspacecast must be between different address spaces");
608 
609   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
610     // Specific to generic
611     unsigned Opc;
612     switch (SrcAddrSpace) {
613     default: report_fatal_error("Bad address space in addrspacecast");
614     case ADDRESS_SPACE_GLOBAL:
615       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
616       break;
617     case ADDRESS_SPACE_SHARED:
618       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
619       break;
620     case ADDRESS_SPACE_CONST:
621       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
622       break;
623     case ADDRESS_SPACE_LOCAL:
624       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
625       break;
626     }
627     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
628   } else {
629     // Generic to specific
630     if (SrcAddrSpace != 0)
631       report_fatal_error("Cannot cast between two non-generic address spaces");
632     unsigned Opc;
633     switch (DstAddrSpace) {
634     default: report_fatal_error("Bad address space in addrspacecast");
635     case ADDRESS_SPACE_GLOBAL:
636       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
637                          : NVPTX::cvta_to_global_yes;
638       break;
639     case ADDRESS_SPACE_SHARED:
640       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
641                          : NVPTX::cvta_to_shared_yes;
642       break;
643     case ADDRESS_SPACE_CONST:
644       Opc =
645           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
646       break;
647     case ADDRESS_SPACE_LOCAL:
648       Opc =
649           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
650       break;
651     case ADDRESS_SPACE_PARAM:
652       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
653                          : NVPTX::nvvm_ptr_gen_to_param;
654       break;
655     }
656     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
657   }
658 }
659 
SelectLoad(SDNode * N)660 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
661   SDLoc dl(N);
662   LoadSDNode *LD = cast<LoadSDNode>(N);
663   EVT LoadedVT = LD->getMemoryVT();
664   SDNode *NVPTXLD = nullptr;
665 
666   // do not support pre/post inc/dec
667   if (LD->isIndexed())
668     return nullptr;
669 
670   if (!LoadedVT.isSimple())
671     return nullptr;
672 
673   // Address Space Setting
674   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
675 
676   if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
677     return SelectLDGLDU(N);
678   }
679 
680   // Volatile Setting
681   // - .volatile is only available for .global and .shared
682   bool isVolatile = LD->isVolatile();
683   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
684       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
685       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
686     isVolatile = false;
687 
688   // Vector Setting
689   MVT SimpleVT = LoadedVT.getSimpleVT();
690   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
691   if (SimpleVT.isVector()) {
692     unsigned num = SimpleVT.getVectorNumElements();
693     if (num == 2)
694       vecType = NVPTX::PTXLdStInstCode::V2;
695     else if (num == 4)
696       vecType = NVPTX::PTXLdStInstCode::V4;
697     else
698       return nullptr;
699   }
700 
701   // Type Setting: fromType + fromTypeWidth
702   //
703   // Sign   : ISD::SEXTLOAD
704   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
705   //          type is integer
706   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
707   MVT ScalarVT = SimpleVT.getScalarType();
708   // Read at least 8 bits (predicates are stored as 8-bit values)
709   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
710   unsigned int fromType;
711   if ((LD->getExtensionType() == ISD::SEXTLOAD))
712     fromType = NVPTX::PTXLdStInstCode::Signed;
713   else if (ScalarVT.isFloatingPoint())
714     fromType = NVPTX::PTXLdStInstCode::Float;
715   else
716     fromType = NVPTX::PTXLdStInstCode::Unsigned;
717 
718   // Create the machine instruction DAG
719   SDValue Chain = N->getOperand(0);
720   SDValue N1 = N->getOperand(1);
721   SDValue Addr;
722   SDValue Offset, Base;
723   unsigned Opcode;
724   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
725 
726   if (SelectDirectAddr(N1, Addr)) {
727     switch (TargetVT) {
728     case MVT::i8:
729       Opcode = NVPTX::LD_i8_avar;
730       break;
731     case MVT::i16:
732       Opcode = NVPTX::LD_i16_avar;
733       break;
734     case MVT::i32:
735       Opcode = NVPTX::LD_i32_avar;
736       break;
737     case MVT::i64:
738       Opcode = NVPTX::LD_i64_avar;
739       break;
740     case MVT::f32:
741       Opcode = NVPTX::LD_f32_avar;
742       break;
743     case MVT::f64:
744       Opcode = NVPTX::LD_f64_avar;
745       break;
746     default:
747       return nullptr;
748     }
749     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
750                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
751                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
752     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
753   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
754                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
755     switch (TargetVT) {
756     case MVT::i8:
757       Opcode = NVPTX::LD_i8_asi;
758       break;
759     case MVT::i16:
760       Opcode = NVPTX::LD_i16_asi;
761       break;
762     case MVT::i32:
763       Opcode = NVPTX::LD_i32_asi;
764       break;
765     case MVT::i64:
766       Opcode = NVPTX::LD_i64_asi;
767       break;
768     case MVT::f32:
769       Opcode = NVPTX::LD_f32_asi;
770       break;
771     case MVT::f64:
772       Opcode = NVPTX::LD_f64_asi;
773       break;
774     default:
775       return nullptr;
776     }
777     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
778                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
779                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
780     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
781   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
782                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
783     if (TM.is64Bit()) {
784       switch (TargetVT) {
785       case MVT::i8:
786         Opcode = NVPTX::LD_i8_ari_64;
787         break;
788       case MVT::i16:
789         Opcode = NVPTX::LD_i16_ari_64;
790         break;
791       case MVT::i32:
792         Opcode = NVPTX::LD_i32_ari_64;
793         break;
794       case MVT::i64:
795         Opcode = NVPTX::LD_i64_ari_64;
796         break;
797       case MVT::f32:
798         Opcode = NVPTX::LD_f32_ari_64;
799         break;
800       case MVT::f64:
801         Opcode = NVPTX::LD_f64_ari_64;
802         break;
803       default:
804         return nullptr;
805       }
806     } else {
807       switch (TargetVT) {
808       case MVT::i8:
809         Opcode = NVPTX::LD_i8_ari;
810         break;
811       case MVT::i16:
812         Opcode = NVPTX::LD_i16_ari;
813         break;
814       case MVT::i32:
815         Opcode = NVPTX::LD_i32_ari;
816         break;
817       case MVT::i64:
818         Opcode = NVPTX::LD_i64_ari;
819         break;
820       case MVT::f32:
821         Opcode = NVPTX::LD_f32_ari;
822         break;
823       case MVT::f64:
824         Opcode = NVPTX::LD_f64_ari;
825         break;
826       default:
827         return nullptr;
828       }
829     }
830     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
831                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
832                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
833     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
834   } else {
835     if (TM.is64Bit()) {
836       switch (TargetVT) {
837       case MVT::i8:
838         Opcode = NVPTX::LD_i8_areg_64;
839         break;
840       case MVT::i16:
841         Opcode = NVPTX::LD_i16_areg_64;
842         break;
843       case MVT::i32:
844         Opcode = NVPTX::LD_i32_areg_64;
845         break;
846       case MVT::i64:
847         Opcode = NVPTX::LD_i64_areg_64;
848         break;
849       case MVT::f32:
850         Opcode = NVPTX::LD_f32_areg_64;
851         break;
852       case MVT::f64:
853         Opcode = NVPTX::LD_f64_areg_64;
854         break;
855       default:
856         return nullptr;
857       }
858     } else {
859       switch (TargetVT) {
860       case MVT::i8:
861         Opcode = NVPTX::LD_i8_areg;
862         break;
863       case MVT::i16:
864         Opcode = NVPTX::LD_i16_areg;
865         break;
866       case MVT::i32:
867         Opcode = NVPTX::LD_i32_areg;
868         break;
869       case MVT::i64:
870         Opcode = NVPTX::LD_i64_areg;
871         break;
872       case MVT::f32:
873         Opcode = NVPTX::LD_f32_areg;
874         break;
875       case MVT::f64:
876         Opcode = NVPTX::LD_f64_areg;
877         break;
878       default:
879         return nullptr;
880       }
881     }
882     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
883                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
884                       getI32Imm(fromTypeWidth, dl), N1, Chain };
885     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
886   }
887 
888   if (NVPTXLD) {
889     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
890     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
891     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
892   }
893 
894   return NVPTXLD;
895 }
896 
SelectLoadVector(SDNode * N)897 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
898 
899   SDValue Chain = N->getOperand(0);
900   SDValue Op1 = N->getOperand(1);
901   SDValue Addr, Offset, Base;
902   unsigned Opcode;
903   SDLoc DL(N);
904   SDNode *LD;
905   MemSDNode *MemSD = cast<MemSDNode>(N);
906   EVT LoadedVT = MemSD->getMemoryVT();
907 
908   if (!LoadedVT.isSimple())
909     return nullptr;
910 
911   // Address Space Setting
912   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
913 
914   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
915     return SelectLDGLDU(N);
916   }
917 
918   // Volatile Setting
919   // - .volatile is only availalble for .global and .shared
920   bool IsVolatile = MemSD->isVolatile();
921   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
922       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
923       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
924     IsVolatile = false;
925 
926   // Vector Setting
927   MVT SimpleVT = LoadedVT.getSimpleVT();
928 
929   // Type Setting: fromType + fromTypeWidth
930   //
931   // Sign   : ISD::SEXTLOAD
932   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
933   //          type is integer
934   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
935   MVT ScalarVT = SimpleVT.getScalarType();
936   // Read at least 8 bits (predicates are stored as 8-bit values)
937   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
938   unsigned int FromType;
939   // The last operand holds the original LoadSDNode::getExtensionType() value
940   unsigned ExtensionType = cast<ConstantSDNode>(
941       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
942   if (ExtensionType == ISD::SEXTLOAD)
943     FromType = NVPTX::PTXLdStInstCode::Signed;
944   else if (ScalarVT.isFloatingPoint())
945     FromType = NVPTX::PTXLdStInstCode::Float;
946   else
947     FromType = NVPTX::PTXLdStInstCode::Unsigned;
948 
949   unsigned VecType;
950 
951   switch (N->getOpcode()) {
952   case NVPTXISD::LoadV2:
953     VecType = NVPTX::PTXLdStInstCode::V2;
954     break;
955   case NVPTXISD::LoadV4:
956     VecType = NVPTX::PTXLdStInstCode::V4;
957     break;
958   default:
959     return nullptr;
960   }
961 
962   EVT EltVT = N->getValueType(0);
963 
964   if (SelectDirectAddr(Op1, Addr)) {
965     switch (N->getOpcode()) {
966     default:
967       return nullptr;
968     case NVPTXISD::LoadV2:
969       switch (EltVT.getSimpleVT().SimpleTy) {
970       default:
971         return nullptr;
972       case MVT::i8:
973         Opcode = NVPTX::LDV_i8_v2_avar;
974         break;
975       case MVT::i16:
976         Opcode = NVPTX::LDV_i16_v2_avar;
977         break;
978       case MVT::i32:
979         Opcode = NVPTX::LDV_i32_v2_avar;
980         break;
981       case MVT::i64:
982         Opcode = NVPTX::LDV_i64_v2_avar;
983         break;
984       case MVT::f32:
985         Opcode = NVPTX::LDV_f32_v2_avar;
986         break;
987       case MVT::f64:
988         Opcode = NVPTX::LDV_f64_v2_avar;
989         break;
990       }
991       break;
992     case NVPTXISD::LoadV4:
993       switch (EltVT.getSimpleVT().SimpleTy) {
994       default:
995         return nullptr;
996       case MVT::i8:
997         Opcode = NVPTX::LDV_i8_v4_avar;
998         break;
999       case MVT::i16:
1000         Opcode = NVPTX::LDV_i16_v4_avar;
1001         break;
1002       case MVT::i32:
1003         Opcode = NVPTX::LDV_i32_v4_avar;
1004         break;
1005       case MVT::f32:
1006         Opcode = NVPTX::LDV_f32_v4_avar;
1007         break;
1008       }
1009       break;
1010     }
1011 
1012     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1013                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1014                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
1015     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1016   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1017                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1018     switch (N->getOpcode()) {
1019     default:
1020       return nullptr;
1021     case NVPTXISD::LoadV2:
1022       switch (EltVT.getSimpleVT().SimpleTy) {
1023       default:
1024         return nullptr;
1025       case MVT::i8:
1026         Opcode = NVPTX::LDV_i8_v2_asi;
1027         break;
1028       case MVT::i16:
1029         Opcode = NVPTX::LDV_i16_v2_asi;
1030         break;
1031       case MVT::i32:
1032         Opcode = NVPTX::LDV_i32_v2_asi;
1033         break;
1034       case MVT::i64:
1035         Opcode = NVPTX::LDV_i64_v2_asi;
1036         break;
1037       case MVT::f32:
1038         Opcode = NVPTX::LDV_f32_v2_asi;
1039         break;
1040       case MVT::f64:
1041         Opcode = NVPTX::LDV_f64_v2_asi;
1042         break;
1043       }
1044       break;
1045     case NVPTXISD::LoadV4:
1046       switch (EltVT.getSimpleVT().SimpleTy) {
1047       default:
1048         return nullptr;
1049       case MVT::i8:
1050         Opcode = NVPTX::LDV_i8_v4_asi;
1051         break;
1052       case MVT::i16:
1053         Opcode = NVPTX::LDV_i16_v4_asi;
1054         break;
1055       case MVT::i32:
1056         Opcode = NVPTX::LDV_i32_v4_asi;
1057         break;
1058       case MVT::f32:
1059         Opcode = NVPTX::LDV_f32_v4_asi;
1060         break;
1061       }
1062       break;
1063     }
1064 
1065     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1066                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1067                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1068     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1069   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1070                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1071     if (TM.is64Bit()) {
1072       switch (N->getOpcode()) {
1073       default:
1074         return nullptr;
1075       case NVPTXISD::LoadV2:
1076         switch (EltVT.getSimpleVT().SimpleTy) {
1077         default:
1078           return nullptr;
1079         case MVT::i8:
1080           Opcode = NVPTX::LDV_i8_v2_ari_64;
1081           break;
1082         case MVT::i16:
1083           Opcode = NVPTX::LDV_i16_v2_ari_64;
1084           break;
1085         case MVT::i32:
1086           Opcode = NVPTX::LDV_i32_v2_ari_64;
1087           break;
1088         case MVT::i64:
1089           Opcode = NVPTX::LDV_i64_v2_ari_64;
1090           break;
1091         case MVT::f32:
1092           Opcode = NVPTX::LDV_f32_v2_ari_64;
1093           break;
1094         case MVT::f64:
1095           Opcode = NVPTX::LDV_f64_v2_ari_64;
1096           break;
1097         }
1098         break;
1099       case NVPTXISD::LoadV4:
1100         switch (EltVT.getSimpleVT().SimpleTy) {
1101         default:
1102           return nullptr;
1103         case MVT::i8:
1104           Opcode = NVPTX::LDV_i8_v4_ari_64;
1105           break;
1106         case MVT::i16:
1107           Opcode = NVPTX::LDV_i16_v4_ari_64;
1108           break;
1109         case MVT::i32:
1110           Opcode = NVPTX::LDV_i32_v4_ari_64;
1111           break;
1112         case MVT::f32:
1113           Opcode = NVPTX::LDV_f32_v4_ari_64;
1114           break;
1115         }
1116         break;
1117       }
1118     } else {
1119       switch (N->getOpcode()) {
1120       default:
1121         return nullptr;
1122       case NVPTXISD::LoadV2:
1123         switch (EltVT.getSimpleVT().SimpleTy) {
1124         default:
1125           return nullptr;
1126         case MVT::i8:
1127           Opcode = NVPTX::LDV_i8_v2_ari;
1128           break;
1129         case MVT::i16:
1130           Opcode = NVPTX::LDV_i16_v2_ari;
1131           break;
1132         case MVT::i32:
1133           Opcode = NVPTX::LDV_i32_v2_ari;
1134           break;
1135         case MVT::i64:
1136           Opcode = NVPTX::LDV_i64_v2_ari;
1137           break;
1138         case MVT::f32:
1139           Opcode = NVPTX::LDV_f32_v2_ari;
1140           break;
1141         case MVT::f64:
1142           Opcode = NVPTX::LDV_f64_v2_ari;
1143           break;
1144         }
1145         break;
1146       case NVPTXISD::LoadV4:
1147         switch (EltVT.getSimpleVT().SimpleTy) {
1148         default:
1149           return nullptr;
1150         case MVT::i8:
1151           Opcode = NVPTX::LDV_i8_v4_ari;
1152           break;
1153         case MVT::i16:
1154           Opcode = NVPTX::LDV_i16_v4_ari;
1155           break;
1156         case MVT::i32:
1157           Opcode = NVPTX::LDV_i32_v4_ari;
1158           break;
1159         case MVT::f32:
1160           Opcode = NVPTX::LDV_f32_v4_ari;
1161           break;
1162         }
1163         break;
1164       }
1165     }
1166 
1167     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1168                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1169                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1170 
1171     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1172   } else {
1173     if (TM.is64Bit()) {
1174       switch (N->getOpcode()) {
1175       default:
1176         return nullptr;
1177       case NVPTXISD::LoadV2:
1178         switch (EltVT.getSimpleVT().SimpleTy) {
1179         default:
1180           return nullptr;
1181         case MVT::i8:
1182           Opcode = NVPTX::LDV_i8_v2_areg_64;
1183           break;
1184         case MVT::i16:
1185           Opcode = NVPTX::LDV_i16_v2_areg_64;
1186           break;
1187         case MVT::i32:
1188           Opcode = NVPTX::LDV_i32_v2_areg_64;
1189           break;
1190         case MVT::i64:
1191           Opcode = NVPTX::LDV_i64_v2_areg_64;
1192           break;
1193         case MVT::f32:
1194           Opcode = NVPTX::LDV_f32_v2_areg_64;
1195           break;
1196         case MVT::f64:
1197           Opcode = NVPTX::LDV_f64_v2_areg_64;
1198           break;
1199         }
1200         break;
1201       case NVPTXISD::LoadV4:
1202         switch (EltVT.getSimpleVT().SimpleTy) {
1203         default:
1204           return nullptr;
1205         case MVT::i8:
1206           Opcode = NVPTX::LDV_i8_v4_areg_64;
1207           break;
1208         case MVT::i16:
1209           Opcode = NVPTX::LDV_i16_v4_areg_64;
1210           break;
1211         case MVT::i32:
1212           Opcode = NVPTX::LDV_i32_v4_areg_64;
1213           break;
1214         case MVT::f32:
1215           Opcode = NVPTX::LDV_f32_v4_areg_64;
1216           break;
1217         }
1218         break;
1219       }
1220     } else {
1221       switch (N->getOpcode()) {
1222       default:
1223         return nullptr;
1224       case NVPTXISD::LoadV2:
1225         switch (EltVT.getSimpleVT().SimpleTy) {
1226         default:
1227           return nullptr;
1228         case MVT::i8:
1229           Opcode = NVPTX::LDV_i8_v2_areg;
1230           break;
1231         case MVT::i16:
1232           Opcode = NVPTX::LDV_i16_v2_areg;
1233           break;
1234         case MVT::i32:
1235           Opcode = NVPTX::LDV_i32_v2_areg;
1236           break;
1237         case MVT::i64:
1238           Opcode = NVPTX::LDV_i64_v2_areg;
1239           break;
1240         case MVT::f32:
1241           Opcode = NVPTX::LDV_f32_v2_areg;
1242           break;
1243         case MVT::f64:
1244           Opcode = NVPTX::LDV_f64_v2_areg;
1245           break;
1246         }
1247         break;
1248       case NVPTXISD::LoadV4:
1249         switch (EltVT.getSimpleVT().SimpleTy) {
1250         default:
1251           return nullptr;
1252         case MVT::i8:
1253           Opcode = NVPTX::LDV_i8_v4_areg;
1254           break;
1255         case MVT::i16:
1256           Opcode = NVPTX::LDV_i16_v4_areg;
1257           break;
1258         case MVT::i32:
1259           Opcode = NVPTX::LDV_i32_v4_areg;
1260           break;
1261         case MVT::f32:
1262           Opcode = NVPTX::LDV_f32_v4_areg;
1263           break;
1264         }
1265         break;
1266       }
1267     }
1268 
1269     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1270                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1271                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
1272     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1273   }
1274 
1275   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1276   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1277   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1278 
1279   return LD;
1280 }
1281 
SelectLDGLDU(SDNode * N)1282 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1283 
1284   SDValue Chain = N->getOperand(0);
1285   SDValue Op1;
1286   MemSDNode *Mem;
1287   bool IsLDG = true;
1288 
  // For an LDG/LDU intrinsic the address is the third operand; for an LDG/LDU
  // SD node (from custom vector handling) it is the second operand.
1291   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1292     Op1 = N->getOperand(2);
1293     Mem = cast<MemIntrinsicSDNode>(N);
1294     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1295     switch (IID) {
1296     default:
1297       return NULL;
1298     case Intrinsic::nvvm_ldg_global_f:
1299     case Intrinsic::nvvm_ldg_global_i:
1300     case Intrinsic::nvvm_ldg_global_p:
1301       IsLDG = true;
1302       break;
1303     case Intrinsic::nvvm_ldu_global_f:
1304     case Intrinsic::nvvm_ldu_global_i:
1305     case Intrinsic::nvvm_ldu_global_p:
1306       IsLDG = false;
1307       break;
1308     }
1309   } else {
1310     Op1 = N->getOperand(1);
1311     Mem = cast<MemSDNode>(N);
1312   }
1313 
1314   unsigned Opcode;
1315   SDLoc DL(N);
1316   SDNode *LD;
1317   SDValue Base, Offset, Addr;
1318 
1319   EVT EltVT = Mem->getMemoryVT();
1320   if (EltVT.isVector()) {
1321     EltVT = EltVT.getVectorElementType();
1322   }
1323 
1324   if (SelectDirectAddr(Op1, Addr)) {
1325     switch (N->getOpcode()) {
1326     default:
1327       return nullptr;
1328     case ISD::INTRINSIC_W_CHAIN:
1329       if (IsLDG) {
1330         switch (EltVT.getSimpleVT().SimpleTy) {
1331         default:
1332           return nullptr;
1333         case MVT::i8:
1334           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1335           break;
1336         case MVT::i16:
1337           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1338           break;
1339         case MVT::i32:
1340           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1341           break;
1342         case MVT::i64:
1343           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1344           break;
1345         case MVT::f32:
1346           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1347           break;
1348         case MVT::f64:
1349           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1350           break;
1351         }
1352       } else {
1353         switch (EltVT.getSimpleVT().SimpleTy) {
1354         default:
1355           return nullptr;
1356         case MVT::i8:
1357           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1358           break;
1359         case MVT::i16:
1360           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1361           break;
1362         case MVT::i32:
1363           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1364           break;
1365         case MVT::i64:
1366           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1367           break;
1368         case MVT::f32:
1369           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1370           break;
1371         case MVT::f64:
1372           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1373           break;
1374         }
1375       }
1376       break;
1377     case NVPTXISD::LDGV2:
1378       switch (EltVT.getSimpleVT().SimpleTy) {
1379       default:
1380         return nullptr;
1381       case MVT::i8:
1382         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1383         break;
1384       case MVT::i16:
1385         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1386         break;
1387       case MVT::i32:
1388         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1389         break;
1390       case MVT::i64:
1391         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1392         break;
1393       case MVT::f32:
1394         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1395         break;
1396       case MVT::f64:
1397         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1398         break;
1399       }
1400       break;
1401     case NVPTXISD::LDUV2:
1402       switch (EltVT.getSimpleVT().SimpleTy) {
1403       default:
1404         return nullptr;
1405       case MVT::i8:
1406         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1407         break;
1408       case MVT::i16:
1409         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1410         break;
1411       case MVT::i32:
1412         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1413         break;
1414       case MVT::i64:
1415         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1416         break;
1417       case MVT::f32:
1418         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1419         break;
1420       case MVT::f64:
1421         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1422         break;
1423       }
1424       break;
1425     case NVPTXISD::LDGV4:
1426       switch (EltVT.getSimpleVT().SimpleTy) {
1427       default:
1428         return nullptr;
1429       case MVT::i8:
1430         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1431         break;
1432       case MVT::i16:
1433         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1434         break;
1435       case MVT::i32:
1436         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1437         break;
1438       case MVT::f32:
1439         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1440         break;
1441       }
1442       break;
1443     case NVPTXISD::LDUV4:
1444       switch (EltVT.getSimpleVT().SimpleTy) {
1445       default:
1446         return nullptr;
1447       case MVT::i8:
1448         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1449         break;
1450       case MVT::i16:
1451         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1452         break;
1453       case MVT::i32:
1454         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1455         break;
1456       case MVT::f32:
1457         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1458         break;
1459       }
1460       break;
1461     }
1462 
1463     SDValue Ops[] = { Addr, Chain };
1464     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1465   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1466                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1467     if (TM.is64Bit()) {
1468       switch (N->getOpcode()) {
1469       default:
1470         return nullptr;
1471       case ISD::LOAD:
1472       case ISD::INTRINSIC_W_CHAIN:
1473         if (IsLDG) {
1474           switch (EltVT.getSimpleVT().SimpleTy) {
1475           default:
1476             return nullptr;
1477           case MVT::i8:
1478             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1479             break;
1480           case MVT::i16:
1481             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1482             break;
1483           case MVT::i32:
1484             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1485             break;
1486           case MVT::i64:
1487             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1488             break;
1489           case MVT::f32:
1490             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1491             break;
1492           case MVT::f64:
1493             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1494             break;
1495           }
1496         } else {
1497           switch (EltVT.getSimpleVT().SimpleTy) {
1498           default:
1499             return nullptr;
1500           case MVT::i8:
1501             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1502             break;
1503           case MVT::i16:
1504             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1505             break;
1506           case MVT::i32:
1507             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1508             break;
1509           case MVT::i64:
1510             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1511             break;
1512           case MVT::f32:
1513             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1514             break;
1515           case MVT::f64:
1516             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1517             break;
1518           }
1519         }
1520         break;
1521       case NVPTXISD::LoadV2:
1522       case NVPTXISD::LDGV2:
1523         switch (EltVT.getSimpleVT().SimpleTy) {
1524         default:
1525           return nullptr;
1526         case MVT::i8:
1527           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1528           break;
1529         case MVT::i16:
1530           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1531           break;
1532         case MVT::i32:
1533           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1534           break;
1535         case MVT::i64:
1536           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1537           break;
1538         case MVT::f32:
1539           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1540           break;
1541         case MVT::f64:
1542           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1543           break;
1544         }
1545         break;
1546       case NVPTXISD::LDUV2:
1547         switch (EltVT.getSimpleVT().SimpleTy) {
1548         default:
1549           return nullptr;
1550         case MVT::i8:
1551           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1552           break;
1553         case MVT::i16:
1554           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1555           break;
1556         case MVT::i32:
1557           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1558           break;
1559         case MVT::i64:
1560           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1561           break;
1562         case MVT::f32:
1563           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1564           break;
1565         case MVT::f64:
1566           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1567           break;
1568         }
1569         break;
1570       case NVPTXISD::LoadV4:
1571       case NVPTXISD::LDGV4:
1572         switch (EltVT.getSimpleVT().SimpleTy) {
1573         default:
1574           return nullptr;
1575         case MVT::i8:
1576           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1577           break;
1578         case MVT::i16:
1579           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1580           break;
1581         case MVT::i32:
1582           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1583           break;
1584         case MVT::f32:
1585           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1586           break;
1587         }
1588         break;
1589       case NVPTXISD::LDUV4:
1590         switch (EltVT.getSimpleVT().SimpleTy) {
1591         default:
1592           return nullptr;
1593         case MVT::i8:
1594           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1595           break;
1596         case MVT::i16:
1597           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1598           break;
1599         case MVT::i32:
1600           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1601           break;
1602         case MVT::f32:
1603           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1604           break;
1605         }
1606         break;
1607       }
1608     } else {
1609       switch (N->getOpcode()) {
1610       default:
1611         return nullptr;
1612       case ISD::LOAD:
1613       case ISD::INTRINSIC_W_CHAIN:
1614         if (IsLDG) {
1615           switch (EltVT.getSimpleVT().SimpleTy) {
1616           default:
1617             return nullptr;
1618           case MVT::i8:
1619             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1620             break;
1621           case MVT::i16:
1622             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1623             break;
1624           case MVT::i32:
1625             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1626             break;
1627           case MVT::i64:
1628             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1629             break;
1630           case MVT::f32:
1631             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1632             break;
1633           case MVT::f64:
1634             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1635             break;
1636           }
1637         } else {
1638           switch (EltVT.getSimpleVT().SimpleTy) {
1639           default:
1640             return nullptr;
1641           case MVT::i8:
1642             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1643             break;
1644           case MVT::i16:
1645             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1646             break;
1647           case MVT::i32:
1648             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1649             break;
1650           case MVT::i64:
1651             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1652             break;
1653           case MVT::f32:
1654             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1655             break;
1656           case MVT::f64:
1657             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1658             break;
1659           }
1660         }
1661         break;
1662       case NVPTXISD::LoadV2:
1663       case NVPTXISD::LDGV2:
1664         switch (EltVT.getSimpleVT().SimpleTy) {
1665         default:
1666           return nullptr;
1667         case MVT::i8:
1668           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1669           break;
1670         case MVT::i16:
1671           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1672           break;
1673         case MVT::i32:
1674           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1675           break;
1676         case MVT::i64:
1677           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1678           break;
1679         case MVT::f32:
1680           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1681           break;
1682         case MVT::f64:
1683           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1684           break;
1685         }
1686         break;
1687       case NVPTXISD::LDUV2:
1688         switch (EltVT.getSimpleVT().SimpleTy) {
1689         default:
1690           return nullptr;
1691         case MVT::i8:
1692           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1693           break;
1694         case MVT::i16:
1695           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1696           break;
1697         case MVT::i32:
1698           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1699           break;
1700         case MVT::i64:
1701           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1702           break;
1703         case MVT::f32:
1704           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1705           break;
1706         case MVT::f64:
1707           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1708           break;
1709         }
1710         break;
1711       case NVPTXISD::LoadV4:
1712       case NVPTXISD::LDGV4:
1713         switch (EltVT.getSimpleVT().SimpleTy) {
1714         default:
1715           return nullptr;
1716         case MVT::i8:
1717           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1718           break;
1719         case MVT::i16:
1720           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1721           break;
1722         case MVT::i32:
1723           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1724           break;
1725         case MVT::f32:
1726           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1727           break;
1728         }
1729         break;
1730       case NVPTXISD::LDUV4:
1731         switch (EltVT.getSimpleVT().SimpleTy) {
1732         default:
1733           return nullptr;
1734         case MVT::i8:
1735           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1736           break;
1737         case MVT::i16:
1738           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1739           break;
1740         case MVT::i32:
1741           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1742           break;
1743         case MVT::f32:
1744           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1745           break;
1746         }
1747         break;
1748       }
1749     }
1750 
1751     SDValue Ops[] = { Base, Offset, Chain };
1752 
1753     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1754   } else {
1755     if (TM.is64Bit()) {
1756       switch (N->getOpcode()) {
1757       default:
1758         return nullptr;
1759       case ISD::LOAD:
1760       case ISD::INTRINSIC_W_CHAIN:
1761         if (IsLDG) {
1762           switch (EltVT.getSimpleVT().SimpleTy) {
1763           default:
1764             return nullptr;
1765           case MVT::i8:
1766             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1767             break;
1768           case MVT::i16:
1769             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1770             break;
1771           case MVT::i32:
1772             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1773             break;
1774           case MVT::i64:
1775             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1776             break;
1777           case MVT::f32:
1778             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1779             break;
1780           case MVT::f64:
1781             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1782             break;
1783           }
1784         } else {
1785           switch (EltVT.getSimpleVT().SimpleTy) {
1786           default:
1787             return nullptr;
1788           case MVT::i8:
1789             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1790             break;
1791           case MVT::i16:
1792             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1793             break;
1794           case MVT::i32:
1795             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1796             break;
1797           case MVT::i64:
1798             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1799             break;
1800           case MVT::f32:
1801             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1802             break;
1803           case MVT::f64:
1804             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1805             break;
1806           }
1807         }
1808         break;
1809       case NVPTXISD::LoadV2:
1810       case NVPTXISD::LDGV2:
1811         switch (EltVT.getSimpleVT().SimpleTy) {
1812         default:
1813           return nullptr;
1814         case MVT::i8:
1815           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1816           break;
1817         case MVT::i16:
1818           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1819           break;
1820         case MVT::i32:
1821           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1822           break;
1823         case MVT::i64:
1824           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1825           break;
1826         case MVT::f32:
1827           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1828           break;
1829         case MVT::f64:
1830           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1831           break;
1832         }
1833         break;
1834       case NVPTXISD::LDUV2:
1835         switch (EltVT.getSimpleVT().SimpleTy) {
1836         default:
1837           return nullptr;
1838         case MVT::i8:
1839           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1840           break;
1841         case MVT::i16:
1842           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1843           break;
1844         case MVT::i32:
1845           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1846           break;
1847         case MVT::i64:
1848           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1849           break;
1850         case MVT::f32:
1851           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1852           break;
1853         case MVT::f64:
1854           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1855           break;
1856         }
1857         break;
1858       case NVPTXISD::LoadV4:
1859       case NVPTXISD::LDGV4:
1860         switch (EltVT.getSimpleVT().SimpleTy) {
1861         default:
1862           return nullptr;
1863         case MVT::i8:
1864           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1865           break;
1866         case MVT::i16:
1867           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1868           break;
1869         case MVT::i32:
1870           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1871           break;
1872         case MVT::f32:
1873           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1874           break;
1875         }
1876         break;
1877       case NVPTXISD::LDUV4:
1878         switch (EltVT.getSimpleVT().SimpleTy) {
1879         default:
1880           return nullptr;
1881         case MVT::i8:
1882           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1883           break;
1884         case MVT::i16:
1885           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1886           break;
1887         case MVT::i32:
1888           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1889           break;
1890         case MVT::f32:
1891           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1892           break;
1893         }
1894         break;
1895       }
1896     } else {
1897       switch (N->getOpcode()) {
1898       default:
1899         return nullptr;
1900       case ISD::LOAD:
1901       case ISD::INTRINSIC_W_CHAIN:
1902         if (IsLDG) {
1903           switch (EltVT.getSimpleVT().SimpleTy) {
1904           default:
1905             return nullptr;
1906           case MVT::i8:
1907             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1908             break;
1909           case MVT::i16:
1910             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1911             break;
1912           case MVT::i32:
1913             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1914             break;
1915           case MVT::i64:
1916             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1917             break;
1918           case MVT::f32:
1919             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1920             break;
1921           case MVT::f64:
1922             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1923             break;
1924           }
1925         } else {
1926           switch (EltVT.getSimpleVT().SimpleTy) {
1927           default:
1928             return nullptr;
1929           case MVT::i8:
1930             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1931             break;
1932           case MVT::i16:
1933             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1934             break;
1935           case MVT::i32:
1936             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1937             break;
1938           case MVT::i64:
1939             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1940             break;
1941           case MVT::f32:
1942             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1943             break;
1944           case MVT::f64:
1945             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1946             break;
1947           }
1948         }
1949         break;
1950       case NVPTXISD::LoadV2:
1951       case NVPTXISD::LDGV2:
1952         switch (EltVT.getSimpleVT().SimpleTy) {
1953         default:
1954           return nullptr;
1955         case MVT::i8:
1956           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1957           break;
1958         case MVT::i16:
1959           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1960           break;
1961         case MVT::i32:
1962           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1963           break;
1964         case MVT::i64:
1965           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1966           break;
1967         case MVT::f32:
1968           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1969           break;
1970         case MVT::f64:
1971           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1972           break;
1973         }
1974         break;
1975       case NVPTXISD::LDUV2:
1976         switch (EltVT.getSimpleVT().SimpleTy) {
1977         default:
1978           return nullptr;
1979         case MVT::i8:
1980           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1981           break;
1982         case MVT::i16:
1983           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1984           break;
1985         case MVT::i32:
1986           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1987           break;
1988         case MVT::i64:
1989           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1990           break;
1991         case MVT::f32:
1992           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1993           break;
1994         case MVT::f64:
1995           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1996           break;
1997         }
1998         break;
1999       case NVPTXISD::LoadV4:
2000       case NVPTXISD::LDGV4:
2001         switch (EltVT.getSimpleVT().SimpleTy) {
2002         default:
2003           return nullptr;
2004         case MVT::i8:
2005           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2006           break;
2007         case MVT::i16:
2008           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2009           break;
2010         case MVT::i32:
2011           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2012           break;
2013         case MVT::f32:
2014           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2015           break;
2016         }
2017         break;
2018       case NVPTXISD::LDUV4:
2019         switch (EltVT.getSimpleVT().SimpleTy) {
2020         default:
2021           return nullptr;
2022         case MVT::i8:
2023           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2024           break;
2025         case MVT::i16:
2026           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2027           break;
2028         case MVT::i32:
2029           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2030           break;
2031         case MVT::f32:
2032           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2033           break;
2034         }
2035         break;
2036       }
2037     }
2038 
2039     SDValue Ops[] = { Op1, Chain };
2040     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
2041   }
2042 
2043   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2044   MemRefs0[0] = Mem->getMemOperand();
2045   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2046 
2047   return LD;
2048 }
2049 
SelectStore(SDNode * N)2050 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2051   SDLoc dl(N);
2052   StoreSDNode *ST = cast<StoreSDNode>(N);
2053   EVT StoreVT = ST->getMemoryVT();
2054   SDNode *NVPTXST = nullptr;
2055 
2056   // do not support pre/post inc/dec
2057   if (ST->isIndexed())
2058     return nullptr;
2059 
2060   if (!StoreVT.isSimple())
2061     return nullptr;
2062 
2063   // Address Space Setting
2064   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2065 
2066   // Volatile Setting
2067   // - .volatile is only availalble for .global and .shared
2068   bool isVolatile = ST->isVolatile();
2069   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2070       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2071       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2072     isVolatile = false;
2073 
2074   // Vector Setting
2075   MVT SimpleVT = StoreVT.getSimpleVT();
2076   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2077   if (SimpleVT.isVector()) {
2078     unsigned num = SimpleVT.getVectorNumElements();
2079     if (num == 2)
2080       vecType = NVPTX::PTXLdStInstCode::V2;
2081     else if (num == 4)
2082       vecType = NVPTX::PTXLdStInstCode::V4;
2083     else
2084       return nullptr;
2085   }
2086 
2087   // Type Setting: toType + toTypeWidth
2088   // - for integer type, always use 'u'
2089   //
2090   MVT ScalarVT = SimpleVT.getScalarType();
2091   unsigned toTypeWidth = ScalarVT.getSizeInBits();
2092   unsigned int toType;
2093   if (ScalarVT.isFloatingPoint())
2094     toType = NVPTX::PTXLdStInstCode::Float;
2095   else
2096     toType = NVPTX::PTXLdStInstCode::Unsigned;
2097 
2098   // Create the machine instruction DAG
2099   SDValue Chain = N->getOperand(0);
2100   SDValue N1 = N->getOperand(1);
2101   SDValue N2 = N->getOperand(2);
2102   SDValue Addr;
2103   SDValue Offset, Base;
2104   unsigned Opcode;
2105   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2106 
2107   if (SelectDirectAddr(N2, Addr)) {
2108     switch (SourceVT) {
2109     case MVT::i8:
2110       Opcode = NVPTX::ST_i8_avar;
2111       break;
2112     case MVT::i16:
2113       Opcode = NVPTX::ST_i16_avar;
2114       break;
2115     case MVT::i32:
2116       Opcode = NVPTX::ST_i32_avar;
2117       break;
2118     case MVT::i64:
2119       Opcode = NVPTX::ST_i64_avar;
2120       break;
2121     case MVT::f32:
2122       Opcode = NVPTX::ST_f32_avar;
2123       break;
2124     case MVT::f64:
2125       Opcode = NVPTX::ST_f64_avar;
2126       break;
2127     default:
2128       return nullptr;
2129     }
2130     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2131                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2132                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2133                       Chain };
2134     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2135   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2136                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2137     switch (SourceVT) {
2138     case MVT::i8:
2139       Opcode = NVPTX::ST_i8_asi;
2140       break;
2141     case MVT::i16:
2142       Opcode = NVPTX::ST_i16_asi;
2143       break;
2144     case MVT::i32:
2145       Opcode = NVPTX::ST_i32_asi;
2146       break;
2147     case MVT::i64:
2148       Opcode = NVPTX::ST_i64_asi;
2149       break;
2150     case MVT::f32:
2151       Opcode = NVPTX::ST_f32_asi;
2152       break;
2153     case MVT::f64:
2154       Opcode = NVPTX::ST_f64_asi;
2155       break;
2156     default:
2157       return nullptr;
2158     }
2159     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2160                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2161                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2162                       Offset, Chain };
2163     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2164   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2165                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2166     if (TM.is64Bit()) {
2167       switch (SourceVT) {
2168       case MVT::i8:
2169         Opcode = NVPTX::ST_i8_ari_64;
2170         break;
2171       case MVT::i16:
2172         Opcode = NVPTX::ST_i16_ari_64;
2173         break;
2174       case MVT::i32:
2175         Opcode = NVPTX::ST_i32_ari_64;
2176         break;
2177       case MVT::i64:
2178         Opcode = NVPTX::ST_i64_ari_64;
2179         break;
2180       case MVT::f32:
2181         Opcode = NVPTX::ST_f32_ari_64;
2182         break;
2183       case MVT::f64:
2184         Opcode = NVPTX::ST_f64_ari_64;
2185         break;
2186       default:
2187         return nullptr;
2188       }
2189     } else {
2190       switch (SourceVT) {
2191       case MVT::i8:
2192         Opcode = NVPTX::ST_i8_ari;
2193         break;
2194       case MVT::i16:
2195         Opcode = NVPTX::ST_i16_ari;
2196         break;
2197       case MVT::i32:
2198         Opcode = NVPTX::ST_i32_ari;
2199         break;
2200       case MVT::i64:
2201         Opcode = NVPTX::ST_i64_ari;
2202         break;
2203       case MVT::f32:
2204         Opcode = NVPTX::ST_f32_ari;
2205         break;
2206       case MVT::f64:
2207         Opcode = NVPTX::ST_f64_ari;
2208         break;
2209       default:
2210         return nullptr;
2211       }
2212     }
2213     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2214                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2215                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2216                       Offset, Chain };
2217     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2218   } else {
2219     if (TM.is64Bit()) {
2220       switch (SourceVT) {
2221       case MVT::i8:
2222         Opcode = NVPTX::ST_i8_areg_64;
2223         break;
2224       case MVT::i16:
2225         Opcode = NVPTX::ST_i16_areg_64;
2226         break;
2227       case MVT::i32:
2228         Opcode = NVPTX::ST_i32_areg_64;
2229         break;
2230       case MVT::i64:
2231         Opcode = NVPTX::ST_i64_areg_64;
2232         break;
2233       case MVT::f32:
2234         Opcode = NVPTX::ST_f32_areg_64;
2235         break;
2236       case MVT::f64:
2237         Opcode = NVPTX::ST_f64_areg_64;
2238         break;
2239       default:
2240         return nullptr;
2241       }
2242     } else {
2243       switch (SourceVT) {
2244       case MVT::i8:
2245         Opcode = NVPTX::ST_i8_areg;
2246         break;
2247       case MVT::i16:
2248         Opcode = NVPTX::ST_i16_areg;
2249         break;
2250       case MVT::i32:
2251         Opcode = NVPTX::ST_i32_areg;
2252         break;
2253       case MVT::i64:
2254         Opcode = NVPTX::ST_i64_areg;
2255         break;
2256       case MVT::f32:
2257         Opcode = NVPTX::ST_f32_areg;
2258         break;
2259       case MVT::f64:
2260         Opcode = NVPTX::ST_f64_areg;
2261         break;
2262       default:
2263         return nullptr;
2264       }
2265     }
2266     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2267                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2268                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2269                       Chain };
2270     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2271   }
2272 
2273   if (NVPTXST) {
2274     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2275     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2276     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2277   }
2278 
2279   return NVPTXST;
2280 }
2281 
SelectStoreVector(SDNode * N)2282 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2283   SDValue Chain = N->getOperand(0);
2284   SDValue Op1 = N->getOperand(1);
2285   SDValue Addr, Offset, Base;
2286   unsigned Opcode;
2287   SDLoc DL(N);
2288   SDNode *ST;
2289   EVT EltVT = Op1.getValueType();
2290   MemSDNode *MemSD = cast<MemSDNode>(N);
2291   EVT StoreVT = MemSD->getMemoryVT();
2292 
2293   // Address Space Setting
2294   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2295 
2296   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2297     report_fatal_error("Cannot store to pointer that points to constant "
2298                        "memory space");
2299   }
2300 
2301   // Volatile Setting
2302   // - .volatile is only availalble for .global and .shared
2303   bool IsVolatile = MemSD->isVolatile();
2304   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2305       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2306       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2307     IsVolatile = false;
2308 
2309   // Type Setting: toType + toTypeWidth
2310   // - for integer type, always use 'u'
2311   assert(StoreVT.isSimple() && "Store value is not simple");
2312   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2313   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2314   unsigned ToType;
2315   if (ScalarVT.isFloatingPoint())
2316     ToType = NVPTX::PTXLdStInstCode::Float;
2317   else
2318     ToType = NVPTX::PTXLdStInstCode::Unsigned;
2319 
2320   SmallVector<SDValue, 12> StOps;
2321   SDValue N2;
2322   unsigned VecType;
2323 
2324   switch (N->getOpcode()) {
2325   case NVPTXISD::StoreV2:
2326     VecType = NVPTX::PTXLdStInstCode::V2;
2327     StOps.push_back(N->getOperand(1));
2328     StOps.push_back(N->getOperand(2));
2329     N2 = N->getOperand(3);
2330     break;
2331   case NVPTXISD::StoreV4:
2332     VecType = NVPTX::PTXLdStInstCode::V4;
2333     StOps.push_back(N->getOperand(1));
2334     StOps.push_back(N->getOperand(2));
2335     StOps.push_back(N->getOperand(3));
2336     StOps.push_back(N->getOperand(4));
2337     N2 = N->getOperand(5);
2338     break;
2339   default:
2340     return nullptr;
2341   }
2342 
2343   StOps.push_back(getI32Imm(IsVolatile, DL));
2344   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2345   StOps.push_back(getI32Imm(VecType, DL));
2346   StOps.push_back(getI32Imm(ToType, DL));
2347   StOps.push_back(getI32Imm(ToTypeWidth, DL));
2348 
2349   if (SelectDirectAddr(N2, Addr)) {
2350     switch (N->getOpcode()) {
2351     default:
2352       return nullptr;
2353     case NVPTXISD::StoreV2:
2354       switch (EltVT.getSimpleVT().SimpleTy) {
2355       default:
2356         return nullptr;
2357       case MVT::i8:
2358         Opcode = NVPTX::STV_i8_v2_avar;
2359         break;
2360       case MVT::i16:
2361         Opcode = NVPTX::STV_i16_v2_avar;
2362         break;
2363       case MVT::i32:
2364         Opcode = NVPTX::STV_i32_v2_avar;
2365         break;
2366       case MVT::i64:
2367         Opcode = NVPTX::STV_i64_v2_avar;
2368         break;
2369       case MVT::f32:
2370         Opcode = NVPTX::STV_f32_v2_avar;
2371         break;
2372       case MVT::f64:
2373         Opcode = NVPTX::STV_f64_v2_avar;
2374         break;
2375       }
2376       break;
2377     case NVPTXISD::StoreV4:
2378       switch (EltVT.getSimpleVT().SimpleTy) {
2379       default:
2380         return nullptr;
2381       case MVT::i8:
2382         Opcode = NVPTX::STV_i8_v4_avar;
2383         break;
2384       case MVT::i16:
2385         Opcode = NVPTX::STV_i16_v4_avar;
2386         break;
2387       case MVT::i32:
2388         Opcode = NVPTX::STV_i32_v4_avar;
2389         break;
2390       case MVT::f32:
2391         Opcode = NVPTX::STV_f32_v4_avar;
2392         break;
2393       }
2394       break;
2395     }
2396     StOps.push_back(Addr);
2397   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2398                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2399     switch (N->getOpcode()) {
2400     default:
2401       return nullptr;
2402     case NVPTXISD::StoreV2:
2403       switch (EltVT.getSimpleVT().SimpleTy) {
2404       default:
2405         return nullptr;
2406       case MVT::i8:
2407         Opcode = NVPTX::STV_i8_v2_asi;
2408         break;
2409       case MVT::i16:
2410         Opcode = NVPTX::STV_i16_v2_asi;
2411         break;
2412       case MVT::i32:
2413         Opcode = NVPTX::STV_i32_v2_asi;
2414         break;
2415       case MVT::i64:
2416         Opcode = NVPTX::STV_i64_v2_asi;
2417         break;
2418       case MVT::f32:
2419         Opcode = NVPTX::STV_f32_v2_asi;
2420         break;
2421       case MVT::f64:
2422         Opcode = NVPTX::STV_f64_v2_asi;
2423         break;
2424       }
2425       break;
2426     case NVPTXISD::StoreV4:
2427       switch (EltVT.getSimpleVT().SimpleTy) {
2428       default:
2429         return nullptr;
2430       case MVT::i8:
2431         Opcode = NVPTX::STV_i8_v4_asi;
2432         break;
2433       case MVT::i16:
2434         Opcode = NVPTX::STV_i16_v4_asi;
2435         break;
2436       case MVT::i32:
2437         Opcode = NVPTX::STV_i32_v4_asi;
2438         break;
2439       case MVT::f32:
2440         Opcode = NVPTX::STV_f32_v4_asi;
2441         break;
2442       }
2443       break;
2444     }
2445     StOps.push_back(Base);
2446     StOps.push_back(Offset);
2447   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2448                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2449     if (TM.is64Bit()) {
2450       switch (N->getOpcode()) {
2451       default:
2452         return nullptr;
2453       case NVPTXISD::StoreV2:
2454         switch (EltVT.getSimpleVT().SimpleTy) {
2455         default:
2456           return nullptr;
2457         case MVT::i8:
2458           Opcode = NVPTX::STV_i8_v2_ari_64;
2459           break;
2460         case MVT::i16:
2461           Opcode = NVPTX::STV_i16_v2_ari_64;
2462           break;
2463         case MVT::i32:
2464           Opcode = NVPTX::STV_i32_v2_ari_64;
2465           break;
2466         case MVT::i64:
2467           Opcode = NVPTX::STV_i64_v2_ari_64;
2468           break;
2469         case MVT::f32:
2470           Opcode = NVPTX::STV_f32_v2_ari_64;
2471           break;
2472         case MVT::f64:
2473           Opcode = NVPTX::STV_f64_v2_ari_64;
2474           break;
2475         }
2476         break;
2477       case NVPTXISD::StoreV4:
2478         switch (EltVT.getSimpleVT().SimpleTy) {
2479         default:
2480           return nullptr;
2481         case MVT::i8:
2482           Opcode = NVPTX::STV_i8_v4_ari_64;
2483           break;
2484         case MVT::i16:
2485           Opcode = NVPTX::STV_i16_v4_ari_64;
2486           break;
2487         case MVT::i32:
2488           Opcode = NVPTX::STV_i32_v4_ari_64;
2489           break;
2490         case MVT::f32:
2491           Opcode = NVPTX::STV_f32_v4_ari_64;
2492           break;
2493         }
2494         break;
2495       }
2496     } else {
2497       switch (N->getOpcode()) {
2498       default:
2499         return nullptr;
2500       case NVPTXISD::StoreV2:
2501         switch (EltVT.getSimpleVT().SimpleTy) {
2502         default:
2503           return nullptr;
2504         case MVT::i8:
2505           Opcode = NVPTX::STV_i8_v2_ari;
2506           break;
2507         case MVT::i16:
2508           Opcode = NVPTX::STV_i16_v2_ari;
2509           break;
2510         case MVT::i32:
2511           Opcode = NVPTX::STV_i32_v2_ari;
2512           break;
2513         case MVT::i64:
2514           Opcode = NVPTX::STV_i64_v2_ari;
2515           break;
2516         case MVT::f32:
2517           Opcode = NVPTX::STV_f32_v2_ari;
2518           break;
2519         case MVT::f64:
2520           Opcode = NVPTX::STV_f64_v2_ari;
2521           break;
2522         }
2523         break;
2524       case NVPTXISD::StoreV4:
2525         switch (EltVT.getSimpleVT().SimpleTy) {
2526         default:
2527           return nullptr;
2528         case MVT::i8:
2529           Opcode = NVPTX::STV_i8_v4_ari;
2530           break;
2531         case MVT::i16:
2532           Opcode = NVPTX::STV_i16_v4_ari;
2533           break;
2534         case MVT::i32:
2535           Opcode = NVPTX::STV_i32_v4_ari;
2536           break;
2537         case MVT::f32:
2538           Opcode = NVPTX::STV_f32_v4_ari;
2539           break;
2540         }
2541         break;
2542       }
2543     }
2544     StOps.push_back(Base);
2545     StOps.push_back(Offset);
2546   } else {
2547     if (TM.is64Bit()) {
2548       switch (N->getOpcode()) {
2549       default:
2550         return nullptr;
2551       case NVPTXISD::StoreV2:
2552         switch (EltVT.getSimpleVT().SimpleTy) {
2553         default:
2554           return nullptr;
2555         case MVT::i8:
2556           Opcode = NVPTX::STV_i8_v2_areg_64;
2557           break;
2558         case MVT::i16:
2559           Opcode = NVPTX::STV_i16_v2_areg_64;
2560           break;
2561         case MVT::i32:
2562           Opcode = NVPTX::STV_i32_v2_areg_64;
2563           break;
2564         case MVT::i64:
2565           Opcode = NVPTX::STV_i64_v2_areg_64;
2566           break;
2567         case MVT::f32:
2568           Opcode = NVPTX::STV_f32_v2_areg_64;
2569           break;
2570         case MVT::f64:
2571           Opcode = NVPTX::STV_f64_v2_areg_64;
2572           break;
2573         }
2574         break;
2575       case NVPTXISD::StoreV4:
2576         switch (EltVT.getSimpleVT().SimpleTy) {
2577         default:
2578           return nullptr;
2579         case MVT::i8:
2580           Opcode = NVPTX::STV_i8_v4_areg_64;
2581           break;
2582         case MVT::i16:
2583           Opcode = NVPTX::STV_i16_v4_areg_64;
2584           break;
2585         case MVT::i32:
2586           Opcode = NVPTX::STV_i32_v4_areg_64;
2587           break;
2588         case MVT::f32:
2589           Opcode = NVPTX::STV_f32_v4_areg_64;
2590           break;
2591         }
2592         break;
2593       }
2594     } else {
2595       switch (N->getOpcode()) {
2596       default:
2597         return nullptr;
2598       case NVPTXISD::StoreV2:
2599         switch (EltVT.getSimpleVT().SimpleTy) {
2600         default:
2601           return nullptr;
2602         case MVT::i8:
2603           Opcode = NVPTX::STV_i8_v2_areg;
2604           break;
2605         case MVT::i16:
2606           Opcode = NVPTX::STV_i16_v2_areg;
2607           break;
2608         case MVT::i32:
2609           Opcode = NVPTX::STV_i32_v2_areg;
2610           break;
2611         case MVT::i64:
2612           Opcode = NVPTX::STV_i64_v2_areg;
2613           break;
2614         case MVT::f32:
2615           Opcode = NVPTX::STV_f32_v2_areg;
2616           break;
2617         case MVT::f64:
2618           Opcode = NVPTX::STV_f64_v2_areg;
2619           break;
2620         }
2621         break;
2622       case NVPTXISD::StoreV4:
2623         switch (EltVT.getSimpleVT().SimpleTy) {
2624         default:
2625           return nullptr;
2626         case MVT::i8:
2627           Opcode = NVPTX::STV_i8_v4_areg;
2628           break;
2629         case MVT::i16:
2630           Opcode = NVPTX::STV_i16_v4_areg;
2631           break;
2632         case MVT::i32:
2633           Opcode = NVPTX::STV_i32_v4_areg;
2634           break;
2635         case MVT::f32:
2636           Opcode = NVPTX::STV_f32_v4_areg;
2637           break;
2638         }
2639         break;
2640       }
2641     }
2642     StOps.push_back(N2);
2643   }
2644 
2645   StOps.push_back(Chain);
2646 
2647   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2648 
2649   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2650   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2651   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2652 
2653   return ST;
2654 }
2655 
SelectLoadParam(SDNode * Node)2656 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2657   SDValue Chain = Node->getOperand(0);
2658   SDValue Offset = Node->getOperand(2);
2659   SDValue Flag = Node->getOperand(3);
2660   SDLoc DL(Node);
2661   MemSDNode *Mem = cast<MemSDNode>(Node);
2662 
2663   unsigned VecSize;
2664   switch (Node->getOpcode()) {
2665   default:
2666     return nullptr;
2667   case NVPTXISD::LoadParam:
2668     VecSize = 1;
2669     break;
2670   case NVPTXISD::LoadParamV2:
2671     VecSize = 2;
2672     break;
2673   case NVPTXISD::LoadParamV4:
2674     VecSize = 4;
2675     break;
2676   }
2677 
2678   EVT EltVT = Node->getValueType(0);
2679   EVT MemVT = Mem->getMemoryVT();
2680 
2681   unsigned Opc = 0;
2682 
2683   switch (VecSize) {
2684   default:
2685     return nullptr;
2686   case 1:
2687     switch (MemVT.getSimpleVT().SimpleTy) {
2688     default:
2689       return nullptr;
2690     case MVT::i1:
2691       Opc = NVPTX::LoadParamMemI8;
2692       break;
2693     case MVT::i8:
2694       Opc = NVPTX::LoadParamMemI8;
2695       break;
2696     case MVT::i16:
2697       Opc = NVPTX::LoadParamMemI16;
2698       break;
2699     case MVT::i32:
2700       Opc = NVPTX::LoadParamMemI32;
2701       break;
2702     case MVT::i64:
2703       Opc = NVPTX::LoadParamMemI64;
2704       break;
2705     case MVT::f32:
2706       Opc = NVPTX::LoadParamMemF32;
2707       break;
2708     case MVT::f64:
2709       Opc = NVPTX::LoadParamMemF64;
2710       break;
2711     }
2712     break;
2713   case 2:
2714     switch (MemVT.getSimpleVT().SimpleTy) {
2715     default:
2716       return nullptr;
2717     case MVT::i1:
2718       Opc = NVPTX::LoadParamMemV2I8;
2719       break;
2720     case MVT::i8:
2721       Opc = NVPTX::LoadParamMemV2I8;
2722       break;
2723     case MVT::i16:
2724       Opc = NVPTX::LoadParamMemV2I16;
2725       break;
2726     case MVT::i32:
2727       Opc = NVPTX::LoadParamMemV2I32;
2728       break;
2729     case MVT::i64:
2730       Opc = NVPTX::LoadParamMemV2I64;
2731       break;
2732     case MVT::f32:
2733       Opc = NVPTX::LoadParamMemV2F32;
2734       break;
2735     case MVT::f64:
2736       Opc = NVPTX::LoadParamMemV2F64;
2737       break;
2738     }
2739     break;
2740   case 4:
2741     switch (MemVT.getSimpleVT().SimpleTy) {
2742     default:
2743       return nullptr;
2744     case MVT::i1:
2745       Opc = NVPTX::LoadParamMemV4I8;
2746       break;
2747     case MVT::i8:
2748       Opc = NVPTX::LoadParamMemV4I8;
2749       break;
2750     case MVT::i16:
2751       Opc = NVPTX::LoadParamMemV4I16;
2752       break;
2753     case MVT::i32:
2754       Opc = NVPTX::LoadParamMemV4I32;
2755       break;
2756     case MVT::f32:
2757       Opc = NVPTX::LoadParamMemV4F32;
2758       break;
2759     }
2760     break;
2761   }
2762 
2763   SDVTList VTs;
2764   if (VecSize == 1) {
2765     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2766   } else if (VecSize == 2) {
2767     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2768   } else {
2769     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2770     VTs = CurDAG->getVTList(EVTs);
2771   }
2772 
2773   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2774 
2775   SmallVector<SDValue, 2> Ops;
2776   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2777   Ops.push_back(Chain);
2778   Ops.push_back(Flag);
2779 
2780   return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2781 }
2782 
SelectStoreRetval(SDNode * N)2783 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2784   SDLoc DL(N);
2785   SDValue Chain = N->getOperand(0);
2786   SDValue Offset = N->getOperand(1);
2787   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2788   MemSDNode *Mem = cast<MemSDNode>(N);
2789 
2790   // How many elements do we have?
2791   unsigned NumElts = 1;
2792   switch (N->getOpcode()) {
2793   default:
2794     return nullptr;
2795   case NVPTXISD::StoreRetval:
2796     NumElts = 1;
2797     break;
2798   case NVPTXISD::StoreRetvalV2:
2799     NumElts = 2;
2800     break;
2801   case NVPTXISD::StoreRetvalV4:
2802     NumElts = 4;
2803     break;
2804   }
2805 
2806   // Build vector of operands
2807   SmallVector<SDValue, 6> Ops;
2808   for (unsigned i = 0; i < NumElts; ++i)
2809     Ops.push_back(N->getOperand(i + 2));
2810   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2811   Ops.push_back(Chain);
2812 
2813   // Determine target opcode
2814   // If we have an i1, use an 8-bit store. The lowering code in
2815   // NVPTXISelLowering will have already emitted an upcast.
2816   unsigned Opcode = 0;
2817   switch (NumElts) {
2818   default:
2819     return nullptr;
2820   case 1:
2821     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2822     default:
2823       return nullptr;
2824     case MVT::i1:
2825       Opcode = NVPTX::StoreRetvalI8;
2826       break;
2827     case MVT::i8:
2828       Opcode = NVPTX::StoreRetvalI8;
2829       break;
2830     case MVT::i16:
2831       Opcode = NVPTX::StoreRetvalI16;
2832       break;
2833     case MVT::i32:
2834       Opcode = NVPTX::StoreRetvalI32;
2835       break;
2836     case MVT::i64:
2837       Opcode = NVPTX::StoreRetvalI64;
2838       break;
2839     case MVT::f32:
2840       Opcode = NVPTX::StoreRetvalF32;
2841       break;
2842     case MVT::f64:
2843       Opcode = NVPTX::StoreRetvalF64;
2844       break;
2845     }
2846     break;
2847   case 2:
2848     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2849     default:
2850       return nullptr;
2851     case MVT::i1:
2852       Opcode = NVPTX::StoreRetvalV2I8;
2853       break;
2854     case MVT::i8:
2855       Opcode = NVPTX::StoreRetvalV2I8;
2856       break;
2857     case MVT::i16:
2858       Opcode = NVPTX::StoreRetvalV2I16;
2859       break;
2860     case MVT::i32:
2861       Opcode = NVPTX::StoreRetvalV2I32;
2862       break;
2863     case MVT::i64:
2864       Opcode = NVPTX::StoreRetvalV2I64;
2865       break;
2866     case MVT::f32:
2867       Opcode = NVPTX::StoreRetvalV2F32;
2868       break;
2869     case MVT::f64:
2870       Opcode = NVPTX::StoreRetvalV2F64;
2871       break;
2872     }
2873     break;
2874   case 4:
2875     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2876     default:
2877       return nullptr;
2878     case MVT::i1:
2879       Opcode = NVPTX::StoreRetvalV4I8;
2880       break;
2881     case MVT::i8:
2882       Opcode = NVPTX::StoreRetvalV4I8;
2883       break;
2884     case MVT::i16:
2885       Opcode = NVPTX::StoreRetvalV4I16;
2886       break;
2887     case MVT::i32:
2888       Opcode = NVPTX::StoreRetvalV4I32;
2889       break;
2890     case MVT::f32:
2891       Opcode = NVPTX::StoreRetvalV4F32;
2892       break;
2893     }
2894     break;
2895   }
2896 
2897   SDNode *Ret =
2898       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2899   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2900   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2901   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2902 
2903   return Ret;
2904 }
2905 
SelectStoreParam(SDNode * N)2906 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2907   SDLoc DL(N);
2908   SDValue Chain = N->getOperand(0);
2909   SDValue Param = N->getOperand(1);
2910   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2911   SDValue Offset = N->getOperand(2);
2912   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2913   MemSDNode *Mem = cast<MemSDNode>(N);
2914   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2915 
2916   // How many elements do we have?
2917   unsigned NumElts = 1;
2918   switch (N->getOpcode()) {
2919   default:
2920     return nullptr;
2921   case NVPTXISD::StoreParamU32:
2922   case NVPTXISD::StoreParamS32:
2923   case NVPTXISD::StoreParam:
2924     NumElts = 1;
2925     break;
2926   case NVPTXISD::StoreParamV2:
2927     NumElts = 2;
2928     break;
2929   case NVPTXISD::StoreParamV4:
2930     NumElts = 4;
2931     break;
2932   }
2933 
2934   // Build vector of operands
2935   SmallVector<SDValue, 8> Ops;
2936   for (unsigned i = 0; i < NumElts; ++i)
2937     Ops.push_back(N->getOperand(i + 3));
2938   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2939   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2940   Ops.push_back(Chain);
2941   Ops.push_back(Flag);
2942 
2943   // Determine target opcode
2944   // If we have an i1, use an 8-bit store. The lowering code in
2945   // NVPTXISelLowering will have already emitted an upcast.
2946   unsigned Opcode = 0;
2947   switch (N->getOpcode()) {
2948   default:
2949     switch (NumElts) {
2950     default:
2951       return nullptr;
2952     case 1:
2953       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2954       default:
2955         return nullptr;
2956       case MVT::i1:
2957         Opcode = NVPTX::StoreParamI8;
2958         break;
2959       case MVT::i8:
2960         Opcode = NVPTX::StoreParamI8;
2961         break;
2962       case MVT::i16:
2963         Opcode = NVPTX::StoreParamI16;
2964         break;
2965       case MVT::i32:
2966         Opcode = NVPTX::StoreParamI32;
2967         break;
2968       case MVT::i64:
2969         Opcode = NVPTX::StoreParamI64;
2970         break;
2971       case MVT::f32:
2972         Opcode = NVPTX::StoreParamF32;
2973         break;
2974       case MVT::f64:
2975         Opcode = NVPTX::StoreParamF64;
2976         break;
2977       }
2978       break;
2979     case 2:
2980       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2981       default:
2982         return nullptr;
2983       case MVT::i1:
2984         Opcode = NVPTX::StoreParamV2I8;
2985         break;
2986       case MVT::i8:
2987         Opcode = NVPTX::StoreParamV2I8;
2988         break;
2989       case MVT::i16:
2990         Opcode = NVPTX::StoreParamV2I16;
2991         break;
2992       case MVT::i32:
2993         Opcode = NVPTX::StoreParamV2I32;
2994         break;
2995       case MVT::i64:
2996         Opcode = NVPTX::StoreParamV2I64;
2997         break;
2998       case MVT::f32:
2999         Opcode = NVPTX::StoreParamV2F32;
3000         break;
3001       case MVT::f64:
3002         Opcode = NVPTX::StoreParamV2F64;
3003         break;
3004       }
3005       break;
3006     case 4:
3007       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3008       default:
3009         return nullptr;
3010       case MVT::i1:
3011         Opcode = NVPTX::StoreParamV4I8;
3012         break;
3013       case MVT::i8:
3014         Opcode = NVPTX::StoreParamV4I8;
3015         break;
3016       case MVT::i16:
3017         Opcode = NVPTX::StoreParamV4I16;
3018         break;
3019       case MVT::i32:
3020         Opcode = NVPTX::StoreParamV4I32;
3021         break;
3022       case MVT::f32:
3023         Opcode = NVPTX::StoreParamV4F32;
3024         break;
3025       }
3026       break;
3027     }
3028     break;
3029   // Special case: if we have a sign-extend/zero-extend node, insert the
3030   // conversion instruction first, and use that as the value operand to
3031   // the selected StoreParam node.
3032   case NVPTXISD::StoreParamU32: {
3033     Opcode = NVPTX::StoreParamI32;
3034     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3035                                                 MVT::i32);
3036     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3037                                          MVT::i32, Ops[0], CvtNone);
3038     Ops[0] = SDValue(Cvt, 0);
3039     break;
3040   }
3041   case NVPTXISD::StoreParamS32: {
3042     Opcode = NVPTX::StoreParamI32;
3043     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3044                                                 MVT::i32);
3045     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3046                                          MVT::i32, Ops[0], CvtNone);
3047     Ops[0] = SDValue(Cvt, 0);
3048     break;
3049   }
3050   }
3051 
3052   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3053   SDNode *Ret =
3054       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3055   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3056   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3057   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3058 
3059   return Ret;
3060 }
3061 
SelectTextureIntrinsic(SDNode * N)3062 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3063   SDValue Chain = N->getOperand(0);
3064   SDNode *Ret = nullptr;
3065   unsigned Opc = 0;
3066   SmallVector<SDValue, 8> Ops;
3067 
3068   switch (N->getOpcode()) {
3069   default: return nullptr;
3070   case NVPTXISD::Tex1DFloatS32:
3071     Opc = NVPTX::TEX_1D_F32_S32;
3072     break;
3073   case NVPTXISD::Tex1DFloatFloat:
3074     Opc = NVPTX::TEX_1D_F32_F32;
3075     break;
3076   case NVPTXISD::Tex1DFloatFloatLevel:
3077     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3078     break;
3079   case NVPTXISD::Tex1DFloatFloatGrad:
3080     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3081     break;
3082   case NVPTXISD::Tex1DS32S32:
3083     Opc = NVPTX::TEX_1D_S32_S32;
3084     break;
3085   case NVPTXISD::Tex1DS32Float:
3086     Opc = NVPTX::TEX_1D_S32_F32;
3087     break;
3088   case NVPTXISD::Tex1DS32FloatLevel:
3089     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3090     break;
3091   case NVPTXISD::Tex1DS32FloatGrad:
3092     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3093     break;
3094   case NVPTXISD::Tex1DU32S32:
3095     Opc = NVPTX::TEX_1D_U32_S32;
3096     break;
3097   case NVPTXISD::Tex1DU32Float:
3098     Opc = NVPTX::TEX_1D_U32_F32;
3099     break;
3100   case NVPTXISD::Tex1DU32FloatLevel:
3101     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3102     break;
3103   case NVPTXISD::Tex1DU32FloatGrad:
3104     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3105     break;
3106   case NVPTXISD::Tex1DArrayFloatS32:
3107     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3108     break;
3109   case NVPTXISD::Tex1DArrayFloatFloat:
3110     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3111     break;
3112   case NVPTXISD::Tex1DArrayFloatFloatLevel:
3113     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3114     break;
3115   case NVPTXISD::Tex1DArrayFloatFloatGrad:
3116     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3117     break;
3118   case NVPTXISD::Tex1DArrayS32S32:
3119     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3120     break;
3121   case NVPTXISD::Tex1DArrayS32Float:
3122     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3123     break;
3124   case NVPTXISD::Tex1DArrayS32FloatLevel:
3125     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3126     break;
3127   case NVPTXISD::Tex1DArrayS32FloatGrad:
3128     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3129     break;
3130   case NVPTXISD::Tex1DArrayU32S32:
3131     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3132     break;
3133   case NVPTXISD::Tex1DArrayU32Float:
3134     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3135     break;
3136   case NVPTXISD::Tex1DArrayU32FloatLevel:
3137     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3138     break;
3139   case NVPTXISD::Tex1DArrayU32FloatGrad:
3140     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3141     break;
3142   case NVPTXISD::Tex2DFloatS32:
3143     Opc = NVPTX::TEX_2D_F32_S32;
3144     break;
3145   case NVPTXISD::Tex2DFloatFloat:
3146     Opc = NVPTX::TEX_2D_F32_F32;
3147     break;
3148   case NVPTXISD::Tex2DFloatFloatLevel:
3149     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3150     break;
3151   case NVPTXISD::Tex2DFloatFloatGrad:
3152     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3153     break;
3154   case NVPTXISD::Tex2DS32S32:
3155     Opc = NVPTX::TEX_2D_S32_S32;
3156     break;
3157   case NVPTXISD::Tex2DS32Float:
3158     Opc = NVPTX::TEX_2D_S32_F32;
3159     break;
3160   case NVPTXISD::Tex2DS32FloatLevel:
3161     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3162     break;
3163   case NVPTXISD::Tex2DS32FloatGrad:
3164     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3165     break;
3166   case NVPTXISD::Tex2DU32S32:
3167     Opc = NVPTX::TEX_2D_U32_S32;
3168     break;
3169   case NVPTXISD::Tex2DU32Float:
3170     Opc = NVPTX::TEX_2D_U32_F32;
3171     break;
3172   case NVPTXISD::Tex2DU32FloatLevel:
3173     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3174     break;
3175   case NVPTXISD::Tex2DU32FloatGrad:
3176     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3177     break;
3178   case NVPTXISD::Tex2DArrayFloatS32:
3179     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3180     break;
3181   case NVPTXISD::Tex2DArrayFloatFloat:
3182     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3183     break;
3184   case NVPTXISD::Tex2DArrayFloatFloatLevel:
3185     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3186     break;
3187   case NVPTXISD::Tex2DArrayFloatFloatGrad:
3188     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3189     break;
3190   case NVPTXISD::Tex2DArrayS32S32:
3191     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3192     break;
3193   case NVPTXISD::Tex2DArrayS32Float:
3194     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3195     break;
3196   case NVPTXISD::Tex2DArrayS32FloatLevel:
3197     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3198     break;
3199   case NVPTXISD::Tex2DArrayS32FloatGrad:
3200     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3201     break;
3202   case NVPTXISD::Tex2DArrayU32S32:
3203     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3204     break;
3205   case NVPTXISD::Tex2DArrayU32Float:
3206     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3207     break;
3208   case NVPTXISD::Tex2DArrayU32FloatLevel:
3209     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3210     break;
3211   case NVPTXISD::Tex2DArrayU32FloatGrad:
3212     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3213     break;
3214   case NVPTXISD::Tex3DFloatS32:
3215     Opc = NVPTX::TEX_3D_F32_S32;
3216     break;
3217   case NVPTXISD::Tex3DFloatFloat:
3218     Opc = NVPTX::TEX_3D_F32_F32;
3219     break;
3220   case NVPTXISD::Tex3DFloatFloatLevel:
3221     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3222     break;
3223   case NVPTXISD::Tex3DFloatFloatGrad:
3224     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3225     break;
3226   case NVPTXISD::Tex3DS32S32:
3227     Opc = NVPTX::TEX_3D_S32_S32;
3228     break;
3229   case NVPTXISD::Tex3DS32Float:
3230     Opc = NVPTX::TEX_3D_S32_F32;
3231     break;
3232   case NVPTXISD::Tex3DS32FloatLevel:
3233     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3234     break;
3235   case NVPTXISD::Tex3DS32FloatGrad:
3236     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3237     break;
3238   case NVPTXISD::Tex3DU32S32:
3239     Opc = NVPTX::TEX_3D_U32_S32;
3240     break;
3241   case NVPTXISD::Tex3DU32Float:
3242     Opc = NVPTX::TEX_3D_U32_F32;
3243     break;
3244   case NVPTXISD::Tex3DU32FloatLevel:
3245     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3246     break;
3247   case NVPTXISD::Tex3DU32FloatGrad:
3248     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3249     break;
3250   case NVPTXISD::TexCubeFloatFloat:
3251     Opc = NVPTX::TEX_CUBE_F32_F32;
3252     break;
3253   case NVPTXISD::TexCubeFloatFloatLevel:
3254     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3255     break;
3256   case NVPTXISD::TexCubeS32Float:
3257     Opc = NVPTX::TEX_CUBE_S32_F32;
3258     break;
3259   case NVPTXISD::TexCubeS32FloatLevel:
3260     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3261     break;
3262   case NVPTXISD::TexCubeU32Float:
3263     Opc = NVPTX::TEX_CUBE_U32_F32;
3264     break;
3265   case NVPTXISD::TexCubeU32FloatLevel:
3266     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3267     break;
3268   case NVPTXISD::TexCubeArrayFloatFloat:
3269     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3270     break;
3271   case NVPTXISD::TexCubeArrayFloatFloatLevel:
3272     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3273     break;
3274   case NVPTXISD::TexCubeArrayS32Float:
3275     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3276     break;
3277   case NVPTXISD::TexCubeArrayS32FloatLevel:
3278     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3279     break;
3280   case NVPTXISD::TexCubeArrayU32Float:
3281     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3282     break;
3283   case NVPTXISD::TexCubeArrayU32FloatLevel:
3284     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3285     break;
3286   case NVPTXISD::Tld4R2DFloatFloat:
3287     Opc = NVPTX::TLD4_R_2D_F32_F32;
3288     break;
3289   case NVPTXISD::Tld4G2DFloatFloat:
3290     Opc = NVPTX::TLD4_G_2D_F32_F32;
3291     break;
3292   case NVPTXISD::Tld4B2DFloatFloat:
3293     Opc = NVPTX::TLD4_B_2D_F32_F32;
3294     break;
3295   case NVPTXISD::Tld4A2DFloatFloat:
3296     Opc = NVPTX::TLD4_A_2D_F32_F32;
3297     break;
3298   case NVPTXISD::Tld4R2DS64Float:
3299     Opc = NVPTX::TLD4_R_2D_S32_F32;
3300     break;
3301   case NVPTXISD::Tld4G2DS64Float:
3302     Opc = NVPTX::TLD4_G_2D_S32_F32;
3303     break;
3304   case NVPTXISD::Tld4B2DS64Float:
3305     Opc = NVPTX::TLD4_B_2D_S32_F32;
3306     break;
3307   case NVPTXISD::Tld4A2DS64Float:
3308     Opc = NVPTX::TLD4_A_2D_S32_F32;
3309     break;
3310   case NVPTXISD::Tld4R2DU64Float:
3311     Opc = NVPTX::TLD4_R_2D_U32_F32;
3312     break;
3313   case NVPTXISD::Tld4G2DU64Float:
3314     Opc = NVPTX::TLD4_G_2D_U32_F32;
3315     break;
3316   case NVPTXISD::Tld4B2DU64Float:
3317     Opc = NVPTX::TLD4_B_2D_U32_F32;
3318     break;
3319   case NVPTXISD::Tld4A2DU64Float:
3320     Opc = NVPTX::TLD4_A_2D_U32_F32;
3321     break;
3322   case NVPTXISD::TexUnified1DFloatS32:
3323     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3324     break;
3325   case NVPTXISD::TexUnified1DFloatFloat:
3326     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3327     break;
3328   case NVPTXISD::TexUnified1DFloatFloatLevel:
3329     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3330     break;
3331   case NVPTXISD::TexUnified1DFloatFloatGrad:
3332     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3333     break;
3334   case NVPTXISD::TexUnified1DS32S32:
3335     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3336     break;
3337   case NVPTXISD::TexUnified1DS32Float:
3338     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3339     break;
3340   case NVPTXISD::TexUnified1DS32FloatLevel:
3341     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3342     break;
3343   case NVPTXISD::TexUnified1DS32FloatGrad:
3344     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3345     break;
3346   case NVPTXISD::TexUnified1DU32S32:
3347     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3348     break;
3349   case NVPTXISD::TexUnified1DU32Float:
3350     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3351     break;
3352   case NVPTXISD::TexUnified1DU32FloatLevel:
3353     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3354     break;
3355   case NVPTXISD::TexUnified1DU32FloatGrad:
3356     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3357     break;
3358   case NVPTXISD::TexUnified1DArrayFloatS32:
3359     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3360     break;
3361   case NVPTXISD::TexUnified1DArrayFloatFloat:
3362     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3363     break;
3364   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3365     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3366     break;
3367   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3368     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3369     break;
3370   case NVPTXISD::TexUnified1DArrayS32S32:
3371     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3372     break;
3373   case NVPTXISD::TexUnified1DArrayS32Float:
3374     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3375     break;
3376   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3377     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3378     break;
3379   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3380     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3381     break;
3382   case NVPTXISD::TexUnified1DArrayU32S32:
3383     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3384     break;
3385   case NVPTXISD::TexUnified1DArrayU32Float:
3386     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3387     break;
3388   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3389     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3390     break;
3391   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3392     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3393     break;
3394   case NVPTXISD::TexUnified2DFloatS32:
3395     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3396     break;
3397   case NVPTXISD::TexUnified2DFloatFloat:
3398     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3399     break;
3400   case NVPTXISD::TexUnified2DFloatFloatLevel:
3401     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3402     break;
3403   case NVPTXISD::TexUnified2DFloatFloatGrad:
3404     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3405     break;
3406   case NVPTXISD::TexUnified2DS32S32:
3407     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3408     break;
3409   case NVPTXISD::TexUnified2DS32Float:
3410     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3411     break;
3412   case NVPTXISD::TexUnified2DS32FloatLevel:
3413     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3414     break;
3415   case NVPTXISD::TexUnified2DS32FloatGrad:
3416     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3417     break;
3418   case NVPTXISD::TexUnified2DU32S32:
3419     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3420     break;
3421   case NVPTXISD::TexUnified2DU32Float:
3422     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3423     break;
3424   case NVPTXISD::TexUnified2DU32FloatLevel:
3425     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3426     break;
3427   case NVPTXISD::TexUnified2DU32FloatGrad:
3428     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3429     break;
3430   case NVPTXISD::TexUnified2DArrayFloatS32:
3431     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3432     break;
3433   case NVPTXISD::TexUnified2DArrayFloatFloat:
3434     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3435     break;
3436   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3437     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3438     break;
3439   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3440     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3441     break;
3442   case NVPTXISD::TexUnified2DArrayS32S32:
3443     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3444     break;
3445   case NVPTXISD::TexUnified2DArrayS32Float:
3446     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3447     break;
3448   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3449     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3450     break;
3451   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3452     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3453     break;
3454   case NVPTXISD::TexUnified2DArrayU32S32:
3455     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3456     break;
3457   case NVPTXISD::TexUnified2DArrayU32Float:
3458     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3459     break;
3460   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3461     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3462     break;
3463   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3464     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3465     break;
3466   case NVPTXISD::TexUnified3DFloatS32:
3467     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3468     break;
3469   case NVPTXISD::TexUnified3DFloatFloat:
3470     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3471     break;
3472   case NVPTXISD::TexUnified3DFloatFloatLevel:
3473     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3474     break;
3475   case NVPTXISD::TexUnified3DFloatFloatGrad:
3476     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3477     break;
3478   case NVPTXISD::TexUnified3DS32S32:
3479     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3480     break;
3481   case NVPTXISD::TexUnified3DS32Float:
3482     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3483     break;
3484   case NVPTXISD::TexUnified3DS32FloatLevel:
3485     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3486     break;
3487   case NVPTXISD::TexUnified3DS32FloatGrad:
3488     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3489     break;
3490   case NVPTXISD::TexUnified3DU32S32:
3491     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3492     break;
3493   case NVPTXISD::TexUnified3DU32Float:
3494     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3495     break;
3496   case NVPTXISD::TexUnified3DU32FloatLevel:
3497     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3498     break;
3499   case NVPTXISD::TexUnified3DU32FloatGrad:
3500     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3501     break;
3502   case NVPTXISD::TexUnifiedCubeFloatFloat:
3503     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3504     break;
3505   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3506     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3507     break;
3508   case NVPTXISD::TexUnifiedCubeS32Float:
3509     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3510     break;
3511   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3512     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3513     break;
3514   case NVPTXISD::TexUnifiedCubeU32Float:
3515     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3516     break;
3517   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3518     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3519     break;
3520   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3521     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3522     break;
3523   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3524     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3525     break;
3526   case NVPTXISD::TexUnifiedCubeArrayS32Float:
3527     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3528     break;
3529   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3530     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3531     break;
3532   case NVPTXISD::TexUnifiedCubeArrayU32Float:
3533     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3534     break;
3535   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3536     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3537     break;
3538   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3539     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3540     break;
3541   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3542     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3543     break;
3544   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3545     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3546     break;
3547   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3548     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3549     break;
3550   case NVPTXISD::Tld4UnifiedR2DS64Float:
3551     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3552     break;
3553   case NVPTXISD::Tld4UnifiedG2DS64Float:
3554     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3555     break;
3556   case NVPTXISD::Tld4UnifiedB2DS64Float:
3557     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3558     break;
3559   case NVPTXISD::Tld4UnifiedA2DS64Float:
3560     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3561     break;
3562   case NVPTXISD::Tld4UnifiedR2DU64Float:
3563     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3564     break;
3565   case NVPTXISD::Tld4UnifiedG2DU64Float:
3566     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3567     break;
3568   case NVPTXISD::Tld4UnifiedB2DU64Float:
3569     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3570     break;
3571   case NVPTXISD::Tld4UnifiedA2DU64Float:
3572     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3573     break;
3574   }
3575 
3576   // Copy over operands
3577   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3578     Ops.push_back(N->getOperand(i));
3579   }
3580 
3581   Ops.push_back(Chain);
3582   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3583   return Ret;
3584 }
3585 
SelectSurfaceIntrinsic(SDNode * N)3586 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3587   SDValue Chain = N->getOperand(0);
3588   SDValue TexHandle = N->getOperand(1);
3589   SDNode *Ret = nullptr;
3590   unsigned Opc = 0;
3591   SmallVector<SDValue, 8> Ops;
3592   switch (N->getOpcode()) {
3593   default: return nullptr;
3594   case NVPTXISD::Suld1DI8Clamp:
3595     Opc = NVPTX::SULD_1D_I8_CLAMP;
3596     Ops.push_back(TexHandle);
3597     Ops.push_back(N->getOperand(2));
3598     Ops.push_back(Chain);
3599     break;
3600   case NVPTXISD::Suld1DI16Clamp:
3601     Opc = NVPTX::SULD_1D_I16_CLAMP;
3602     Ops.push_back(TexHandle);
3603     Ops.push_back(N->getOperand(2));
3604     Ops.push_back(Chain);
3605     break;
3606   case NVPTXISD::Suld1DI32Clamp:
3607     Opc = NVPTX::SULD_1D_I32_CLAMP;
3608     Ops.push_back(TexHandle);
3609     Ops.push_back(N->getOperand(2));
3610     Ops.push_back(Chain);
3611     break;
3612   case NVPTXISD::Suld1DI64Clamp:
3613     Opc = NVPTX::SULD_1D_I64_CLAMP;
3614     Ops.push_back(TexHandle);
3615     Ops.push_back(N->getOperand(2));
3616     Ops.push_back(Chain);
3617     break;
3618   case NVPTXISD::Suld1DV2I8Clamp:
3619     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3620     Ops.push_back(TexHandle);
3621     Ops.push_back(N->getOperand(2));
3622     Ops.push_back(Chain);
3623     break;
3624   case NVPTXISD::Suld1DV2I16Clamp:
3625     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3626     Ops.push_back(TexHandle);
3627     Ops.push_back(N->getOperand(2));
3628     Ops.push_back(Chain);
3629     break;
3630   case NVPTXISD::Suld1DV2I32Clamp:
3631     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3632     Ops.push_back(TexHandle);
3633     Ops.push_back(N->getOperand(2));
3634     Ops.push_back(Chain);
3635     break;
3636   case NVPTXISD::Suld1DV2I64Clamp:
3637     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3638     Ops.push_back(TexHandle);
3639     Ops.push_back(N->getOperand(2));
3640     Ops.push_back(Chain);
3641     break;
3642   case NVPTXISD::Suld1DV4I8Clamp:
3643     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3644     Ops.push_back(TexHandle);
3645     Ops.push_back(N->getOperand(2));
3646     Ops.push_back(Chain);
3647     break;
3648   case NVPTXISD::Suld1DV4I16Clamp:
3649     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3650     Ops.push_back(TexHandle);
3651     Ops.push_back(N->getOperand(2));
3652     Ops.push_back(Chain);
3653     break;
3654   case NVPTXISD::Suld1DV4I32Clamp:
3655     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3656     Ops.push_back(TexHandle);
3657     Ops.push_back(N->getOperand(2));
3658     Ops.push_back(Chain);
3659     break;
3660   case NVPTXISD::Suld1DArrayI8Clamp:
3661     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3662     Ops.push_back(TexHandle);
3663     Ops.push_back(N->getOperand(2));
3664     Ops.push_back(N->getOperand(3));
3665     Ops.push_back(Chain);
3666     break;
3667   case NVPTXISD::Suld1DArrayI16Clamp:
3668     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3669     Ops.push_back(TexHandle);
3670     Ops.push_back(N->getOperand(2));
3671     Ops.push_back(N->getOperand(3));
3672     Ops.push_back(Chain);
3673     break;
3674   case NVPTXISD::Suld1DArrayI32Clamp:
3675     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3676     Ops.push_back(TexHandle);
3677     Ops.push_back(N->getOperand(2));
3678     Ops.push_back(N->getOperand(3));
3679     Ops.push_back(Chain);
3680     break;
3681   case NVPTXISD::Suld1DArrayI64Clamp:
3682     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3683     Ops.push_back(TexHandle);
3684     Ops.push_back(N->getOperand(2));
3685     Ops.push_back(N->getOperand(3));
3686     Ops.push_back(Chain);
3687     break;
3688   case NVPTXISD::Suld1DArrayV2I8Clamp:
3689     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3690     Ops.push_back(TexHandle);
3691     Ops.push_back(N->getOperand(2));
3692     Ops.push_back(N->getOperand(3));
3693     Ops.push_back(Chain);
3694     break;
3695   case NVPTXISD::Suld1DArrayV2I16Clamp:
3696     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3697     Ops.push_back(TexHandle);
3698     Ops.push_back(N->getOperand(2));
3699     Ops.push_back(N->getOperand(3));
3700     Ops.push_back(Chain);
3701     break;
3702   case NVPTXISD::Suld1DArrayV2I32Clamp:
3703     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3704     Ops.push_back(TexHandle);
3705     Ops.push_back(N->getOperand(2));
3706     Ops.push_back(N->getOperand(3));
3707     Ops.push_back(Chain);
3708     break;
3709   case NVPTXISD::Suld1DArrayV2I64Clamp:
3710     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3711     Ops.push_back(TexHandle);
3712     Ops.push_back(N->getOperand(2));
3713     Ops.push_back(N->getOperand(3));
3714     Ops.push_back(Chain);
3715     break;
3716   case NVPTXISD::Suld1DArrayV4I8Clamp:
3717     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3718     Ops.push_back(TexHandle);
3719     Ops.push_back(N->getOperand(2));
3720     Ops.push_back(N->getOperand(3));
3721     Ops.push_back(Chain);
3722     break;
3723   case NVPTXISD::Suld1DArrayV4I16Clamp:
3724     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3725     Ops.push_back(TexHandle);
3726     Ops.push_back(N->getOperand(2));
3727     Ops.push_back(N->getOperand(3));
3728     Ops.push_back(Chain);
3729     break;
3730   case NVPTXISD::Suld1DArrayV4I32Clamp:
3731     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3732     Ops.push_back(TexHandle);
3733     Ops.push_back(N->getOperand(2));
3734     Ops.push_back(N->getOperand(3));
3735     Ops.push_back(Chain);
3736     break;
3737   case NVPTXISD::Suld2DI8Clamp:
3738     Opc = NVPTX::SULD_2D_I8_CLAMP;
3739     Ops.push_back(TexHandle);
3740     Ops.push_back(N->getOperand(2));
3741     Ops.push_back(N->getOperand(3));
3742     Ops.push_back(Chain);
3743     break;
3744   case NVPTXISD::Suld2DI16Clamp:
3745     Opc = NVPTX::SULD_2D_I16_CLAMP;
3746     Ops.push_back(TexHandle);
3747     Ops.push_back(N->getOperand(2));
3748     Ops.push_back(N->getOperand(3));
3749     Ops.push_back(Chain);
3750     break;
3751   case NVPTXISD::Suld2DI32Clamp:
3752     Opc = NVPTX::SULD_2D_I32_CLAMP;
3753     Ops.push_back(TexHandle);
3754     Ops.push_back(N->getOperand(2));
3755     Ops.push_back(N->getOperand(3));
3756     Ops.push_back(Chain);
3757     break;
3758   case NVPTXISD::Suld2DI64Clamp:
3759     Opc = NVPTX::SULD_2D_I64_CLAMP;
3760     Ops.push_back(TexHandle);
3761     Ops.push_back(N->getOperand(2));
3762     Ops.push_back(N->getOperand(3));
3763     Ops.push_back(Chain);
3764     break;
3765   case NVPTXISD::Suld2DV2I8Clamp:
3766     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3767     Ops.push_back(TexHandle);
3768     Ops.push_back(N->getOperand(2));
3769     Ops.push_back(N->getOperand(3));
3770     Ops.push_back(Chain);
3771     break;
3772   case NVPTXISD::Suld2DV2I16Clamp:
3773     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3774     Ops.push_back(TexHandle);
3775     Ops.push_back(N->getOperand(2));
3776     Ops.push_back(N->getOperand(3));
3777     Ops.push_back(Chain);
3778     break;
3779   case NVPTXISD::Suld2DV2I32Clamp:
3780     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3781     Ops.push_back(TexHandle);
3782     Ops.push_back(N->getOperand(2));
3783     Ops.push_back(N->getOperand(3));
3784     Ops.push_back(Chain);
3785     break;
3786   case NVPTXISD::Suld2DV2I64Clamp:
3787     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3788     Ops.push_back(TexHandle);
3789     Ops.push_back(N->getOperand(2));
3790     Ops.push_back(N->getOperand(3));
3791     Ops.push_back(Chain);
3792     break;
3793   case NVPTXISD::Suld2DV4I8Clamp:
3794     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3795     Ops.push_back(TexHandle);
3796     Ops.push_back(N->getOperand(2));
3797     Ops.push_back(N->getOperand(3));
3798     Ops.push_back(Chain);
3799     break;
3800   case NVPTXISD::Suld2DV4I16Clamp:
3801     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3802     Ops.push_back(TexHandle);
3803     Ops.push_back(N->getOperand(2));
3804     Ops.push_back(N->getOperand(3));
3805     Ops.push_back(Chain);
3806     break;
3807   case NVPTXISD::Suld2DV4I32Clamp:
3808     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3809     Ops.push_back(TexHandle);
3810     Ops.push_back(N->getOperand(2));
3811     Ops.push_back(N->getOperand(3));
3812     Ops.push_back(Chain);
3813     break;
3814   case NVPTXISD::Suld2DArrayI8Clamp:
3815     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3816     Ops.push_back(TexHandle);
3817     Ops.push_back(N->getOperand(2));
3818     Ops.push_back(N->getOperand(3));
3819     Ops.push_back(N->getOperand(4));
3820     Ops.push_back(Chain);
3821     break;
3822   case NVPTXISD::Suld2DArrayI16Clamp:
3823     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3824     Ops.push_back(TexHandle);
3825     Ops.push_back(N->getOperand(2));
3826     Ops.push_back(N->getOperand(3));
3827     Ops.push_back(N->getOperand(4));
3828     Ops.push_back(Chain);
3829     break;
3830   case NVPTXISD::Suld2DArrayI32Clamp:
3831     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3832     Ops.push_back(TexHandle);
3833     Ops.push_back(N->getOperand(2));
3834     Ops.push_back(N->getOperand(3));
3835     Ops.push_back(N->getOperand(4));
3836     Ops.push_back(Chain);
3837     break;
3838   case NVPTXISD::Suld2DArrayI64Clamp:
3839     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3840     Ops.push_back(TexHandle);
3841     Ops.push_back(N->getOperand(2));
3842     Ops.push_back(N->getOperand(3));
3843     Ops.push_back(N->getOperand(4));
3844     Ops.push_back(Chain);
3845     break;
3846   case NVPTXISD::Suld2DArrayV2I8Clamp:
3847     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3848     Ops.push_back(TexHandle);
3849     Ops.push_back(N->getOperand(2));
3850     Ops.push_back(N->getOperand(3));
3851     Ops.push_back(N->getOperand(4));
3852     Ops.push_back(Chain);
3853     break;
3854   case NVPTXISD::Suld2DArrayV2I16Clamp:
3855     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3856     Ops.push_back(TexHandle);
3857     Ops.push_back(N->getOperand(2));
3858     Ops.push_back(N->getOperand(3));
3859     Ops.push_back(N->getOperand(4));
3860     Ops.push_back(Chain);
3861     break;
3862   case NVPTXISD::Suld2DArrayV2I32Clamp:
3863     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3864     Ops.push_back(TexHandle);
3865     Ops.push_back(N->getOperand(2));
3866     Ops.push_back(N->getOperand(3));
3867     Ops.push_back(N->getOperand(4));
3868     Ops.push_back(Chain);
3869     break;
3870   case NVPTXISD::Suld2DArrayV2I64Clamp:
3871     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3872     Ops.push_back(TexHandle);
3873     Ops.push_back(N->getOperand(2));
3874     Ops.push_back(N->getOperand(3));
3875     Ops.push_back(N->getOperand(4));
3876     Ops.push_back(Chain);
3877     break;
3878   case NVPTXISD::Suld2DArrayV4I8Clamp:
3879     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3880     Ops.push_back(TexHandle);
3881     Ops.push_back(N->getOperand(2));
3882     Ops.push_back(N->getOperand(3));
3883     Ops.push_back(N->getOperand(4));
3884     Ops.push_back(Chain);
3885     break;
3886   case NVPTXISD::Suld2DArrayV4I16Clamp:
3887     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3888     Ops.push_back(TexHandle);
3889     Ops.push_back(N->getOperand(2));
3890     Ops.push_back(N->getOperand(3));
3891     Ops.push_back(N->getOperand(4));
3892     Ops.push_back(Chain);
3893     break;
3894   case NVPTXISD::Suld2DArrayV4I32Clamp:
3895     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3896     Ops.push_back(TexHandle);
3897     Ops.push_back(N->getOperand(2));
3898     Ops.push_back(N->getOperand(3));
3899     Ops.push_back(N->getOperand(4));
3900     Ops.push_back(Chain);
3901     break;
3902   case NVPTXISD::Suld3DI8Clamp:
3903     Opc = NVPTX::SULD_3D_I8_CLAMP;
3904     Ops.push_back(TexHandle);
3905     Ops.push_back(N->getOperand(2));
3906     Ops.push_back(N->getOperand(3));
3907     Ops.push_back(N->getOperand(4));
3908     Ops.push_back(Chain);
3909     break;
3910   case NVPTXISD::Suld3DI16Clamp:
3911     Opc = NVPTX::SULD_3D_I16_CLAMP;
3912     Ops.push_back(TexHandle);
3913     Ops.push_back(N->getOperand(2));
3914     Ops.push_back(N->getOperand(3));
3915     Ops.push_back(N->getOperand(4));
3916     Ops.push_back(Chain);
3917     break;
3918   case NVPTXISD::Suld3DI32Clamp:
3919     Opc = NVPTX::SULD_3D_I32_CLAMP;
3920     Ops.push_back(TexHandle);
3921     Ops.push_back(N->getOperand(2));
3922     Ops.push_back(N->getOperand(3));
3923     Ops.push_back(N->getOperand(4));
3924     Ops.push_back(Chain);
3925     break;
3926   case NVPTXISD::Suld3DI64Clamp:
3927     Opc = NVPTX::SULD_3D_I64_CLAMP;
3928     Ops.push_back(TexHandle);
3929     Ops.push_back(N->getOperand(2));
3930     Ops.push_back(N->getOperand(3));
3931     Ops.push_back(N->getOperand(4));
3932     Ops.push_back(Chain);
3933     break;
3934   case NVPTXISD::Suld3DV2I8Clamp:
3935     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3936     Ops.push_back(TexHandle);
3937     Ops.push_back(N->getOperand(2));
3938     Ops.push_back(N->getOperand(3));
3939     Ops.push_back(N->getOperand(4));
3940     Ops.push_back(Chain);
3941     break;
3942   case NVPTXISD::Suld3DV2I16Clamp:
3943     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3944     Ops.push_back(TexHandle);
3945     Ops.push_back(N->getOperand(2));
3946     Ops.push_back(N->getOperand(3));
3947     Ops.push_back(N->getOperand(4));
3948     Ops.push_back(Chain);
3949     break;
3950   case NVPTXISD::Suld3DV2I32Clamp:
3951     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3952     Ops.push_back(TexHandle);
3953     Ops.push_back(N->getOperand(2));
3954     Ops.push_back(N->getOperand(3));
3955     Ops.push_back(N->getOperand(4));
3956     Ops.push_back(Chain);
3957     break;
3958   case NVPTXISD::Suld3DV2I64Clamp:
3959     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3960     Ops.push_back(TexHandle);
3961     Ops.push_back(N->getOperand(2));
3962     Ops.push_back(N->getOperand(3));
3963     Ops.push_back(N->getOperand(4));
3964     Ops.push_back(Chain);
3965     break;
3966   case NVPTXISD::Suld3DV4I8Clamp:
3967     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3968     Ops.push_back(TexHandle);
3969     Ops.push_back(N->getOperand(2));
3970     Ops.push_back(N->getOperand(3));
3971     Ops.push_back(N->getOperand(4));
3972     Ops.push_back(Chain);
3973     break;
3974   case NVPTXISD::Suld3DV4I16Clamp:
3975     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3976     Ops.push_back(TexHandle);
3977     Ops.push_back(N->getOperand(2));
3978     Ops.push_back(N->getOperand(3));
3979     Ops.push_back(N->getOperand(4));
3980     Ops.push_back(Chain);
3981     break;
3982   case NVPTXISD::Suld3DV4I32Clamp:
3983     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3984     Ops.push_back(TexHandle);
3985     Ops.push_back(N->getOperand(2));
3986     Ops.push_back(N->getOperand(3));
3987     Ops.push_back(N->getOperand(4));
3988     Ops.push_back(Chain);
3989     break;
3990   case NVPTXISD::Suld1DI8Trap:
3991     Opc = NVPTX::SULD_1D_I8_TRAP;
3992     Ops.push_back(TexHandle);
3993     Ops.push_back(N->getOperand(2));
3994     Ops.push_back(Chain);
3995     break;
3996   case NVPTXISD::Suld1DI16Trap:
3997     Opc = NVPTX::SULD_1D_I16_TRAP;
3998     Ops.push_back(TexHandle);
3999     Ops.push_back(N->getOperand(2));
4000     Ops.push_back(Chain);
4001     break;
4002   case NVPTXISD::Suld1DI32Trap:
4003     Opc = NVPTX::SULD_1D_I32_TRAP;
4004     Ops.push_back(TexHandle);
4005     Ops.push_back(N->getOperand(2));
4006     Ops.push_back(Chain);
4007     break;
4008   case NVPTXISD::Suld1DI64Trap:
4009     Opc = NVPTX::SULD_1D_I64_TRAP;
4010     Ops.push_back(TexHandle);
4011     Ops.push_back(N->getOperand(2));
4012     Ops.push_back(Chain);
4013     break;
4014   case NVPTXISD::Suld1DV2I8Trap:
4015     Opc = NVPTX::SULD_1D_V2I8_TRAP;
4016     Ops.push_back(TexHandle);
4017     Ops.push_back(N->getOperand(2));
4018     Ops.push_back(Chain);
4019     break;
4020   case NVPTXISD::Suld1DV2I16Trap:
4021     Opc = NVPTX::SULD_1D_V2I16_TRAP;
4022     Ops.push_back(TexHandle);
4023     Ops.push_back(N->getOperand(2));
4024     Ops.push_back(Chain);
4025     break;
4026   case NVPTXISD::Suld1DV2I32Trap:
4027     Opc = NVPTX::SULD_1D_V2I32_TRAP;
4028     Ops.push_back(TexHandle);
4029     Ops.push_back(N->getOperand(2));
4030     Ops.push_back(Chain);
4031     break;
4032   case NVPTXISD::Suld1DV2I64Trap:
4033     Opc = NVPTX::SULD_1D_V2I64_TRAP;
4034     Ops.push_back(TexHandle);
4035     Ops.push_back(N->getOperand(2));
4036     Ops.push_back(Chain);
4037     break;
4038   case NVPTXISD::Suld1DV4I8Trap:
4039     Opc = NVPTX::SULD_1D_V4I8_TRAP;
4040     Ops.push_back(TexHandle);
4041     Ops.push_back(N->getOperand(2));
4042     Ops.push_back(Chain);
4043     break;
4044   case NVPTXISD::Suld1DV4I16Trap:
4045     Opc = NVPTX::SULD_1D_V4I16_TRAP;
4046     Ops.push_back(TexHandle);
4047     Ops.push_back(N->getOperand(2));
4048     Ops.push_back(Chain);
4049     break;
4050   case NVPTXISD::Suld1DV4I32Trap:
4051     Opc = NVPTX::SULD_1D_V4I32_TRAP;
4052     Ops.push_back(TexHandle);
4053     Ops.push_back(N->getOperand(2));
4054     Ops.push_back(Chain);
4055     break;
4056   case NVPTXISD::Suld1DArrayI8Trap:
4057     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4058     Ops.push_back(TexHandle);
4059     Ops.push_back(N->getOperand(2));
4060     Ops.push_back(N->getOperand(3));
4061     Ops.push_back(Chain);
4062     break;
4063   case NVPTXISD::Suld1DArrayI16Trap:
4064     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4065     Ops.push_back(TexHandle);
4066     Ops.push_back(N->getOperand(2));
4067     Ops.push_back(N->getOperand(3));
4068     Ops.push_back(Chain);
4069     break;
4070   case NVPTXISD::Suld1DArrayI32Trap:
4071     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4072     Ops.push_back(TexHandle);
4073     Ops.push_back(N->getOperand(2));
4074     Ops.push_back(N->getOperand(3));
4075     Ops.push_back(Chain);
4076     break;
4077   case NVPTXISD::Suld1DArrayI64Trap:
4078     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4079     Ops.push_back(TexHandle);
4080     Ops.push_back(N->getOperand(2));
4081     Ops.push_back(N->getOperand(3));
4082     Ops.push_back(Chain);
4083     break;
4084   case NVPTXISD::Suld1DArrayV2I8Trap:
4085     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4086     Ops.push_back(TexHandle);
4087     Ops.push_back(N->getOperand(2));
4088     Ops.push_back(N->getOperand(3));
4089     Ops.push_back(Chain);
4090     break;
4091   case NVPTXISD::Suld1DArrayV2I16Trap:
4092     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4093     Ops.push_back(TexHandle);
4094     Ops.push_back(N->getOperand(2));
4095     Ops.push_back(N->getOperand(3));
4096     Ops.push_back(Chain);
4097     break;
4098   case NVPTXISD::Suld1DArrayV2I32Trap:
4099     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4100     Ops.push_back(TexHandle);
4101     Ops.push_back(N->getOperand(2));
4102     Ops.push_back(N->getOperand(3));
4103     Ops.push_back(Chain);
4104     break;
4105   case NVPTXISD::Suld1DArrayV2I64Trap:
4106     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4107     Ops.push_back(TexHandle);
4108     Ops.push_back(N->getOperand(2));
4109     Ops.push_back(N->getOperand(3));
4110     Ops.push_back(Chain);
4111     break;
4112   case NVPTXISD::Suld1DArrayV4I8Trap:
4113     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4114     Ops.push_back(TexHandle);
4115     Ops.push_back(N->getOperand(2));
4116     Ops.push_back(N->getOperand(3));
4117     Ops.push_back(Chain);
4118     break;
4119   case NVPTXISD::Suld1DArrayV4I16Trap:
4120     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4121     Ops.push_back(TexHandle);
4122     Ops.push_back(N->getOperand(2));
4123     Ops.push_back(N->getOperand(3));
4124     Ops.push_back(Chain);
4125     break;
4126   case NVPTXISD::Suld1DArrayV4I32Trap:
4127     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4128     Ops.push_back(TexHandle);
4129     Ops.push_back(N->getOperand(2));
4130     Ops.push_back(N->getOperand(3));
4131     Ops.push_back(Chain);
4132     break;
4133   case NVPTXISD::Suld2DI8Trap:
4134     Opc = NVPTX::SULD_2D_I8_TRAP;
4135     Ops.push_back(TexHandle);
4136     Ops.push_back(N->getOperand(2));
4137     Ops.push_back(N->getOperand(3));
4138     Ops.push_back(Chain);
4139     break;
4140   case NVPTXISD::Suld2DI16Trap:
4141     Opc = NVPTX::SULD_2D_I16_TRAP;
4142     Ops.push_back(TexHandle);
4143     Ops.push_back(N->getOperand(2));
4144     Ops.push_back(N->getOperand(3));
4145     Ops.push_back(Chain);
4146     break;
4147   case NVPTXISD::Suld2DI32Trap:
4148     Opc = NVPTX::SULD_2D_I32_TRAP;
4149     Ops.push_back(TexHandle);
4150     Ops.push_back(N->getOperand(2));
4151     Ops.push_back(N->getOperand(3));
4152     Ops.push_back(Chain);
4153     break;
4154   case NVPTXISD::Suld2DI64Trap:
4155     Opc = NVPTX::SULD_2D_I64_TRAP;
4156     Ops.push_back(TexHandle);
4157     Ops.push_back(N->getOperand(2));
4158     Ops.push_back(N->getOperand(3));
4159     Ops.push_back(Chain);
4160     break;
4161   case NVPTXISD::Suld2DV2I8Trap:
4162     Opc = NVPTX::SULD_2D_V2I8_TRAP;
4163     Ops.push_back(TexHandle);
4164     Ops.push_back(N->getOperand(2));
4165     Ops.push_back(N->getOperand(3));
4166     Ops.push_back(Chain);
4167     break;
4168   case NVPTXISD::Suld2DV2I16Trap:
4169     Opc = NVPTX::SULD_2D_V2I16_TRAP;
4170     Ops.push_back(TexHandle);
4171     Ops.push_back(N->getOperand(2));
4172     Ops.push_back(N->getOperand(3));
4173     Ops.push_back(Chain);
4174     break;
4175   case NVPTXISD::Suld2DV2I32Trap:
4176     Opc = NVPTX::SULD_2D_V2I32_TRAP;
4177     Ops.push_back(TexHandle);
4178     Ops.push_back(N->getOperand(2));
4179     Ops.push_back(N->getOperand(3));
4180     Ops.push_back(Chain);
4181     break;
4182   case NVPTXISD::Suld2DV2I64Trap:
4183     Opc = NVPTX::SULD_2D_V2I64_TRAP;
4184     Ops.push_back(TexHandle);
4185     Ops.push_back(N->getOperand(2));
4186     Ops.push_back(N->getOperand(3));
4187     Ops.push_back(Chain);
4188     break;
4189   case NVPTXISD::Suld2DV4I8Trap:
4190     Opc = NVPTX::SULD_2D_V4I8_TRAP;
4191     Ops.push_back(TexHandle);
4192     Ops.push_back(N->getOperand(2));
4193     Ops.push_back(N->getOperand(3));
4194     Ops.push_back(Chain);
4195     break;
4196   case NVPTXISD::Suld2DV4I16Trap:
4197     Opc = NVPTX::SULD_2D_V4I16_TRAP;
4198     Ops.push_back(TexHandle);
4199     Ops.push_back(N->getOperand(2));
4200     Ops.push_back(N->getOperand(3));
4201     Ops.push_back(Chain);
4202     break;
4203   case NVPTXISD::Suld2DV4I32Trap:
4204     Opc = NVPTX::SULD_2D_V4I32_TRAP;
4205     Ops.push_back(TexHandle);
4206     Ops.push_back(N->getOperand(2));
4207     Ops.push_back(N->getOperand(3));
4208     Ops.push_back(Chain);
4209     break;
4210   case NVPTXISD::Suld2DArrayI8Trap:
4211     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4212     Ops.push_back(TexHandle);
4213     Ops.push_back(N->getOperand(2));
4214     Ops.push_back(N->getOperand(3));
4215     Ops.push_back(N->getOperand(4));
4216     Ops.push_back(Chain);
4217     break;
4218   case NVPTXISD::Suld2DArrayI16Trap:
4219     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4220     Ops.push_back(TexHandle);
4221     Ops.push_back(N->getOperand(2));
4222     Ops.push_back(N->getOperand(3));
4223     Ops.push_back(N->getOperand(4));
4224     Ops.push_back(Chain);
4225     break;
4226   case NVPTXISD::Suld2DArrayI32Trap:
4227     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4228     Ops.push_back(TexHandle);
4229     Ops.push_back(N->getOperand(2));
4230     Ops.push_back(N->getOperand(3));
4231     Ops.push_back(N->getOperand(4));
4232     Ops.push_back(Chain);
4233     break;
4234   case NVPTXISD::Suld2DArrayI64Trap:
4235     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4236     Ops.push_back(TexHandle);
4237     Ops.push_back(N->getOperand(2));
4238     Ops.push_back(N->getOperand(3));
4239     Ops.push_back(N->getOperand(4));
4240     Ops.push_back(Chain);
4241     break;
4242   case NVPTXISD::Suld2DArrayV2I8Trap:
4243     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4244     Ops.push_back(TexHandle);
4245     Ops.push_back(N->getOperand(2));
4246     Ops.push_back(N->getOperand(3));
4247     Ops.push_back(N->getOperand(4));
4248     Ops.push_back(Chain);
4249     break;
4250   case NVPTXISD::Suld2DArrayV2I16Trap:
4251     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4252     Ops.push_back(TexHandle);
4253     Ops.push_back(N->getOperand(2));
4254     Ops.push_back(N->getOperand(3));
4255     Ops.push_back(N->getOperand(4));
4256     Ops.push_back(Chain);
4257     break;
4258   case NVPTXISD::Suld2DArrayV2I32Trap:
4259     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4260     Ops.push_back(TexHandle);
4261     Ops.push_back(N->getOperand(2));
4262     Ops.push_back(N->getOperand(3));
4263     Ops.push_back(N->getOperand(4));
4264     Ops.push_back(Chain);
4265     break;
4266   case NVPTXISD::Suld2DArrayV2I64Trap:
4267     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4268     Ops.push_back(TexHandle);
4269     Ops.push_back(N->getOperand(2));
4270     Ops.push_back(N->getOperand(3));
4271     Ops.push_back(N->getOperand(4));
4272     Ops.push_back(Chain);
4273     break;
4274   case NVPTXISD::Suld2DArrayV4I8Trap:
4275     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4276     Ops.push_back(TexHandle);
4277     Ops.push_back(N->getOperand(2));
4278     Ops.push_back(N->getOperand(3));
4279     Ops.push_back(N->getOperand(4));
4280     Ops.push_back(Chain);
4281     break;
4282   case NVPTXISD::Suld2DArrayV4I16Trap:
4283     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4284     Ops.push_back(TexHandle);
4285     Ops.push_back(N->getOperand(2));
4286     Ops.push_back(N->getOperand(3));
4287     Ops.push_back(N->getOperand(4));
4288     Ops.push_back(Chain);
4289     break;
4290   case NVPTXISD::Suld2DArrayV4I32Trap:
4291     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4292     Ops.push_back(TexHandle);
4293     Ops.push_back(N->getOperand(2));
4294     Ops.push_back(N->getOperand(3));
4295     Ops.push_back(N->getOperand(4));
4296     Ops.push_back(Chain);
4297     break;
4298   case NVPTXISD::Suld3DI8Trap:
4299     Opc = NVPTX::SULD_3D_I8_TRAP;
4300     Ops.push_back(TexHandle);
4301     Ops.push_back(N->getOperand(2));
4302     Ops.push_back(N->getOperand(3));
4303     Ops.push_back(N->getOperand(4));
4304     Ops.push_back(Chain);
4305     break;
4306   case NVPTXISD::Suld3DI16Trap:
4307     Opc = NVPTX::SULD_3D_I16_TRAP;
4308     Ops.push_back(TexHandle);
4309     Ops.push_back(N->getOperand(2));
4310     Ops.push_back(N->getOperand(3));
4311     Ops.push_back(N->getOperand(4));
4312     Ops.push_back(Chain);
4313     break;
4314   case NVPTXISD::Suld3DI32Trap:
4315     Opc = NVPTX::SULD_3D_I32_TRAP;
4316     Ops.push_back(TexHandle);
4317     Ops.push_back(N->getOperand(2));
4318     Ops.push_back(N->getOperand(3));
4319     Ops.push_back(N->getOperand(4));
4320     Ops.push_back(Chain);
4321     break;
4322   case NVPTXISD::Suld3DI64Trap:
4323     Opc = NVPTX::SULD_3D_I64_TRAP;
4324     Ops.push_back(TexHandle);
4325     Ops.push_back(N->getOperand(2));
4326     Ops.push_back(N->getOperand(3));
4327     Ops.push_back(N->getOperand(4));
4328     Ops.push_back(Chain);
4329     break;
4330   case NVPTXISD::Suld3DV2I8Trap:
4331     Opc = NVPTX::SULD_3D_V2I8_TRAP;
4332     Ops.push_back(TexHandle);
4333     Ops.push_back(N->getOperand(2));
4334     Ops.push_back(N->getOperand(3));
4335     Ops.push_back(N->getOperand(4));
4336     Ops.push_back(Chain);
4337     break;
4338   case NVPTXISD::Suld3DV2I16Trap:
4339     Opc = NVPTX::SULD_3D_V2I16_TRAP;
4340     Ops.push_back(TexHandle);
4341     Ops.push_back(N->getOperand(2));
4342     Ops.push_back(N->getOperand(3));
4343     Ops.push_back(N->getOperand(4));
4344     Ops.push_back(Chain);
4345     break;
4346   case NVPTXISD::Suld3DV2I32Trap:
4347     Opc = NVPTX::SULD_3D_V2I32_TRAP;
4348     Ops.push_back(TexHandle);
4349     Ops.push_back(N->getOperand(2));
4350     Ops.push_back(N->getOperand(3));
4351     Ops.push_back(N->getOperand(4));
4352     Ops.push_back(Chain);
4353     break;
4354   case NVPTXISD::Suld3DV2I64Trap:
4355     Opc = NVPTX::SULD_3D_V2I64_TRAP;
4356     Ops.push_back(TexHandle);
4357     Ops.push_back(N->getOperand(2));
4358     Ops.push_back(N->getOperand(3));
4359     Ops.push_back(N->getOperand(4));
4360     Ops.push_back(Chain);
4361     break;
4362   case NVPTXISD::Suld3DV4I8Trap:
4363     Opc = NVPTX::SULD_3D_V4I8_TRAP;
4364     Ops.push_back(TexHandle);
4365     Ops.push_back(N->getOperand(2));
4366     Ops.push_back(N->getOperand(3));
4367     Ops.push_back(N->getOperand(4));
4368     Ops.push_back(Chain);
4369     break;
4370   case NVPTXISD::Suld3DV4I16Trap:
4371     Opc = NVPTX::SULD_3D_V4I16_TRAP;
4372     Ops.push_back(TexHandle);
4373     Ops.push_back(N->getOperand(2));
4374     Ops.push_back(N->getOperand(3));
4375     Ops.push_back(N->getOperand(4));
4376     Ops.push_back(Chain);
4377     break;
4378   case NVPTXISD::Suld3DV4I32Trap:
4379     Opc = NVPTX::SULD_3D_V4I32_TRAP;
4380     Ops.push_back(TexHandle);
4381     Ops.push_back(N->getOperand(2));
4382     Ops.push_back(N->getOperand(3));
4383     Ops.push_back(N->getOperand(4));
4384     Ops.push_back(Chain);
4385     break;
4386   case NVPTXISD::Suld1DI8Zero:
4387     Opc = NVPTX::SULD_1D_I8_ZERO;
4388     Ops.push_back(TexHandle);
4389     Ops.push_back(N->getOperand(2));
4390     Ops.push_back(Chain);
4391     break;
4392   case NVPTXISD::Suld1DI16Zero:
4393     Opc = NVPTX::SULD_1D_I16_ZERO;
4394     Ops.push_back(TexHandle);
4395     Ops.push_back(N->getOperand(2));
4396     Ops.push_back(Chain);
4397     break;
4398   case NVPTXISD::Suld1DI32Zero:
4399     Opc = NVPTX::SULD_1D_I32_ZERO;
4400     Ops.push_back(TexHandle);
4401     Ops.push_back(N->getOperand(2));
4402     Ops.push_back(Chain);
4403     break;
4404   case NVPTXISD::Suld1DI64Zero:
4405     Opc = NVPTX::SULD_1D_I64_ZERO;
4406     Ops.push_back(TexHandle);
4407     Ops.push_back(N->getOperand(2));
4408     Ops.push_back(Chain);
4409     break;
4410   case NVPTXISD::Suld1DV2I8Zero:
4411     Opc = NVPTX::SULD_1D_V2I8_ZERO;
4412     Ops.push_back(TexHandle);
4413     Ops.push_back(N->getOperand(2));
4414     Ops.push_back(Chain);
4415     break;
4416   case NVPTXISD::Suld1DV2I16Zero:
4417     Opc = NVPTX::SULD_1D_V2I16_ZERO;
4418     Ops.push_back(TexHandle);
4419     Ops.push_back(N->getOperand(2));
4420     Ops.push_back(Chain);
4421     break;
4422   case NVPTXISD::Suld1DV2I32Zero:
4423     Opc = NVPTX::SULD_1D_V2I32_ZERO;
4424     Ops.push_back(TexHandle);
4425     Ops.push_back(N->getOperand(2));
4426     Ops.push_back(Chain);
4427     break;
4428   case NVPTXISD::Suld1DV2I64Zero:
4429     Opc = NVPTX::SULD_1D_V2I64_ZERO;
4430     Ops.push_back(TexHandle);
4431     Ops.push_back(N->getOperand(2));
4432     Ops.push_back(Chain);
4433     break;
4434   case NVPTXISD::Suld1DV4I8Zero:
4435     Opc = NVPTX::SULD_1D_V4I8_ZERO;
4436     Ops.push_back(TexHandle);
4437     Ops.push_back(N->getOperand(2));
4438     Ops.push_back(Chain);
4439     break;
4440   case NVPTXISD::Suld1DV4I16Zero:
4441     Opc = NVPTX::SULD_1D_V4I16_ZERO;
4442     Ops.push_back(TexHandle);
4443     Ops.push_back(N->getOperand(2));
4444     Ops.push_back(Chain);
4445     break;
4446   case NVPTXISD::Suld1DV4I32Zero:
4447     Opc = NVPTX::SULD_1D_V4I32_ZERO;
4448     Ops.push_back(TexHandle);
4449     Ops.push_back(N->getOperand(2));
4450     Ops.push_back(Chain);
4451     break;
4452   case NVPTXISD::Suld1DArrayI8Zero:
4453     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4454     Ops.push_back(TexHandle);
4455     Ops.push_back(N->getOperand(2));
4456     Ops.push_back(N->getOperand(3));
4457     Ops.push_back(Chain);
4458     break;
4459   case NVPTXISD::Suld1DArrayI16Zero:
4460     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4461     Ops.push_back(TexHandle);
4462     Ops.push_back(N->getOperand(2));
4463     Ops.push_back(N->getOperand(3));
4464     Ops.push_back(Chain);
4465     break;
4466   case NVPTXISD::Suld1DArrayI32Zero:
4467     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4468     Ops.push_back(TexHandle);
4469     Ops.push_back(N->getOperand(2));
4470     Ops.push_back(N->getOperand(3));
4471     Ops.push_back(Chain);
4472     break;
4473   case NVPTXISD::Suld1DArrayI64Zero:
4474     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4475     Ops.push_back(TexHandle);
4476     Ops.push_back(N->getOperand(2));
4477     Ops.push_back(N->getOperand(3));
4478     Ops.push_back(Chain);
4479     break;
4480   case NVPTXISD::Suld1DArrayV2I8Zero:
4481     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4482     Ops.push_back(TexHandle);
4483     Ops.push_back(N->getOperand(2));
4484     Ops.push_back(N->getOperand(3));
4485     Ops.push_back(Chain);
4486     break;
4487   case NVPTXISD::Suld1DArrayV2I16Zero:
4488     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4489     Ops.push_back(TexHandle);
4490     Ops.push_back(N->getOperand(2));
4491     Ops.push_back(N->getOperand(3));
4492     Ops.push_back(Chain);
4493     break;
4494   case NVPTXISD::Suld1DArrayV2I32Zero:
4495     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4496     Ops.push_back(TexHandle);
4497     Ops.push_back(N->getOperand(2));
4498     Ops.push_back(N->getOperand(3));
4499     Ops.push_back(Chain);
4500     break;
4501   case NVPTXISD::Suld1DArrayV2I64Zero:
4502     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4503     Ops.push_back(TexHandle);
4504     Ops.push_back(N->getOperand(2));
4505     Ops.push_back(N->getOperand(3));
4506     Ops.push_back(Chain);
4507     break;
4508   case NVPTXISD::Suld1DArrayV4I8Zero:
4509     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4510     Ops.push_back(TexHandle);
4511     Ops.push_back(N->getOperand(2));
4512     Ops.push_back(N->getOperand(3));
4513     Ops.push_back(Chain);
4514     break;
4515   case NVPTXISD::Suld1DArrayV4I16Zero:
4516     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4517     Ops.push_back(TexHandle);
4518     Ops.push_back(N->getOperand(2));
4519     Ops.push_back(N->getOperand(3));
4520     Ops.push_back(Chain);
4521     break;
4522   case NVPTXISD::Suld1DArrayV4I32Zero:
4523     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4524     Ops.push_back(TexHandle);
4525     Ops.push_back(N->getOperand(2));
4526     Ops.push_back(N->getOperand(3));
4527     Ops.push_back(Chain);
4528     break;
4529   case NVPTXISD::Suld2DI8Zero:
4530     Opc = NVPTX::SULD_2D_I8_ZERO;
4531     Ops.push_back(TexHandle);
4532     Ops.push_back(N->getOperand(2));
4533     Ops.push_back(N->getOperand(3));
4534     Ops.push_back(Chain);
4535     break;
4536   case NVPTXISD::Suld2DI16Zero:
4537     Opc = NVPTX::SULD_2D_I16_ZERO;
4538     Ops.push_back(TexHandle);
4539     Ops.push_back(N->getOperand(2));
4540     Ops.push_back(N->getOperand(3));
4541     Ops.push_back(Chain);
4542     break;
4543   case NVPTXISD::Suld2DI32Zero:
4544     Opc = NVPTX::SULD_2D_I32_ZERO;
4545     Ops.push_back(TexHandle);
4546     Ops.push_back(N->getOperand(2));
4547     Ops.push_back(N->getOperand(3));
4548     Ops.push_back(Chain);
4549     break;
4550   case NVPTXISD::Suld2DI64Zero:
4551     Opc = NVPTX::SULD_2D_I64_ZERO;
4552     Ops.push_back(TexHandle);
4553     Ops.push_back(N->getOperand(2));
4554     Ops.push_back(N->getOperand(3));
4555     Ops.push_back(Chain);
4556     break;
4557   case NVPTXISD::Suld2DV2I8Zero:
4558     Opc = NVPTX::SULD_2D_V2I8_ZERO;
4559     Ops.push_back(TexHandle);
4560     Ops.push_back(N->getOperand(2));
4561     Ops.push_back(N->getOperand(3));
4562     Ops.push_back(Chain);
4563     break;
4564   case NVPTXISD::Suld2DV2I16Zero:
4565     Opc = NVPTX::SULD_2D_V2I16_ZERO;
4566     Ops.push_back(TexHandle);
4567     Ops.push_back(N->getOperand(2));
4568     Ops.push_back(N->getOperand(3));
4569     Ops.push_back(Chain);
4570     break;
4571   case NVPTXISD::Suld2DV2I32Zero:
4572     Opc = NVPTX::SULD_2D_V2I32_ZERO;
4573     Ops.push_back(TexHandle);
4574     Ops.push_back(N->getOperand(2));
4575     Ops.push_back(N->getOperand(3));
4576     Ops.push_back(Chain);
4577     break;
4578   case NVPTXISD::Suld2DV2I64Zero:
4579     Opc = NVPTX::SULD_2D_V2I64_ZERO;
4580     Ops.push_back(TexHandle);
4581     Ops.push_back(N->getOperand(2));
4582     Ops.push_back(N->getOperand(3));
4583     Ops.push_back(Chain);
4584     break;
4585   case NVPTXISD::Suld2DV4I8Zero:
4586     Opc = NVPTX::SULD_2D_V4I8_ZERO;
4587     Ops.push_back(TexHandle);
4588     Ops.push_back(N->getOperand(2));
4589     Ops.push_back(N->getOperand(3));
4590     Ops.push_back(Chain);
4591     break;
4592   case NVPTXISD::Suld2DV4I16Zero:
4593     Opc = NVPTX::SULD_2D_V4I16_ZERO;
4594     Ops.push_back(TexHandle);
4595     Ops.push_back(N->getOperand(2));
4596     Ops.push_back(N->getOperand(3));
4597     Ops.push_back(Chain);
4598     break;
4599   case NVPTXISD::Suld2DV4I32Zero:
4600     Opc = NVPTX::SULD_2D_V4I32_ZERO;
4601     Ops.push_back(TexHandle);
4602     Ops.push_back(N->getOperand(2));
4603     Ops.push_back(N->getOperand(3));
4604     Ops.push_back(Chain);
4605     break;
4606   case NVPTXISD::Suld2DArrayI8Zero:
4607     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4608     Ops.push_back(TexHandle);
4609     Ops.push_back(N->getOperand(2));
4610     Ops.push_back(N->getOperand(3));
4611     Ops.push_back(N->getOperand(4));
4612     Ops.push_back(Chain);
4613     break;
4614   case NVPTXISD::Suld2DArrayI16Zero:
4615     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4616     Ops.push_back(TexHandle);
4617     Ops.push_back(N->getOperand(2));
4618     Ops.push_back(N->getOperand(3));
4619     Ops.push_back(N->getOperand(4));
4620     Ops.push_back(Chain);
4621     break;
4622   case NVPTXISD::Suld2DArrayI32Zero:
4623     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4624     Ops.push_back(TexHandle);
4625     Ops.push_back(N->getOperand(2));
4626     Ops.push_back(N->getOperand(3));
4627     Ops.push_back(N->getOperand(4));
4628     Ops.push_back(Chain);
4629     break;
4630   case NVPTXISD::Suld2DArrayI64Zero:
4631     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4632     Ops.push_back(TexHandle);
4633     Ops.push_back(N->getOperand(2));
4634     Ops.push_back(N->getOperand(3));
4635     Ops.push_back(N->getOperand(4));
4636     Ops.push_back(Chain);
4637     break;
4638   case NVPTXISD::Suld2DArrayV2I8Zero:
4639     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4640     Ops.push_back(TexHandle);
4641     Ops.push_back(N->getOperand(2));
4642     Ops.push_back(N->getOperand(3));
4643     Ops.push_back(N->getOperand(4));
4644     Ops.push_back(Chain);
4645     break;
4646   case NVPTXISD::Suld2DArrayV2I16Zero:
4647     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4648     Ops.push_back(TexHandle);
4649     Ops.push_back(N->getOperand(2));
4650     Ops.push_back(N->getOperand(3));
4651     Ops.push_back(N->getOperand(4));
4652     Ops.push_back(Chain);
4653     break;
4654   case NVPTXISD::Suld2DArrayV2I32Zero:
4655     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4656     Ops.push_back(TexHandle);
4657     Ops.push_back(N->getOperand(2));
4658     Ops.push_back(N->getOperand(3));
4659     Ops.push_back(N->getOperand(4));
4660     Ops.push_back(Chain);
4661     break;
4662   case NVPTXISD::Suld2DArrayV2I64Zero:
4663     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4664     Ops.push_back(TexHandle);
4665     Ops.push_back(N->getOperand(2));
4666     Ops.push_back(N->getOperand(3));
4667     Ops.push_back(N->getOperand(4));
4668     Ops.push_back(Chain);
4669     break;
4670   case NVPTXISD::Suld2DArrayV4I8Zero:
4671     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4672     Ops.push_back(TexHandle);
4673     Ops.push_back(N->getOperand(2));
4674     Ops.push_back(N->getOperand(3));
4675     Ops.push_back(N->getOperand(4));
4676     Ops.push_back(Chain);
4677     break;
4678   case NVPTXISD::Suld2DArrayV4I16Zero:
4679     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4680     Ops.push_back(TexHandle);
4681     Ops.push_back(N->getOperand(2));
4682     Ops.push_back(N->getOperand(3));
4683     Ops.push_back(N->getOperand(4));
4684     Ops.push_back(Chain);
4685     break;
4686   case NVPTXISD::Suld2DArrayV4I32Zero:
4687     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4688     Ops.push_back(TexHandle);
4689     Ops.push_back(N->getOperand(2));
4690     Ops.push_back(N->getOperand(3));
4691     Ops.push_back(N->getOperand(4));
4692     Ops.push_back(Chain);
4693     break;
4694   case NVPTXISD::Suld3DI8Zero:
4695     Opc = NVPTX::SULD_3D_I8_ZERO;
4696     Ops.push_back(TexHandle);
4697     Ops.push_back(N->getOperand(2));
4698     Ops.push_back(N->getOperand(3));
4699     Ops.push_back(N->getOperand(4));
4700     Ops.push_back(Chain);
4701     break;
4702   case NVPTXISD::Suld3DI16Zero:
4703     Opc = NVPTX::SULD_3D_I16_ZERO;
4704     Ops.push_back(TexHandle);
4705     Ops.push_back(N->getOperand(2));
4706     Ops.push_back(N->getOperand(3));
4707     Ops.push_back(N->getOperand(4));
4708     Ops.push_back(Chain);
4709     break;
4710   case NVPTXISD::Suld3DI32Zero:
4711     Opc = NVPTX::SULD_3D_I32_ZERO;
4712     Ops.push_back(TexHandle);
4713     Ops.push_back(N->getOperand(2));
4714     Ops.push_back(N->getOperand(3));
4715     Ops.push_back(N->getOperand(4));
4716     Ops.push_back(Chain);
4717     break;
4718   case NVPTXISD::Suld3DI64Zero:
4719     Opc = NVPTX::SULD_3D_I64_ZERO;
4720     Ops.push_back(TexHandle);
4721     Ops.push_back(N->getOperand(2));
4722     Ops.push_back(N->getOperand(3));
4723     Ops.push_back(N->getOperand(4));
4724     Ops.push_back(Chain);
4725     break;
4726   case NVPTXISD::Suld3DV2I8Zero:
4727     Opc = NVPTX::SULD_3D_V2I8_ZERO;
4728     Ops.push_back(TexHandle);
4729     Ops.push_back(N->getOperand(2));
4730     Ops.push_back(N->getOperand(3));
4731     Ops.push_back(N->getOperand(4));
4732     Ops.push_back(Chain);
4733     break;
4734   case NVPTXISD::Suld3DV2I16Zero:
4735     Opc = NVPTX::SULD_3D_V2I16_ZERO;
4736     Ops.push_back(TexHandle);
4737     Ops.push_back(N->getOperand(2));
4738     Ops.push_back(N->getOperand(3));
4739     Ops.push_back(N->getOperand(4));
4740     Ops.push_back(Chain);
4741     break;
4742   case NVPTXISD::Suld3DV2I32Zero:
4743     Opc = NVPTX::SULD_3D_V2I32_ZERO;
4744     Ops.push_back(TexHandle);
4745     Ops.push_back(N->getOperand(2));
4746     Ops.push_back(N->getOperand(3));
4747     Ops.push_back(N->getOperand(4));
4748     Ops.push_back(Chain);
4749     break;
4750   case NVPTXISD::Suld3DV2I64Zero:
4751     Opc = NVPTX::SULD_3D_V2I64_ZERO;
4752     Ops.push_back(TexHandle);
4753     Ops.push_back(N->getOperand(2));
4754     Ops.push_back(N->getOperand(3));
4755     Ops.push_back(N->getOperand(4));
4756     Ops.push_back(Chain);
4757     break;
4758   case NVPTXISD::Suld3DV4I8Zero:
4759     Opc = NVPTX::SULD_3D_V4I8_ZERO;
4760     Ops.push_back(TexHandle);
4761     Ops.push_back(N->getOperand(2));
4762     Ops.push_back(N->getOperand(3));
4763     Ops.push_back(N->getOperand(4));
4764     Ops.push_back(Chain);
4765     break;
4766   case NVPTXISD::Suld3DV4I16Zero:
4767     Opc = NVPTX::SULD_3D_V4I16_ZERO;
4768     Ops.push_back(TexHandle);
4769     Ops.push_back(N->getOperand(2));
4770     Ops.push_back(N->getOperand(3));
4771     Ops.push_back(N->getOperand(4));
4772     Ops.push_back(Chain);
4773     break;
4774   case NVPTXISD::Suld3DV4I32Zero:
4775     Opc = NVPTX::SULD_3D_V4I32_ZERO;
4776     Ops.push_back(TexHandle);
4777     Ops.push_back(N->getOperand(2));
4778     Ops.push_back(N->getOperand(3));
4779     Ops.push_back(N->getOperand(4));
4780     Ops.push_back(Chain);
4781     break;
4782   }
4783   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4784   return Ret;
4785 }
4786 
4787 
4788 /// SelectBFE - Look for instruction sequences that can be made more efficient
4789 /// by using the 'bfe' (bit-field extract) PTX instruction
SelectBFE(SDNode * N)4790 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4791   SDLoc DL(N);
4792   SDValue LHS = N->getOperand(0);
4793   SDValue RHS = N->getOperand(1);
4794   SDValue Len;
4795   SDValue Start;
4796   SDValue Val;
4797   bool IsSigned = false;
4798 
4799   if (N->getOpcode() == ISD::AND) {
4800     // Canonicalize the operands
4801     // We want 'and %val, %mask'
4802     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4803       std::swap(LHS, RHS);
4804     }
4805 
4806     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4807     if (!Mask) {
4808       // We need a constant mask on the RHS of the AND
4809       return NULL;
4810     }
4811 
4812     // Extract the mask bits
4813     uint64_t MaskVal = Mask->getZExtValue();
4814     if (!isMask_64(MaskVal)) {
4815       // We *could* handle shifted masks here, but doing so would require an
4816       // 'and' operation to fix up the low-order bits so we would trade
4817       // shr+and for bfe+and, which has the same throughput
4818       return NULL;
4819     }
4820 
4821     // How many bits are in our mask?
4822     uint64_t NumBits = countTrailingOnes(MaskVal);
4823     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4824 
4825     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4826       // We have a 'srl/and' pair, extract the effective start bit and length
4827       Val = LHS.getNode()->getOperand(0);
4828       Start = LHS.getNode()->getOperand(1);
4829       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4830       if (StartConst) {
4831         uint64_t StartVal = StartConst->getZExtValue();
4832         // How many "good" bits do we have left?  "good" is defined here as bits
4833         // that exist in the original value, not shifted in.
4834         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4835         if (NumBits > GoodBits) {
4836           // Do not handle the case where bits have been shifted in. In theory
4837           // we could handle this, but the cost is likely higher than just
4838           // emitting the srl/and pair.
4839           return NULL;
4840         }
4841         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4842       } else {
4843         // Do not handle the case where the shift amount (can be zero if no srl
4844         // was found) is not constant. We could handle this case, but it would
4845         // require run-time logic that would be more expensive than just
4846         // emitting the srl/and pair.
4847         return NULL;
4848       }
4849     } else {
4850       // Do not handle the case where the LHS of the and is not a shift. While
4851       // it would be trivial to handle this case, it would just transform
4852       // 'and' -> 'bfe', but 'and' has higher-throughput.
4853       return NULL;
4854     }
4855   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4856     if (LHS->getOpcode() == ISD::AND) {
4857       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4858       if (!ShiftCnst) {
4859         // Shift amount must be constant
4860         return NULL;
4861       }
4862 
4863       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4864 
4865       SDValue AndLHS = LHS->getOperand(0);
4866       SDValue AndRHS = LHS->getOperand(1);
4867 
4868       // Canonicalize the AND to have the mask on the RHS
4869       if (isa<ConstantSDNode>(AndLHS)) {
4870         std::swap(AndLHS, AndRHS);
4871       }
4872 
4873       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4874       if (!MaskCnst) {
4875         // Mask must be constant
4876         return NULL;
4877       }
4878 
4879       uint64_t MaskVal = MaskCnst->getZExtValue();
4880       uint64_t NumZeros;
4881       uint64_t NumBits;
4882       if (isMask_64(MaskVal)) {
4883         NumZeros = 0;
4884         // The number of bits in the result bitfield will be the number of
4885         // trailing ones (the AND) minus the number of bits we shift off
4886         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4887       } else if (isShiftedMask_64(MaskVal)) {
4888         NumZeros = countTrailingZeros(MaskVal);
4889         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4890         // The number of bits in the result bitfield will be the number of
4891         // trailing zeros plus the number of set bits in the mask minus the
4892         // number of bits we shift off
4893         NumBits = NumZeros + NumOnes - ShiftAmt;
4894       } else {
4895         // This is not a mask we can handle
4896         return NULL;
4897       }
4898 
4899       if (ShiftAmt < NumZeros) {
4900         // Handling this case would require extra logic that would make this
4901         // transformation non-profitable
4902         return NULL;
4903       }
4904 
4905       Val = AndLHS;
4906       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4907       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4908     } else if (LHS->getOpcode() == ISD::SHL) {
4909       // Here, we have a pattern like:
4910       //
4911       // (sra (shl val, NN), MM)
4912       // or
4913       // (srl (shl val, NN), MM)
4914       //
4915       // If MM >= NN, we can efficiently optimize this with bfe
4916       Val = LHS->getOperand(0);
4917 
4918       SDValue ShlRHS = LHS->getOperand(1);
4919       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4920       if (!ShlCnst) {
4921         // Shift amount must be constant
4922         return NULL;
4923       }
4924       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4925 
4926       SDValue ShrRHS = RHS;
4927       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4928       if (!ShrCnst) {
4929         // Shift amount must be constant
4930         return NULL;
4931       }
4932       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4933 
4934       // To avoid extra codegen and be profitable, we need Outer >= Inner
4935       if (OuterShiftAmt < InnerShiftAmt) {
4936         return NULL;
4937       }
4938 
4939       // If the outer shift is more than the type size, we have no bitfield to
4940       // extract (since we also check that the inner shift is <= the outer shift
4941       // then this also implies that the inner shift is < the type size)
4942       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4943         return NULL;
4944       }
4945 
4946       Start =
4947         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4948       Len =
4949         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4950                                   OuterShiftAmt, DL, MVT::i32);
4951 
4952       if (N->getOpcode() == ISD::SRA) {
4953         // If we have a arithmetic right shift, we need to use the signed bfe
4954         // variant
4955         IsSigned = true;
4956       }
4957     } else {
4958       // No can do...
4959       return NULL;
4960     }
4961   } else {
4962     // No can do...
4963     return NULL;
4964   }
4965 
4966 
4967   unsigned Opc;
4968   // For the BFE operations we form here from "and" and "srl", always use the
4969   // unsigned variants.
4970   if (Val.getValueType() == MVT::i32) {
4971     if (IsSigned) {
4972       Opc = NVPTX::BFE_S32rii;
4973     } else {
4974       Opc = NVPTX::BFE_U32rii;
4975     }
4976   } else if (Val.getValueType() == MVT::i64) {
4977     if (IsSigned) {
4978       Opc = NVPTX::BFE_S64rii;
4979     } else {
4980       Opc = NVPTX::BFE_U64rii;
4981     }
4982   } else {
4983     // We cannot handle this type
4984     return NULL;
4985   }
4986 
4987   SDValue Ops[] = {
4988     Val, Start, Len
4989   };
4990 
4991   return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4992 }
4993 
4994 // SelectDirectAddr - Match a direct address for DAG.
4995 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)4996 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4997   // Return true if TGA or ES.
4998   if (N.getOpcode() == ISD::TargetGlobalAddress ||
4999       N.getOpcode() == ISD::TargetExternalSymbol) {
5000     Address = N;
5001     return true;
5002   }
5003   if (N.getOpcode() == NVPTXISD::Wrapper) {
5004     Address = N.getOperand(0);
5005     return true;
5006   }
5007   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5008     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5009     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5010       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5011         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5012   }
5013   return false;
5014 }
5015 
5016 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5017 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5018     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5019   if (Addr.getOpcode() == ISD::ADD) {
5020     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5021       SDValue base = Addr.getOperand(0);
5022       if (SelectDirectAddr(base, Base)) {
5023         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5024                                            mvt);
5025         return true;
5026       }
5027     }
5028   }
5029   return false;
5030 }
5031 
5032 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5033 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5034                                      SDValue &Base, SDValue &Offset) {
5035   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5036 }
5037 
5038 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5039 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5040                                        SDValue &Base, SDValue &Offset) {
5041   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5042 }
5043 
5044 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5045 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5046     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5047   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5048     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5049     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5050     return true;
5051   }
5052   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5053       Addr.getOpcode() == ISD::TargetGlobalAddress)
5054     return false; // direct calls.
5055 
5056   if (Addr.getOpcode() == ISD::ADD) {
5057     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5058       return false;
5059     }
5060     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5061       if (FrameIndexSDNode *FIN =
5062               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5063         // Constant offset from frame ref.
5064         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5065       else
5066         Base = Addr.getOperand(0);
5067       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5068                                          mvt);
5069       return true;
5070     }
5071   }
5072   return false;
5073 }
5074 
5075 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5076 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5077                                      SDValue &Base, SDValue &Offset) {
5078   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5079 }
5080 
5081 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5082 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5083                                        SDValue &Base, SDValue &Offset) {
5084   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5085 }
5086 
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5087 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5088                                                  unsigned int spN) const {
5089   const Value *Src = nullptr;
5090   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5091     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5092       return true;
5093     Src = mN->getMemOperand()->getValue();
5094   }
5095   if (!Src)
5096     return false;
5097   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5098     return (PT->getAddressSpace() == spN);
5099   return false;
5100 }
5101 
5102 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5103 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5104 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5105     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5106   SDValue Op0, Op1;
5107   switch (ConstraintID) {
5108   default:
5109     return true;
5110   case InlineAsm::Constraint_m: // memory
5111     if (SelectDirectAddr(Op, Op0)) {
5112       OutOps.push_back(Op0);
5113       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5114       return false;
5115     }
5116     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5117       OutOps.push_back(Op0);
5118       OutOps.push_back(Op1);
5119       return false;
5120     }
5121     break;
5122   }
5123   return true;
5124 }
5125