1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
22
23 using namespace llvm;
24
25 #define DEBUG_TYPE "nvptx-isel"
26
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
31 cl::init(2));
32
33 static cl::opt<bool>
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
36 cl::init(true));
37
38 static cl::opt<bool>
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
41 cl::init(false));
42
43
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
49 }
50
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel), TM(tm) {
54 doMulWide = (OptLevel > 0);
55 }
56
runOnMachineFunction(MachineFunction & MF)57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59 return SelectionDAGISel::runOnMachineFunction(MF);
60 }
61
getDivF32Level() const62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63 if (UsePrecDivF32.getNumOccurrences() > 0) {
64 // If nvptx-prec-div32=N is used on the command-line, always honor it
65 return UsePrecDivF32;
66 } else {
67 // Otherwise, use div.approx if fast math is enabled
68 if (TM.Options.UnsafeFPMath)
69 return 0;
70 else
71 return 2;
72 }
73 }
74
usePrecSqrtF32() const75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78 return UsePrecSqrtF32;
79 } else {
80 // Otherwise, use sqrt.approx if fast math is enabled
81 return !TM.Options.UnsafeFPMath;
82 }
83 }
84
useF32FTZ() const85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
86 if (FtzEnabled.getNumOccurrences() > 0) {
87 // If nvptx-f32ftz is used on the command-line, always honor it
88 return FtzEnabled;
89 } else {
90 const Function *F = MF->getFunction();
91 // Otherwise, check for an nvptx-f32ftz attribute on the function
92 if (F->hasFnAttribute("nvptx-f32ftz"))
93 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
94 else
95 return false;
96 }
97 }
98
allowFMA() const99 bool NVPTXDAGToDAGISel::allowFMA() const {
100 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
101 return TL->allowFMA(*MF, OptLevel);
102 }
103
104 /// Select - Select instructions not customized! Used for
105 /// expanded, promoted and normal instructions.
Select(SDNode * N)106 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
107
108 if (N->isMachineOpcode()) {
109 N->setNodeId(-1);
110 return nullptr; // Already selected.
111 }
112
113 SDNode *ResNode = nullptr;
114 switch (N->getOpcode()) {
115 case ISD::LOAD:
116 ResNode = SelectLoad(N);
117 break;
118 case ISD::STORE:
119 ResNode = SelectStore(N);
120 break;
121 case NVPTXISD::LoadV2:
122 case NVPTXISD::LoadV4:
123 ResNode = SelectLoadVector(N);
124 break;
125 case NVPTXISD::LDGV2:
126 case NVPTXISD::LDGV4:
127 case NVPTXISD::LDUV2:
128 case NVPTXISD::LDUV4:
129 ResNode = SelectLDGLDU(N);
130 break;
131 case NVPTXISD::StoreV2:
132 case NVPTXISD::StoreV4:
133 ResNode = SelectStoreVector(N);
134 break;
135 case NVPTXISD::LoadParam:
136 case NVPTXISD::LoadParamV2:
137 case NVPTXISD::LoadParamV4:
138 ResNode = SelectLoadParam(N);
139 break;
140 case NVPTXISD::StoreRetval:
141 case NVPTXISD::StoreRetvalV2:
142 case NVPTXISD::StoreRetvalV4:
143 ResNode = SelectStoreRetval(N);
144 break;
145 case NVPTXISD::StoreParam:
146 case NVPTXISD::StoreParamV2:
147 case NVPTXISD::StoreParamV4:
148 case NVPTXISD::StoreParamS32:
149 case NVPTXISD::StoreParamU32:
150 ResNode = SelectStoreParam(N);
151 break;
152 case ISD::INTRINSIC_WO_CHAIN:
153 ResNode = SelectIntrinsicNoChain(N);
154 break;
155 case ISD::INTRINSIC_W_CHAIN:
156 ResNode = SelectIntrinsicChain(N);
157 break;
158 case NVPTXISD::Tex1DFloatS32:
159 case NVPTXISD::Tex1DFloatFloat:
160 case NVPTXISD::Tex1DFloatFloatLevel:
161 case NVPTXISD::Tex1DFloatFloatGrad:
162 case NVPTXISD::Tex1DS32S32:
163 case NVPTXISD::Tex1DS32Float:
164 case NVPTXISD::Tex1DS32FloatLevel:
165 case NVPTXISD::Tex1DS32FloatGrad:
166 case NVPTXISD::Tex1DU32S32:
167 case NVPTXISD::Tex1DU32Float:
168 case NVPTXISD::Tex1DU32FloatLevel:
169 case NVPTXISD::Tex1DU32FloatGrad:
170 case NVPTXISD::Tex1DArrayFloatS32:
171 case NVPTXISD::Tex1DArrayFloatFloat:
172 case NVPTXISD::Tex1DArrayFloatFloatLevel:
173 case NVPTXISD::Tex1DArrayFloatFloatGrad:
174 case NVPTXISD::Tex1DArrayS32S32:
175 case NVPTXISD::Tex1DArrayS32Float:
176 case NVPTXISD::Tex1DArrayS32FloatLevel:
177 case NVPTXISD::Tex1DArrayS32FloatGrad:
178 case NVPTXISD::Tex1DArrayU32S32:
179 case NVPTXISD::Tex1DArrayU32Float:
180 case NVPTXISD::Tex1DArrayU32FloatLevel:
181 case NVPTXISD::Tex1DArrayU32FloatGrad:
182 case NVPTXISD::Tex2DFloatS32:
183 case NVPTXISD::Tex2DFloatFloat:
184 case NVPTXISD::Tex2DFloatFloatLevel:
185 case NVPTXISD::Tex2DFloatFloatGrad:
186 case NVPTXISD::Tex2DS32S32:
187 case NVPTXISD::Tex2DS32Float:
188 case NVPTXISD::Tex2DS32FloatLevel:
189 case NVPTXISD::Tex2DS32FloatGrad:
190 case NVPTXISD::Tex2DU32S32:
191 case NVPTXISD::Tex2DU32Float:
192 case NVPTXISD::Tex2DU32FloatLevel:
193 case NVPTXISD::Tex2DU32FloatGrad:
194 case NVPTXISD::Tex2DArrayFloatS32:
195 case NVPTXISD::Tex2DArrayFloatFloat:
196 case NVPTXISD::Tex2DArrayFloatFloatLevel:
197 case NVPTXISD::Tex2DArrayFloatFloatGrad:
198 case NVPTXISD::Tex2DArrayS32S32:
199 case NVPTXISD::Tex2DArrayS32Float:
200 case NVPTXISD::Tex2DArrayS32FloatLevel:
201 case NVPTXISD::Tex2DArrayS32FloatGrad:
202 case NVPTXISD::Tex2DArrayU32S32:
203 case NVPTXISD::Tex2DArrayU32Float:
204 case NVPTXISD::Tex2DArrayU32FloatLevel:
205 case NVPTXISD::Tex2DArrayU32FloatGrad:
206 case NVPTXISD::Tex3DFloatS32:
207 case NVPTXISD::Tex3DFloatFloat:
208 case NVPTXISD::Tex3DFloatFloatLevel:
209 case NVPTXISD::Tex3DFloatFloatGrad:
210 case NVPTXISD::Tex3DS32S32:
211 case NVPTXISD::Tex3DS32Float:
212 case NVPTXISD::Tex3DS32FloatLevel:
213 case NVPTXISD::Tex3DS32FloatGrad:
214 case NVPTXISD::Tex3DU32S32:
215 case NVPTXISD::Tex3DU32Float:
216 case NVPTXISD::Tex3DU32FloatLevel:
217 case NVPTXISD::Tex3DU32FloatGrad:
218 case NVPTXISD::TexCubeFloatFloat:
219 case NVPTXISD::TexCubeFloatFloatLevel:
220 case NVPTXISD::TexCubeS32Float:
221 case NVPTXISD::TexCubeS32FloatLevel:
222 case NVPTXISD::TexCubeU32Float:
223 case NVPTXISD::TexCubeU32FloatLevel:
224 case NVPTXISD::TexCubeArrayFloatFloat:
225 case NVPTXISD::TexCubeArrayFloatFloatLevel:
226 case NVPTXISD::TexCubeArrayS32Float:
227 case NVPTXISD::TexCubeArrayS32FloatLevel:
228 case NVPTXISD::TexCubeArrayU32Float:
229 case NVPTXISD::TexCubeArrayU32FloatLevel:
230 case NVPTXISD::Tld4R2DFloatFloat:
231 case NVPTXISD::Tld4G2DFloatFloat:
232 case NVPTXISD::Tld4B2DFloatFloat:
233 case NVPTXISD::Tld4A2DFloatFloat:
234 case NVPTXISD::Tld4R2DS64Float:
235 case NVPTXISD::Tld4G2DS64Float:
236 case NVPTXISD::Tld4B2DS64Float:
237 case NVPTXISD::Tld4A2DS64Float:
238 case NVPTXISD::Tld4R2DU64Float:
239 case NVPTXISD::Tld4G2DU64Float:
240 case NVPTXISD::Tld4B2DU64Float:
241 case NVPTXISD::Tld4A2DU64Float:
242 case NVPTXISD::TexUnified1DFloatS32:
243 case NVPTXISD::TexUnified1DFloatFloat:
244 case NVPTXISD::TexUnified1DFloatFloatLevel:
245 case NVPTXISD::TexUnified1DFloatFloatGrad:
246 case NVPTXISD::TexUnified1DS32S32:
247 case NVPTXISD::TexUnified1DS32Float:
248 case NVPTXISD::TexUnified1DS32FloatLevel:
249 case NVPTXISD::TexUnified1DS32FloatGrad:
250 case NVPTXISD::TexUnified1DU32S32:
251 case NVPTXISD::TexUnified1DU32Float:
252 case NVPTXISD::TexUnified1DU32FloatLevel:
253 case NVPTXISD::TexUnified1DU32FloatGrad:
254 case NVPTXISD::TexUnified1DArrayFloatS32:
255 case NVPTXISD::TexUnified1DArrayFloatFloat:
256 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
257 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
258 case NVPTXISD::TexUnified1DArrayS32S32:
259 case NVPTXISD::TexUnified1DArrayS32Float:
260 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
261 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
262 case NVPTXISD::TexUnified1DArrayU32S32:
263 case NVPTXISD::TexUnified1DArrayU32Float:
264 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
265 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
266 case NVPTXISD::TexUnified2DFloatS32:
267 case NVPTXISD::TexUnified2DFloatFloat:
268 case NVPTXISD::TexUnified2DFloatFloatLevel:
269 case NVPTXISD::TexUnified2DFloatFloatGrad:
270 case NVPTXISD::TexUnified2DS32S32:
271 case NVPTXISD::TexUnified2DS32Float:
272 case NVPTXISD::TexUnified2DS32FloatLevel:
273 case NVPTXISD::TexUnified2DS32FloatGrad:
274 case NVPTXISD::TexUnified2DU32S32:
275 case NVPTXISD::TexUnified2DU32Float:
276 case NVPTXISD::TexUnified2DU32FloatLevel:
277 case NVPTXISD::TexUnified2DU32FloatGrad:
278 case NVPTXISD::TexUnified2DArrayFloatS32:
279 case NVPTXISD::TexUnified2DArrayFloatFloat:
280 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
281 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
282 case NVPTXISD::TexUnified2DArrayS32S32:
283 case NVPTXISD::TexUnified2DArrayS32Float:
284 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
285 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
286 case NVPTXISD::TexUnified2DArrayU32S32:
287 case NVPTXISD::TexUnified2DArrayU32Float:
288 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
289 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
290 case NVPTXISD::TexUnified3DFloatS32:
291 case NVPTXISD::TexUnified3DFloatFloat:
292 case NVPTXISD::TexUnified3DFloatFloatLevel:
293 case NVPTXISD::TexUnified3DFloatFloatGrad:
294 case NVPTXISD::TexUnified3DS32S32:
295 case NVPTXISD::TexUnified3DS32Float:
296 case NVPTXISD::TexUnified3DS32FloatLevel:
297 case NVPTXISD::TexUnified3DS32FloatGrad:
298 case NVPTXISD::TexUnified3DU32S32:
299 case NVPTXISD::TexUnified3DU32Float:
300 case NVPTXISD::TexUnified3DU32FloatLevel:
301 case NVPTXISD::TexUnified3DU32FloatGrad:
302 case NVPTXISD::TexUnifiedCubeFloatFloat:
303 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
304 case NVPTXISD::TexUnifiedCubeS32Float:
305 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
306 case NVPTXISD::TexUnifiedCubeU32Float:
307 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayS32Float:
311 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayU32Float:
313 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
314 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
315 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
316 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedR2DS64Float:
319 case NVPTXISD::Tld4UnifiedG2DS64Float:
320 case NVPTXISD::Tld4UnifiedB2DS64Float:
321 case NVPTXISD::Tld4UnifiedA2DS64Float:
322 case NVPTXISD::Tld4UnifiedR2DU64Float:
323 case NVPTXISD::Tld4UnifiedG2DU64Float:
324 case NVPTXISD::Tld4UnifiedB2DU64Float:
325 case NVPTXISD::Tld4UnifiedA2DU64Float:
326 ResNode = SelectTextureIntrinsic(N);
327 break;
328 case NVPTXISD::Suld1DI8Clamp:
329 case NVPTXISD::Suld1DI16Clamp:
330 case NVPTXISD::Suld1DI32Clamp:
331 case NVPTXISD::Suld1DI64Clamp:
332 case NVPTXISD::Suld1DV2I8Clamp:
333 case NVPTXISD::Suld1DV2I16Clamp:
334 case NVPTXISD::Suld1DV2I32Clamp:
335 case NVPTXISD::Suld1DV2I64Clamp:
336 case NVPTXISD::Suld1DV4I8Clamp:
337 case NVPTXISD::Suld1DV4I16Clamp:
338 case NVPTXISD::Suld1DV4I32Clamp:
339 case NVPTXISD::Suld1DArrayI8Clamp:
340 case NVPTXISD::Suld1DArrayI16Clamp:
341 case NVPTXISD::Suld1DArrayI32Clamp:
342 case NVPTXISD::Suld1DArrayI64Clamp:
343 case NVPTXISD::Suld1DArrayV2I8Clamp:
344 case NVPTXISD::Suld1DArrayV2I16Clamp:
345 case NVPTXISD::Suld1DArrayV2I32Clamp:
346 case NVPTXISD::Suld1DArrayV2I64Clamp:
347 case NVPTXISD::Suld1DArrayV4I8Clamp:
348 case NVPTXISD::Suld1DArrayV4I16Clamp:
349 case NVPTXISD::Suld1DArrayV4I32Clamp:
350 case NVPTXISD::Suld2DI8Clamp:
351 case NVPTXISD::Suld2DI16Clamp:
352 case NVPTXISD::Suld2DI32Clamp:
353 case NVPTXISD::Suld2DI64Clamp:
354 case NVPTXISD::Suld2DV2I8Clamp:
355 case NVPTXISD::Suld2DV2I16Clamp:
356 case NVPTXISD::Suld2DV2I32Clamp:
357 case NVPTXISD::Suld2DV2I64Clamp:
358 case NVPTXISD::Suld2DV4I8Clamp:
359 case NVPTXISD::Suld2DV4I16Clamp:
360 case NVPTXISD::Suld2DV4I32Clamp:
361 case NVPTXISD::Suld2DArrayI8Clamp:
362 case NVPTXISD::Suld2DArrayI16Clamp:
363 case NVPTXISD::Suld2DArrayI32Clamp:
364 case NVPTXISD::Suld2DArrayI64Clamp:
365 case NVPTXISD::Suld2DArrayV2I8Clamp:
366 case NVPTXISD::Suld2DArrayV2I16Clamp:
367 case NVPTXISD::Suld2DArrayV2I32Clamp:
368 case NVPTXISD::Suld2DArrayV2I64Clamp:
369 case NVPTXISD::Suld2DArrayV4I8Clamp:
370 case NVPTXISD::Suld2DArrayV4I16Clamp:
371 case NVPTXISD::Suld2DArrayV4I32Clamp:
372 case NVPTXISD::Suld3DI8Clamp:
373 case NVPTXISD::Suld3DI16Clamp:
374 case NVPTXISD::Suld3DI32Clamp:
375 case NVPTXISD::Suld3DI64Clamp:
376 case NVPTXISD::Suld3DV2I8Clamp:
377 case NVPTXISD::Suld3DV2I16Clamp:
378 case NVPTXISD::Suld3DV2I32Clamp:
379 case NVPTXISD::Suld3DV2I64Clamp:
380 case NVPTXISD::Suld3DV4I8Clamp:
381 case NVPTXISD::Suld3DV4I16Clamp:
382 case NVPTXISD::Suld3DV4I32Clamp:
383 case NVPTXISD::Suld1DI8Trap:
384 case NVPTXISD::Suld1DI16Trap:
385 case NVPTXISD::Suld1DI32Trap:
386 case NVPTXISD::Suld1DI64Trap:
387 case NVPTXISD::Suld1DV2I8Trap:
388 case NVPTXISD::Suld1DV2I16Trap:
389 case NVPTXISD::Suld1DV2I32Trap:
390 case NVPTXISD::Suld1DV2I64Trap:
391 case NVPTXISD::Suld1DV4I8Trap:
392 case NVPTXISD::Suld1DV4I16Trap:
393 case NVPTXISD::Suld1DV4I32Trap:
394 case NVPTXISD::Suld1DArrayI8Trap:
395 case NVPTXISD::Suld1DArrayI16Trap:
396 case NVPTXISD::Suld1DArrayI32Trap:
397 case NVPTXISD::Suld1DArrayI64Trap:
398 case NVPTXISD::Suld1DArrayV2I8Trap:
399 case NVPTXISD::Suld1DArrayV2I16Trap:
400 case NVPTXISD::Suld1DArrayV2I32Trap:
401 case NVPTXISD::Suld1DArrayV2I64Trap:
402 case NVPTXISD::Suld1DArrayV4I8Trap:
403 case NVPTXISD::Suld1DArrayV4I16Trap:
404 case NVPTXISD::Suld1DArrayV4I32Trap:
405 case NVPTXISD::Suld2DI8Trap:
406 case NVPTXISD::Suld2DI16Trap:
407 case NVPTXISD::Suld2DI32Trap:
408 case NVPTXISD::Suld2DI64Trap:
409 case NVPTXISD::Suld2DV2I8Trap:
410 case NVPTXISD::Suld2DV2I16Trap:
411 case NVPTXISD::Suld2DV2I32Trap:
412 case NVPTXISD::Suld2DV2I64Trap:
413 case NVPTXISD::Suld2DV4I8Trap:
414 case NVPTXISD::Suld2DV4I16Trap:
415 case NVPTXISD::Suld2DV4I32Trap:
416 case NVPTXISD::Suld2DArrayI8Trap:
417 case NVPTXISD::Suld2DArrayI16Trap:
418 case NVPTXISD::Suld2DArrayI32Trap:
419 case NVPTXISD::Suld2DArrayI64Trap:
420 case NVPTXISD::Suld2DArrayV2I8Trap:
421 case NVPTXISD::Suld2DArrayV2I16Trap:
422 case NVPTXISD::Suld2DArrayV2I32Trap:
423 case NVPTXISD::Suld2DArrayV2I64Trap:
424 case NVPTXISD::Suld2DArrayV4I8Trap:
425 case NVPTXISD::Suld2DArrayV4I16Trap:
426 case NVPTXISD::Suld2DArrayV4I32Trap:
427 case NVPTXISD::Suld3DI8Trap:
428 case NVPTXISD::Suld3DI16Trap:
429 case NVPTXISD::Suld3DI32Trap:
430 case NVPTXISD::Suld3DI64Trap:
431 case NVPTXISD::Suld3DV2I8Trap:
432 case NVPTXISD::Suld3DV2I16Trap:
433 case NVPTXISD::Suld3DV2I32Trap:
434 case NVPTXISD::Suld3DV2I64Trap:
435 case NVPTXISD::Suld3DV4I8Trap:
436 case NVPTXISD::Suld3DV4I16Trap:
437 case NVPTXISD::Suld3DV4I32Trap:
438 case NVPTXISD::Suld1DI8Zero:
439 case NVPTXISD::Suld1DI16Zero:
440 case NVPTXISD::Suld1DI32Zero:
441 case NVPTXISD::Suld1DI64Zero:
442 case NVPTXISD::Suld1DV2I8Zero:
443 case NVPTXISD::Suld1DV2I16Zero:
444 case NVPTXISD::Suld1DV2I32Zero:
445 case NVPTXISD::Suld1DV2I64Zero:
446 case NVPTXISD::Suld1DV4I8Zero:
447 case NVPTXISD::Suld1DV4I16Zero:
448 case NVPTXISD::Suld1DV4I32Zero:
449 case NVPTXISD::Suld1DArrayI8Zero:
450 case NVPTXISD::Suld1DArrayI16Zero:
451 case NVPTXISD::Suld1DArrayI32Zero:
452 case NVPTXISD::Suld1DArrayI64Zero:
453 case NVPTXISD::Suld1DArrayV2I8Zero:
454 case NVPTXISD::Suld1DArrayV2I16Zero:
455 case NVPTXISD::Suld1DArrayV2I32Zero:
456 case NVPTXISD::Suld1DArrayV2I64Zero:
457 case NVPTXISD::Suld1DArrayV4I8Zero:
458 case NVPTXISD::Suld1DArrayV4I16Zero:
459 case NVPTXISD::Suld1DArrayV4I32Zero:
460 case NVPTXISD::Suld2DI8Zero:
461 case NVPTXISD::Suld2DI16Zero:
462 case NVPTXISD::Suld2DI32Zero:
463 case NVPTXISD::Suld2DI64Zero:
464 case NVPTXISD::Suld2DV2I8Zero:
465 case NVPTXISD::Suld2DV2I16Zero:
466 case NVPTXISD::Suld2DV2I32Zero:
467 case NVPTXISD::Suld2DV2I64Zero:
468 case NVPTXISD::Suld2DV4I8Zero:
469 case NVPTXISD::Suld2DV4I16Zero:
470 case NVPTXISD::Suld2DV4I32Zero:
471 case NVPTXISD::Suld2DArrayI8Zero:
472 case NVPTXISD::Suld2DArrayI16Zero:
473 case NVPTXISD::Suld2DArrayI32Zero:
474 case NVPTXISD::Suld2DArrayI64Zero:
475 case NVPTXISD::Suld2DArrayV2I8Zero:
476 case NVPTXISD::Suld2DArrayV2I16Zero:
477 case NVPTXISD::Suld2DArrayV2I32Zero:
478 case NVPTXISD::Suld2DArrayV2I64Zero:
479 case NVPTXISD::Suld2DArrayV4I8Zero:
480 case NVPTXISD::Suld2DArrayV4I16Zero:
481 case NVPTXISD::Suld2DArrayV4I32Zero:
482 case NVPTXISD::Suld3DI8Zero:
483 case NVPTXISD::Suld3DI16Zero:
484 case NVPTXISD::Suld3DI32Zero:
485 case NVPTXISD::Suld3DI64Zero:
486 case NVPTXISD::Suld3DV2I8Zero:
487 case NVPTXISD::Suld3DV2I16Zero:
488 case NVPTXISD::Suld3DV2I32Zero:
489 case NVPTXISD::Suld3DV2I64Zero:
490 case NVPTXISD::Suld3DV4I8Zero:
491 case NVPTXISD::Suld3DV4I16Zero:
492 case NVPTXISD::Suld3DV4I32Zero:
493 ResNode = SelectSurfaceIntrinsic(N);
494 break;
495 case ISD::AND:
496 case ISD::SRA:
497 case ISD::SRL:
498 // Try to select BFE
499 ResNode = SelectBFE(N);
500 break;
501 case ISD::ADDRSPACECAST:
502 ResNode = SelectAddrSpaceCast(N);
503 break;
504 default:
505 break;
506 }
507 if (ResNode)
508 return ResNode;
509 return SelectCode(N);
510 }
511
SelectIntrinsicChain(SDNode * N)512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
513 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
514 switch (IID) {
515 default:
516 return NULL;
517 case Intrinsic::nvvm_ldg_global_f:
518 case Intrinsic::nvvm_ldg_global_i:
519 case Intrinsic::nvvm_ldg_global_p:
520 case Intrinsic::nvvm_ldu_global_f:
521 case Intrinsic::nvvm_ldu_global_i:
522 case Intrinsic::nvvm_ldu_global_p:
523 return SelectLDGLDU(N);
524 }
525 }
526
getCodeAddrSpace(MemSDNode * N)527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
528 const Value *Src = N->getMemOperand()->getValue();
529
530 if (!Src)
531 return NVPTX::PTXLdStInstCode::GENERIC;
532
533 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
534 switch (PT->getAddressSpace()) {
535 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
536 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
537 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
538 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
539 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
540 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
541 default: break;
542 }
543 }
544 return NVPTX::PTXLdStInstCode::GENERIC;
545 }
546
SelectIntrinsicNoChain(SDNode * N)547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
548 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
549 switch (IID) {
550 default:
551 return nullptr;
552 case Intrinsic::nvvm_texsurf_handle_internal:
553 return SelectTexSurfHandle(N);
554 }
555 }
556
SelectTexSurfHandle(SDNode * N)557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
558 // Op 0 is the intrinsic ID
559 SDValue Wrapper = N->getOperand(1);
560 SDValue GlobalVal = Wrapper.getOperand(0);
561 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
562 GlobalVal);
563 }
564
SelectAddrSpaceCast(SDNode * N)565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
566 SDValue Src = N->getOperand(0);
567 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
568 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
569 unsigned DstAddrSpace = CastN->getDestAddressSpace();
570
571 assert(SrcAddrSpace != DstAddrSpace &&
572 "addrspacecast must be between different address spaces");
573
574 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
575 // Specific to generic
576 unsigned Opc;
577 switch (SrcAddrSpace) {
578 default: report_fatal_error("Bad address space in addrspacecast");
579 case ADDRESS_SPACE_GLOBAL:
580 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
581 break;
582 case ADDRESS_SPACE_SHARED:
583 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
584 break;
585 case ADDRESS_SPACE_CONST:
586 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
587 break;
588 case ADDRESS_SPACE_LOCAL:
589 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
590 break;
591 }
592 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
593 } else {
594 // Generic to specific
595 if (SrcAddrSpace != 0)
596 report_fatal_error("Cannot cast between two non-generic address spaces");
597 unsigned Opc;
598 switch (DstAddrSpace) {
599 default: report_fatal_error("Bad address space in addrspacecast");
600 case ADDRESS_SPACE_GLOBAL:
601 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
602 : NVPTX::cvta_to_global_yes;
603 break;
604 case ADDRESS_SPACE_SHARED:
605 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
606 : NVPTX::cvta_to_shared_yes;
607 break;
608 case ADDRESS_SPACE_CONST:
609 Opc =
610 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
611 break;
612 case ADDRESS_SPACE_LOCAL:
613 Opc =
614 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
615 break;
616 }
617 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
618 }
619 }
620
SelectLoad(SDNode * N)621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
622 SDLoc dl(N);
623 LoadSDNode *LD = cast<LoadSDNode>(N);
624 EVT LoadedVT = LD->getMemoryVT();
625 SDNode *NVPTXLD = nullptr;
626
627 // do not support pre/post inc/dec
628 if (LD->isIndexed())
629 return nullptr;
630
631 if (!LoadedVT.isSimple())
632 return nullptr;
633
634 // Address Space Setting
635 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
636
637 // Volatile Setting
638 // - .volatile is only availalble for .global and .shared
639 bool isVolatile = LD->isVolatile();
640 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
641 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
642 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
643 isVolatile = false;
644
645 // Vector Setting
646 MVT SimpleVT = LoadedVT.getSimpleVT();
647 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
648 if (SimpleVT.isVector()) {
649 unsigned num = SimpleVT.getVectorNumElements();
650 if (num == 2)
651 vecType = NVPTX::PTXLdStInstCode::V2;
652 else if (num == 4)
653 vecType = NVPTX::PTXLdStInstCode::V4;
654 else
655 return nullptr;
656 }
657
658 // Type Setting: fromType + fromTypeWidth
659 //
660 // Sign : ISD::SEXTLOAD
661 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
662 // type is integer
663 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
664 MVT ScalarVT = SimpleVT.getScalarType();
665 // Read at least 8 bits (predicates are stored as 8-bit values)
666 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
667 unsigned int fromType;
668 if ((LD->getExtensionType() == ISD::SEXTLOAD))
669 fromType = NVPTX::PTXLdStInstCode::Signed;
670 else if (ScalarVT.isFloatingPoint())
671 fromType = NVPTX::PTXLdStInstCode::Float;
672 else
673 fromType = NVPTX::PTXLdStInstCode::Unsigned;
674
675 // Create the machine instruction DAG
676 SDValue Chain = N->getOperand(0);
677 SDValue N1 = N->getOperand(1);
678 SDValue Addr;
679 SDValue Offset, Base;
680 unsigned Opcode;
681 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
682
683 if (SelectDirectAddr(N1, Addr)) {
684 switch (TargetVT) {
685 case MVT::i8:
686 Opcode = NVPTX::LD_i8_avar;
687 break;
688 case MVT::i16:
689 Opcode = NVPTX::LD_i16_avar;
690 break;
691 case MVT::i32:
692 Opcode = NVPTX::LD_i32_avar;
693 break;
694 case MVT::i64:
695 Opcode = NVPTX::LD_i64_avar;
696 break;
697 case MVT::f32:
698 Opcode = NVPTX::LD_f32_avar;
699 break;
700 case MVT::f64:
701 Opcode = NVPTX::LD_f64_avar;
702 break;
703 default:
704 return nullptr;
705 }
706 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
707 getI32Imm(vecType), getI32Imm(fromType),
708 getI32Imm(fromTypeWidth), Addr, Chain };
709 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
710 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
711 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
712 switch (TargetVT) {
713 case MVT::i8:
714 Opcode = NVPTX::LD_i8_asi;
715 break;
716 case MVT::i16:
717 Opcode = NVPTX::LD_i16_asi;
718 break;
719 case MVT::i32:
720 Opcode = NVPTX::LD_i32_asi;
721 break;
722 case MVT::i64:
723 Opcode = NVPTX::LD_i64_asi;
724 break;
725 case MVT::f32:
726 Opcode = NVPTX::LD_f32_asi;
727 break;
728 case MVT::f64:
729 Opcode = NVPTX::LD_f64_asi;
730 break;
731 default:
732 return nullptr;
733 }
734 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
735 getI32Imm(vecType), getI32Imm(fromType),
736 getI32Imm(fromTypeWidth), Base, Offset, Chain };
737 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
738 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
739 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
740 if (TM.is64Bit()) {
741 switch (TargetVT) {
742 case MVT::i8:
743 Opcode = NVPTX::LD_i8_ari_64;
744 break;
745 case MVT::i16:
746 Opcode = NVPTX::LD_i16_ari_64;
747 break;
748 case MVT::i32:
749 Opcode = NVPTX::LD_i32_ari_64;
750 break;
751 case MVT::i64:
752 Opcode = NVPTX::LD_i64_ari_64;
753 break;
754 case MVT::f32:
755 Opcode = NVPTX::LD_f32_ari_64;
756 break;
757 case MVT::f64:
758 Opcode = NVPTX::LD_f64_ari_64;
759 break;
760 default:
761 return nullptr;
762 }
763 } else {
764 switch (TargetVT) {
765 case MVT::i8:
766 Opcode = NVPTX::LD_i8_ari;
767 break;
768 case MVT::i16:
769 Opcode = NVPTX::LD_i16_ari;
770 break;
771 case MVT::i32:
772 Opcode = NVPTX::LD_i32_ari;
773 break;
774 case MVT::i64:
775 Opcode = NVPTX::LD_i64_ari;
776 break;
777 case MVT::f32:
778 Opcode = NVPTX::LD_f32_ari;
779 break;
780 case MVT::f64:
781 Opcode = NVPTX::LD_f64_ari;
782 break;
783 default:
784 return nullptr;
785 }
786 }
787 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
788 getI32Imm(vecType), getI32Imm(fromType),
789 getI32Imm(fromTypeWidth), Base, Offset, Chain };
790 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
791 } else {
792 if (TM.is64Bit()) {
793 switch (TargetVT) {
794 case MVT::i8:
795 Opcode = NVPTX::LD_i8_areg_64;
796 break;
797 case MVT::i16:
798 Opcode = NVPTX::LD_i16_areg_64;
799 break;
800 case MVT::i32:
801 Opcode = NVPTX::LD_i32_areg_64;
802 break;
803 case MVT::i64:
804 Opcode = NVPTX::LD_i64_areg_64;
805 break;
806 case MVT::f32:
807 Opcode = NVPTX::LD_f32_areg_64;
808 break;
809 case MVT::f64:
810 Opcode = NVPTX::LD_f64_areg_64;
811 break;
812 default:
813 return nullptr;
814 }
815 } else {
816 switch (TargetVT) {
817 case MVT::i8:
818 Opcode = NVPTX::LD_i8_areg;
819 break;
820 case MVT::i16:
821 Opcode = NVPTX::LD_i16_areg;
822 break;
823 case MVT::i32:
824 Opcode = NVPTX::LD_i32_areg;
825 break;
826 case MVT::i64:
827 Opcode = NVPTX::LD_i64_areg;
828 break;
829 case MVT::f32:
830 Opcode = NVPTX::LD_f32_areg;
831 break;
832 case MVT::f64:
833 Opcode = NVPTX::LD_f64_areg;
834 break;
835 default:
836 return nullptr;
837 }
838 }
839 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
840 getI32Imm(vecType), getI32Imm(fromType),
841 getI32Imm(fromTypeWidth), N1, Chain };
842 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
843 }
844
845 if (NVPTXLD) {
846 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
847 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
848 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
849 }
850
851 return NVPTXLD;
852 }
853
SelectLoadVector(SDNode * N)854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
855
856 SDValue Chain = N->getOperand(0);
857 SDValue Op1 = N->getOperand(1);
858 SDValue Addr, Offset, Base;
859 unsigned Opcode;
860 SDLoc DL(N);
861 SDNode *LD;
862 MemSDNode *MemSD = cast<MemSDNode>(N);
863 EVT LoadedVT = MemSD->getMemoryVT();
864
865 if (!LoadedVT.isSimple())
866 return nullptr;
867
868 // Address Space Setting
869 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
870
871 // Volatile Setting
872 // - .volatile is only availalble for .global and .shared
873 bool IsVolatile = MemSD->isVolatile();
874 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
877 IsVolatile = false;
878
879 // Vector Setting
880 MVT SimpleVT = LoadedVT.getSimpleVT();
881
882 // Type Setting: fromType + fromTypeWidth
883 //
884 // Sign : ISD::SEXTLOAD
885 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
886 // type is integer
887 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
888 MVT ScalarVT = SimpleVT.getScalarType();
889 // Read at least 8 bits (predicates are stored as 8-bit values)
890 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
891 unsigned int FromType;
892 // The last operand holds the original LoadSDNode::getExtensionType() value
893 unsigned ExtensionType = cast<ConstantSDNode>(
894 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
895 if (ExtensionType == ISD::SEXTLOAD)
896 FromType = NVPTX::PTXLdStInstCode::Signed;
897 else if (ScalarVT.isFloatingPoint())
898 FromType = NVPTX::PTXLdStInstCode::Float;
899 else
900 FromType = NVPTX::PTXLdStInstCode::Unsigned;
901
902 unsigned VecType;
903
904 switch (N->getOpcode()) {
905 case NVPTXISD::LoadV2:
906 VecType = NVPTX::PTXLdStInstCode::V2;
907 break;
908 case NVPTXISD::LoadV4:
909 VecType = NVPTX::PTXLdStInstCode::V4;
910 break;
911 default:
912 return nullptr;
913 }
914
915 EVT EltVT = N->getValueType(0);
916
917 if (SelectDirectAddr(Op1, Addr)) {
918 switch (N->getOpcode()) {
919 default:
920 return nullptr;
921 case NVPTXISD::LoadV2:
922 switch (EltVT.getSimpleVT().SimpleTy) {
923 default:
924 return nullptr;
925 case MVT::i8:
926 Opcode = NVPTX::LDV_i8_v2_avar;
927 break;
928 case MVT::i16:
929 Opcode = NVPTX::LDV_i16_v2_avar;
930 break;
931 case MVT::i32:
932 Opcode = NVPTX::LDV_i32_v2_avar;
933 break;
934 case MVT::i64:
935 Opcode = NVPTX::LDV_i64_v2_avar;
936 break;
937 case MVT::f32:
938 Opcode = NVPTX::LDV_f32_v2_avar;
939 break;
940 case MVT::f64:
941 Opcode = NVPTX::LDV_f64_v2_avar;
942 break;
943 }
944 break;
945 case NVPTXISD::LoadV4:
946 switch (EltVT.getSimpleVT().SimpleTy) {
947 default:
948 return nullptr;
949 case MVT::i8:
950 Opcode = NVPTX::LDV_i8_v4_avar;
951 break;
952 case MVT::i16:
953 Opcode = NVPTX::LDV_i16_v4_avar;
954 break;
955 case MVT::i32:
956 Opcode = NVPTX::LDV_i32_v4_avar;
957 break;
958 case MVT::f32:
959 Opcode = NVPTX::LDV_f32_v4_avar;
960 break;
961 }
962 break;
963 }
964
965 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
966 getI32Imm(VecType), getI32Imm(FromType),
967 getI32Imm(FromTypeWidth), Addr, Chain };
968 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
969 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
970 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
971 switch (N->getOpcode()) {
972 default:
973 return nullptr;
974 case NVPTXISD::LoadV2:
975 switch (EltVT.getSimpleVT().SimpleTy) {
976 default:
977 return nullptr;
978 case MVT::i8:
979 Opcode = NVPTX::LDV_i8_v2_asi;
980 break;
981 case MVT::i16:
982 Opcode = NVPTX::LDV_i16_v2_asi;
983 break;
984 case MVT::i32:
985 Opcode = NVPTX::LDV_i32_v2_asi;
986 break;
987 case MVT::i64:
988 Opcode = NVPTX::LDV_i64_v2_asi;
989 break;
990 case MVT::f32:
991 Opcode = NVPTX::LDV_f32_v2_asi;
992 break;
993 case MVT::f64:
994 Opcode = NVPTX::LDV_f64_v2_asi;
995 break;
996 }
997 break;
998 case NVPTXISD::LoadV4:
999 switch (EltVT.getSimpleVT().SimpleTy) {
1000 default:
1001 return nullptr;
1002 case MVT::i8:
1003 Opcode = NVPTX::LDV_i8_v4_asi;
1004 break;
1005 case MVT::i16:
1006 Opcode = NVPTX::LDV_i16_v4_asi;
1007 break;
1008 case MVT::i32:
1009 Opcode = NVPTX::LDV_i32_v4_asi;
1010 break;
1011 case MVT::f32:
1012 Opcode = NVPTX::LDV_f32_v4_asi;
1013 break;
1014 }
1015 break;
1016 }
1017
1018 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1019 getI32Imm(VecType), getI32Imm(FromType),
1020 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1021 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1022 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1023 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1024 if (TM.is64Bit()) {
1025 switch (N->getOpcode()) {
1026 default:
1027 return nullptr;
1028 case NVPTXISD::LoadV2:
1029 switch (EltVT.getSimpleVT().SimpleTy) {
1030 default:
1031 return nullptr;
1032 case MVT::i8:
1033 Opcode = NVPTX::LDV_i8_v2_ari_64;
1034 break;
1035 case MVT::i16:
1036 Opcode = NVPTX::LDV_i16_v2_ari_64;
1037 break;
1038 case MVT::i32:
1039 Opcode = NVPTX::LDV_i32_v2_ari_64;
1040 break;
1041 case MVT::i64:
1042 Opcode = NVPTX::LDV_i64_v2_ari_64;
1043 break;
1044 case MVT::f32:
1045 Opcode = NVPTX::LDV_f32_v2_ari_64;
1046 break;
1047 case MVT::f64:
1048 Opcode = NVPTX::LDV_f64_v2_ari_64;
1049 break;
1050 }
1051 break;
1052 case NVPTXISD::LoadV4:
1053 switch (EltVT.getSimpleVT().SimpleTy) {
1054 default:
1055 return nullptr;
1056 case MVT::i8:
1057 Opcode = NVPTX::LDV_i8_v4_ari_64;
1058 break;
1059 case MVT::i16:
1060 Opcode = NVPTX::LDV_i16_v4_ari_64;
1061 break;
1062 case MVT::i32:
1063 Opcode = NVPTX::LDV_i32_v4_ari_64;
1064 break;
1065 case MVT::f32:
1066 Opcode = NVPTX::LDV_f32_v4_ari_64;
1067 break;
1068 }
1069 break;
1070 }
1071 } else {
1072 switch (N->getOpcode()) {
1073 default:
1074 return nullptr;
1075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
1077 default:
1078 return nullptr;
1079 case MVT::i8:
1080 Opcode = NVPTX::LDV_i8_v2_ari;
1081 break;
1082 case MVT::i16:
1083 Opcode = NVPTX::LDV_i16_v2_ari;
1084 break;
1085 case MVT::i32:
1086 Opcode = NVPTX::LDV_i32_v2_ari;
1087 break;
1088 case MVT::i64:
1089 Opcode = NVPTX::LDV_i64_v2_ari;
1090 break;
1091 case MVT::f32:
1092 Opcode = NVPTX::LDV_f32_v2_ari;
1093 break;
1094 case MVT::f64:
1095 Opcode = NVPTX::LDV_f64_v2_ari;
1096 break;
1097 }
1098 break;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
1101 default:
1102 return nullptr;
1103 case MVT::i8:
1104 Opcode = NVPTX::LDV_i8_v4_ari;
1105 break;
1106 case MVT::i16:
1107 Opcode = NVPTX::LDV_i16_v4_ari;
1108 break;
1109 case MVT::i32:
1110 Opcode = NVPTX::LDV_i32_v4_ari;
1111 break;
1112 case MVT::f32:
1113 Opcode = NVPTX::LDV_f32_v4_ari;
1114 break;
1115 }
1116 break;
1117 }
1118 }
1119
1120 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1121 getI32Imm(VecType), getI32Imm(FromType),
1122 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1123
1124 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1125 } else {
1126 if (TM.is64Bit()) {
1127 switch (N->getOpcode()) {
1128 default:
1129 return nullptr;
1130 case NVPTXISD::LoadV2:
1131 switch (EltVT.getSimpleVT().SimpleTy) {
1132 default:
1133 return nullptr;
1134 case MVT::i8:
1135 Opcode = NVPTX::LDV_i8_v2_areg_64;
1136 break;
1137 case MVT::i16:
1138 Opcode = NVPTX::LDV_i16_v2_areg_64;
1139 break;
1140 case MVT::i32:
1141 Opcode = NVPTX::LDV_i32_v2_areg_64;
1142 break;
1143 case MVT::i64:
1144 Opcode = NVPTX::LDV_i64_v2_areg_64;
1145 break;
1146 case MVT::f32:
1147 Opcode = NVPTX::LDV_f32_v2_areg_64;
1148 break;
1149 case MVT::f64:
1150 Opcode = NVPTX::LDV_f64_v2_areg_64;
1151 break;
1152 }
1153 break;
1154 case NVPTXISD::LoadV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1156 default:
1157 return nullptr;
1158 case MVT::i8:
1159 Opcode = NVPTX::LDV_i8_v4_areg_64;
1160 break;
1161 case MVT::i16:
1162 Opcode = NVPTX::LDV_i16_v4_areg_64;
1163 break;
1164 case MVT::i32:
1165 Opcode = NVPTX::LDV_i32_v4_areg_64;
1166 break;
1167 case MVT::f32:
1168 Opcode = NVPTX::LDV_f32_v4_areg_64;
1169 break;
1170 }
1171 break;
1172 }
1173 } else {
1174 switch (N->getOpcode()) {
1175 default:
1176 return nullptr;
1177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
1179 default:
1180 return nullptr;
1181 case MVT::i8:
1182 Opcode = NVPTX::LDV_i8_v2_areg;
1183 break;
1184 case MVT::i16:
1185 Opcode = NVPTX::LDV_i16_v2_areg;
1186 break;
1187 case MVT::i32:
1188 Opcode = NVPTX::LDV_i32_v2_areg;
1189 break;
1190 case MVT::i64:
1191 Opcode = NVPTX::LDV_i64_v2_areg;
1192 break;
1193 case MVT::f32:
1194 Opcode = NVPTX::LDV_f32_v2_areg;
1195 break;
1196 case MVT::f64:
1197 Opcode = NVPTX::LDV_f64_v2_areg;
1198 break;
1199 }
1200 break;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1203 default:
1204 return nullptr;
1205 case MVT::i8:
1206 Opcode = NVPTX::LDV_i8_v4_areg;
1207 break;
1208 case MVT::i16:
1209 Opcode = NVPTX::LDV_i16_v4_areg;
1210 break;
1211 case MVT::i32:
1212 Opcode = NVPTX::LDV_i32_v4_areg;
1213 break;
1214 case MVT::f32:
1215 Opcode = NVPTX::LDV_f32_v4_areg;
1216 break;
1217 }
1218 break;
1219 }
1220 }
1221
1222 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1223 getI32Imm(VecType), getI32Imm(FromType),
1224 getI32Imm(FromTypeWidth), Op1, Chain };
1225 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1226 }
1227
1228 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1229 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1230 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1231
1232 return LD;
1233 }
1234
SelectLDGLDU(SDNode * N)1235 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1236
1237 SDValue Chain = N->getOperand(0);
1238 SDValue Op1;
1239 MemSDNode *Mem;
1240 bool IsLDG = true;
1241
1242 // If this is an LDG intrinsic, the address is the third operand. Its its an
1243 // LDG/LDU SD node (from custom vector handling), then its the second operand
1244 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1245 Op1 = N->getOperand(2);
1246 Mem = cast<MemIntrinsicSDNode>(N);
1247 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1248 switch (IID) {
1249 default:
1250 return NULL;
1251 case Intrinsic::nvvm_ldg_global_f:
1252 case Intrinsic::nvvm_ldg_global_i:
1253 case Intrinsic::nvvm_ldg_global_p:
1254 IsLDG = true;
1255 break;
1256 case Intrinsic::nvvm_ldu_global_f:
1257 case Intrinsic::nvvm_ldu_global_i:
1258 case Intrinsic::nvvm_ldu_global_p:
1259 IsLDG = false;
1260 break;
1261 }
1262 } else {
1263 Op1 = N->getOperand(1);
1264 Mem = cast<MemSDNode>(N);
1265 }
1266
1267 unsigned Opcode;
1268 SDLoc DL(N);
1269 SDNode *LD;
1270 SDValue Base, Offset, Addr;
1271
1272 EVT EltVT = Mem->getMemoryVT();
1273 if (EltVT.isVector()) {
1274 EltVT = EltVT.getVectorElementType();
1275 }
1276
1277 if (SelectDirectAddr(Op1, Addr)) {
1278 switch (N->getOpcode()) {
1279 default:
1280 return nullptr;
1281 case ISD::INTRINSIC_W_CHAIN:
1282 if (IsLDG) {
1283 switch (EltVT.getSimpleVT().SimpleTy) {
1284 default:
1285 return nullptr;
1286 case MVT::i8:
1287 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1288 break;
1289 case MVT::i16:
1290 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1291 break;
1292 case MVT::i32:
1293 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1294 break;
1295 case MVT::i64:
1296 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1297 break;
1298 case MVT::f32:
1299 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1300 break;
1301 case MVT::f64:
1302 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1303 break;
1304 }
1305 } else {
1306 switch (EltVT.getSimpleVT().SimpleTy) {
1307 default:
1308 return nullptr;
1309 case MVT::i8:
1310 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1311 break;
1312 case MVT::i16:
1313 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1314 break;
1315 case MVT::i32:
1316 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1317 break;
1318 case MVT::i64:
1319 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1320 break;
1321 case MVT::f32:
1322 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1323 break;
1324 case MVT::f64:
1325 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1326 break;
1327 }
1328 }
1329 break;
1330 case NVPTXISD::LDGV2:
1331 switch (EltVT.getSimpleVT().SimpleTy) {
1332 default:
1333 return nullptr;
1334 case MVT::i8:
1335 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1336 break;
1337 case MVT::i16:
1338 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1339 break;
1340 case MVT::i32:
1341 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1342 break;
1343 case MVT::i64:
1344 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1345 break;
1346 case MVT::f32:
1347 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1348 break;
1349 case MVT::f64:
1350 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1351 break;
1352 }
1353 break;
1354 case NVPTXISD::LDUV2:
1355 switch (EltVT.getSimpleVT().SimpleTy) {
1356 default:
1357 return nullptr;
1358 case MVT::i8:
1359 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1360 break;
1361 case MVT::i16:
1362 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1363 break;
1364 case MVT::i32:
1365 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1366 break;
1367 case MVT::i64:
1368 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1369 break;
1370 case MVT::f32:
1371 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1372 break;
1373 case MVT::f64:
1374 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1375 break;
1376 }
1377 break;
1378 case NVPTXISD::LDGV4:
1379 switch (EltVT.getSimpleVT().SimpleTy) {
1380 default:
1381 return nullptr;
1382 case MVT::i8:
1383 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1384 break;
1385 case MVT::i16:
1386 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1387 break;
1388 case MVT::i32:
1389 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1390 break;
1391 case MVT::f32:
1392 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1393 break;
1394 }
1395 break;
1396 case NVPTXISD::LDUV4:
1397 switch (EltVT.getSimpleVT().SimpleTy) {
1398 default:
1399 return nullptr;
1400 case MVT::i8:
1401 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1402 break;
1403 case MVT::i16:
1404 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1405 break;
1406 case MVT::i32:
1407 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1408 break;
1409 case MVT::f32:
1410 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1411 break;
1412 }
1413 break;
1414 }
1415
1416 SDValue Ops[] = { Addr, Chain };
1417 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1418 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1419 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1420 if (TM.is64Bit()) {
1421 switch (N->getOpcode()) {
1422 default:
1423 return nullptr;
1424 case ISD::INTRINSIC_W_CHAIN:
1425 if (IsLDG) {
1426 switch (EltVT.getSimpleVT().SimpleTy) {
1427 default:
1428 return nullptr;
1429 case MVT::i8:
1430 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1431 break;
1432 case MVT::i16:
1433 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1434 break;
1435 case MVT::i32:
1436 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1437 break;
1438 case MVT::i64:
1439 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1440 break;
1441 case MVT::f32:
1442 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1443 break;
1444 case MVT::f64:
1445 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1446 break;
1447 }
1448 } else {
1449 switch (EltVT.getSimpleVT().SimpleTy) {
1450 default:
1451 return nullptr;
1452 case MVT::i8:
1453 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1454 break;
1455 case MVT::i16:
1456 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1457 break;
1458 case MVT::i32:
1459 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1460 break;
1461 case MVT::i64:
1462 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1463 break;
1464 case MVT::f32:
1465 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1466 break;
1467 case MVT::f64:
1468 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1469 break;
1470 }
1471 }
1472 break;
1473 case NVPTXISD::LDGV2:
1474 switch (EltVT.getSimpleVT().SimpleTy) {
1475 default:
1476 return nullptr;
1477 case MVT::i8:
1478 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1479 break;
1480 case MVT::i16:
1481 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1482 break;
1483 case MVT::i32:
1484 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1485 break;
1486 case MVT::i64:
1487 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1488 break;
1489 case MVT::f32:
1490 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1491 break;
1492 case MVT::f64:
1493 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1494 break;
1495 }
1496 break;
1497 case NVPTXISD::LDUV2:
1498 switch (EltVT.getSimpleVT().SimpleTy) {
1499 default:
1500 return nullptr;
1501 case MVT::i8:
1502 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1503 break;
1504 case MVT::i16:
1505 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1506 break;
1507 case MVT::i32:
1508 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1509 break;
1510 case MVT::i64:
1511 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1512 break;
1513 case MVT::f32:
1514 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1515 break;
1516 case MVT::f64:
1517 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1518 break;
1519 }
1520 break;
1521 case NVPTXISD::LDGV4:
1522 switch (EltVT.getSimpleVT().SimpleTy) {
1523 default:
1524 return nullptr;
1525 case MVT::i8:
1526 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1527 break;
1528 case MVT::i16:
1529 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1530 break;
1531 case MVT::i32:
1532 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1533 break;
1534 case MVT::f32:
1535 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1536 break;
1537 }
1538 break;
1539 case NVPTXISD::LDUV4:
1540 switch (EltVT.getSimpleVT().SimpleTy) {
1541 default:
1542 return nullptr;
1543 case MVT::i8:
1544 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1545 break;
1546 case MVT::i16:
1547 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1548 break;
1549 case MVT::i32:
1550 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1551 break;
1552 case MVT::f32:
1553 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1554 break;
1555 }
1556 break;
1557 }
1558 } else {
1559 switch (N->getOpcode()) {
1560 default:
1561 return nullptr;
1562 case ISD::INTRINSIC_W_CHAIN:
1563 if (IsLDG) {
1564 switch (EltVT.getSimpleVT().SimpleTy) {
1565 default:
1566 return nullptr;
1567 case MVT::i8:
1568 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1569 break;
1570 case MVT::i16:
1571 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1572 break;
1573 case MVT::i32:
1574 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1575 break;
1576 case MVT::i64:
1577 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1578 break;
1579 case MVT::f32:
1580 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1581 break;
1582 case MVT::f64:
1583 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1584 break;
1585 }
1586 } else {
1587 switch (EltVT.getSimpleVT().SimpleTy) {
1588 default:
1589 return nullptr;
1590 case MVT::i8:
1591 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1592 break;
1593 case MVT::i16:
1594 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1595 break;
1596 case MVT::i32:
1597 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1598 break;
1599 case MVT::i64:
1600 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1601 break;
1602 case MVT::f32:
1603 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1604 break;
1605 case MVT::f64:
1606 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1607 break;
1608 }
1609 }
1610 break;
1611 case NVPTXISD::LDGV2:
1612 switch (EltVT.getSimpleVT().SimpleTy) {
1613 default:
1614 return nullptr;
1615 case MVT::i8:
1616 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1617 break;
1618 case MVT::i16:
1619 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1620 break;
1621 case MVT::i32:
1622 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1623 break;
1624 case MVT::i64:
1625 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1626 break;
1627 case MVT::f32:
1628 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1629 break;
1630 case MVT::f64:
1631 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1632 break;
1633 }
1634 break;
1635 case NVPTXISD::LDUV2:
1636 switch (EltVT.getSimpleVT().SimpleTy) {
1637 default:
1638 return nullptr;
1639 case MVT::i8:
1640 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1641 break;
1642 case MVT::i16:
1643 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1644 break;
1645 case MVT::i32:
1646 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1647 break;
1648 case MVT::i64:
1649 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1650 break;
1651 case MVT::f32:
1652 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1653 break;
1654 case MVT::f64:
1655 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1656 break;
1657 }
1658 break;
1659 case NVPTXISD::LDGV4:
1660 switch (EltVT.getSimpleVT().SimpleTy) {
1661 default:
1662 return nullptr;
1663 case MVT::i8:
1664 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1665 break;
1666 case MVT::i16:
1667 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1668 break;
1669 case MVT::i32:
1670 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1671 break;
1672 case MVT::f32:
1673 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1674 break;
1675 }
1676 break;
1677 case NVPTXISD::LDUV4:
1678 switch (EltVT.getSimpleVT().SimpleTy) {
1679 default:
1680 return nullptr;
1681 case MVT::i8:
1682 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1683 break;
1684 case MVT::i16:
1685 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1686 break;
1687 case MVT::i32:
1688 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1689 break;
1690 case MVT::f32:
1691 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1692 break;
1693 }
1694 break;
1695 }
1696 }
1697
1698 SDValue Ops[] = { Base, Offset, Chain };
1699
1700 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1701 } else {
1702 if (TM.is64Bit()) {
1703 switch (N->getOpcode()) {
1704 default:
1705 return nullptr;
1706 case ISD::INTRINSIC_W_CHAIN:
1707 if (IsLDG) {
1708 switch (EltVT.getSimpleVT().SimpleTy) {
1709 default:
1710 return nullptr;
1711 case MVT::i8:
1712 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1713 break;
1714 case MVT::i16:
1715 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1716 break;
1717 case MVT::i32:
1718 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1719 break;
1720 case MVT::i64:
1721 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1722 break;
1723 case MVT::f32:
1724 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1725 break;
1726 case MVT::f64:
1727 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1728 break;
1729 }
1730 } else {
1731 switch (EltVT.getSimpleVT().SimpleTy) {
1732 default:
1733 return nullptr;
1734 case MVT::i8:
1735 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1736 break;
1737 case MVT::i16:
1738 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1739 break;
1740 case MVT::i32:
1741 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1742 break;
1743 case MVT::i64:
1744 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1745 break;
1746 case MVT::f32:
1747 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1748 break;
1749 case MVT::f64:
1750 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1751 break;
1752 }
1753 }
1754 break;
1755 case NVPTXISD::LDGV2:
1756 switch (EltVT.getSimpleVT().SimpleTy) {
1757 default:
1758 return nullptr;
1759 case MVT::i8:
1760 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1761 break;
1762 case MVT::i16:
1763 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1764 break;
1765 case MVT::i32:
1766 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1767 break;
1768 case MVT::i64:
1769 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1770 break;
1771 case MVT::f32:
1772 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1773 break;
1774 case MVT::f64:
1775 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1776 break;
1777 }
1778 break;
1779 case NVPTXISD::LDUV2:
1780 switch (EltVT.getSimpleVT().SimpleTy) {
1781 default:
1782 return nullptr;
1783 case MVT::i8:
1784 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1785 break;
1786 case MVT::i16:
1787 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1788 break;
1789 case MVT::i32:
1790 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1791 break;
1792 case MVT::i64:
1793 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1794 break;
1795 case MVT::f32:
1796 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1797 break;
1798 case MVT::f64:
1799 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1800 break;
1801 }
1802 break;
1803 case NVPTXISD::LDGV4:
1804 switch (EltVT.getSimpleVT().SimpleTy) {
1805 default:
1806 return nullptr;
1807 case MVT::i8:
1808 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1809 break;
1810 case MVT::i16:
1811 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1812 break;
1813 case MVT::i32:
1814 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1815 break;
1816 case MVT::f32:
1817 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1818 break;
1819 }
1820 break;
1821 case NVPTXISD::LDUV4:
1822 switch (EltVT.getSimpleVT().SimpleTy) {
1823 default:
1824 return nullptr;
1825 case MVT::i8:
1826 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1827 break;
1828 case MVT::i16:
1829 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1830 break;
1831 case MVT::i32:
1832 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1833 break;
1834 case MVT::f32:
1835 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1836 break;
1837 }
1838 break;
1839 }
1840 } else {
1841 switch (N->getOpcode()) {
1842 default:
1843 return nullptr;
1844 case ISD::INTRINSIC_W_CHAIN:
1845 if (IsLDG) {
1846 switch (EltVT.getSimpleVT().SimpleTy) {
1847 default:
1848 return nullptr;
1849 case MVT::i8:
1850 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1851 break;
1852 case MVT::i16:
1853 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1854 break;
1855 case MVT::i32:
1856 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1857 break;
1858 case MVT::i64:
1859 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1860 break;
1861 case MVT::f32:
1862 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1863 break;
1864 case MVT::f64:
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1866 break;
1867 }
1868 } else {
1869 switch (EltVT.getSimpleVT().SimpleTy) {
1870 default:
1871 return nullptr;
1872 case MVT::i8:
1873 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1874 break;
1875 case MVT::i16:
1876 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1877 break;
1878 case MVT::i32:
1879 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1880 break;
1881 case MVT::i64:
1882 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1883 break;
1884 case MVT::f32:
1885 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1886 break;
1887 case MVT::f64:
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1889 break;
1890 }
1891 }
1892 break;
1893 case NVPTXISD::LDGV2:
1894 switch (EltVT.getSimpleVT().SimpleTy) {
1895 default:
1896 return nullptr;
1897 case MVT::i8:
1898 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1899 break;
1900 case MVT::i16:
1901 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1902 break;
1903 case MVT::i32:
1904 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1905 break;
1906 case MVT::i64:
1907 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1908 break;
1909 case MVT::f32:
1910 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1911 break;
1912 case MVT::f64:
1913 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1914 break;
1915 }
1916 break;
1917 case NVPTXISD::LDUV2:
1918 switch (EltVT.getSimpleVT().SimpleTy) {
1919 default:
1920 return nullptr;
1921 case MVT::i8:
1922 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1923 break;
1924 case MVT::i16:
1925 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1926 break;
1927 case MVT::i32:
1928 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1929 break;
1930 case MVT::i64:
1931 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1932 break;
1933 case MVT::f32:
1934 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1935 break;
1936 case MVT::f64:
1937 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1938 break;
1939 }
1940 break;
1941 case NVPTXISD::LDGV4:
1942 switch (EltVT.getSimpleVT().SimpleTy) {
1943 default:
1944 return nullptr;
1945 case MVT::i8:
1946 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1947 break;
1948 case MVT::i16:
1949 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1950 break;
1951 case MVT::i32:
1952 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1953 break;
1954 case MVT::f32:
1955 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1956 break;
1957 }
1958 break;
1959 case NVPTXISD::LDUV4:
1960 switch (EltVT.getSimpleVT().SimpleTy) {
1961 default:
1962 return nullptr;
1963 case MVT::i8:
1964 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1965 break;
1966 case MVT::i16:
1967 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1968 break;
1969 case MVT::i32:
1970 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1971 break;
1972 case MVT::f32:
1973 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1974 break;
1975 }
1976 break;
1977 }
1978 }
1979
1980 SDValue Ops[] = { Op1, Chain };
1981 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1982 }
1983
1984 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1985 MemRefs0[0] = Mem->getMemOperand();
1986 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1987
1988 return LD;
1989 }
1990
SelectStore(SDNode * N)1991 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1992 SDLoc dl(N);
1993 StoreSDNode *ST = cast<StoreSDNode>(N);
1994 EVT StoreVT = ST->getMemoryVT();
1995 SDNode *NVPTXST = nullptr;
1996
1997 // do not support pre/post inc/dec
1998 if (ST->isIndexed())
1999 return nullptr;
2000
2001 if (!StoreVT.isSimple())
2002 return nullptr;
2003
2004 // Address Space Setting
2005 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2006
2007 // Volatile Setting
2008 // - .volatile is only availalble for .global and .shared
2009 bool isVolatile = ST->isVolatile();
2010 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2011 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2012 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2013 isVolatile = false;
2014
2015 // Vector Setting
2016 MVT SimpleVT = StoreVT.getSimpleVT();
2017 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2018 if (SimpleVT.isVector()) {
2019 unsigned num = SimpleVT.getVectorNumElements();
2020 if (num == 2)
2021 vecType = NVPTX::PTXLdStInstCode::V2;
2022 else if (num == 4)
2023 vecType = NVPTX::PTXLdStInstCode::V4;
2024 else
2025 return nullptr;
2026 }
2027
2028 // Type Setting: toType + toTypeWidth
2029 // - for integer type, always use 'u'
2030 //
2031 MVT ScalarVT = SimpleVT.getScalarType();
2032 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2033 unsigned int toType;
2034 if (ScalarVT.isFloatingPoint())
2035 toType = NVPTX::PTXLdStInstCode::Float;
2036 else
2037 toType = NVPTX::PTXLdStInstCode::Unsigned;
2038
2039 // Create the machine instruction DAG
2040 SDValue Chain = N->getOperand(0);
2041 SDValue N1 = N->getOperand(1);
2042 SDValue N2 = N->getOperand(2);
2043 SDValue Addr;
2044 SDValue Offset, Base;
2045 unsigned Opcode;
2046 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2047
2048 if (SelectDirectAddr(N2, Addr)) {
2049 switch (SourceVT) {
2050 case MVT::i8:
2051 Opcode = NVPTX::ST_i8_avar;
2052 break;
2053 case MVT::i16:
2054 Opcode = NVPTX::ST_i16_avar;
2055 break;
2056 case MVT::i32:
2057 Opcode = NVPTX::ST_i32_avar;
2058 break;
2059 case MVT::i64:
2060 Opcode = NVPTX::ST_i64_avar;
2061 break;
2062 case MVT::f32:
2063 Opcode = NVPTX::ST_f32_avar;
2064 break;
2065 case MVT::f64:
2066 Opcode = NVPTX::ST_f64_avar;
2067 break;
2068 default:
2069 return nullptr;
2070 }
2071 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2072 getI32Imm(vecType), getI32Imm(toType),
2073 getI32Imm(toTypeWidth), Addr, Chain };
2074 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2075 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2076 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2077 switch (SourceVT) {
2078 case MVT::i8:
2079 Opcode = NVPTX::ST_i8_asi;
2080 break;
2081 case MVT::i16:
2082 Opcode = NVPTX::ST_i16_asi;
2083 break;
2084 case MVT::i32:
2085 Opcode = NVPTX::ST_i32_asi;
2086 break;
2087 case MVT::i64:
2088 Opcode = NVPTX::ST_i64_asi;
2089 break;
2090 case MVT::f32:
2091 Opcode = NVPTX::ST_f32_asi;
2092 break;
2093 case MVT::f64:
2094 Opcode = NVPTX::ST_f64_asi;
2095 break;
2096 default:
2097 return nullptr;
2098 }
2099 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2100 getI32Imm(vecType), getI32Imm(toType),
2101 getI32Imm(toTypeWidth), Base, Offset, Chain };
2102 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2103 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2104 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2105 if (TM.is64Bit()) {
2106 switch (SourceVT) {
2107 case MVT::i8:
2108 Opcode = NVPTX::ST_i8_ari_64;
2109 break;
2110 case MVT::i16:
2111 Opcode = NVPTX::ST_i16_ari_64;
2112 break;
2113 case MVT::i32:
2114 Opcode = NVPTX::ST_i32_ari_64;
2115 break;
2116 case MVT::i64:
2117 Opcode = NVPTX::ST_i64_ari_64;
2118 break;
2119 case MVT::f32:
2120 Opcode = NVPTX::ST_f32_ari_64;
2121 break;
2122 case MVT::f64:
2123 Opcode = NVPTX::ST_f64_ari_64;
2124 break;
2125 default:
2126 return nullptr;
2127 }
2128 } else {
2129 switch (SourceVT) {
2130 case MVT::i8:
2131 Opcode = NVPTX::ST_i8_ari;
2132 break;
2133 case MVT::i16:
2134 Opcode = NVPTX::ST_i16_ari;
2135 break;
2136 case MVT::i32:
2137 Opcode = NVPTX::ST_i32_ari;
2138 break;
2139 case MVT::i64:
2140 Opcode = NVPTX::ST_i64_ari;
2141 break;
2142 case MVT::f32:
2143 Opcode = NVPTX::ST_f32_ari;
2144 break;
2145 case MVT::f64:
2146 Opcode = NVPTX::ST_f64_ari;
2147 break;
2148 default:
2149 return nullptr;
2150 }
2151 }
2152 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2153 getI32Imm(vecType), getI32Imm(toType),
2154 getI32Imm(toTypeWidth), Base, Offset, Chain };
2155 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2156 } else {
2157 if (TM.is64Bit()) {
2158 switch (SourceVT) {
2159 case MVT::i8:
2160 Opcode = NVPTX::ST_i8_areg_64;
2161 break;
2162 case MVT::i16:
2163 Opcode = NVPTX::ST_i16_areg_64;
2164 break;
2165 case MVT::i32:
2166 Opcode = NVPTX::ST_i32_areg_64;
2167 break;
2168 case MVT::i64:
2169 Opcode = NVPTX::ST_i64_areg_64;
2170 break;
2171 case MVT::f32:
2172 Opcode = NVPTX::ST_f32_areg_64;
2173 break;
2174 case MVT::f64:
2175 Opcode = NVPTX::ST_f64_areg_64;
2176 break;
2177 default:
2178 return nullptr;
2179 }
2180 } else {
2181 switch (SourceVT) {
2182 case MVT::i8:
2183 Opcode = NVPTX::ST_i8_areg;
2184 break;
2185 case MVT::i16:
2186 Opcode = NVPTX::ST_i16_areg;
2187 break;
2188 case MVT::i32:
2189 Opcode = NVPTX::ST_i32_areg;
2190 break;
2191 case MVT::i64:
2192 Opcode = NVPTX::ST_i64_areg;
2193 break;
2194 case MVT::f32:
2195 Opcode = NVPTX::ST_f32_areg;
2196 break;
2197 case MVT::f64:
2198 Opcode = NVPTX::ST_f64_areg;
2199 break;
2200 default:
2201 return nullptr;
2202 }
2203 }
2204 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2205 getI32Imm(vecType), getI32Imm(toType),
2206 getI32Imm(toTypeWidth), N2, Chain };
2207 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2208 }
2209
2210 if (NVPTXST) {
2211 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2212 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2213 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2214 }
2215
2216 return NVPTXST;
2217 }
2218
SelectStoreVector(SDNode * N)2219 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2220 SDValue Chain = N->getOperand(0);
2221 SDValue Op1 = N->getOperand(1);
2222 SDValue Addr, Offset, Base;
2223 unsigned Opcode;
2224 SDLoc DL(N);
2225 SDNode *ST;
2226 EVT EltVT = Op1.getValueType();
2227 MemSDNode *MemSD = cast<MemSDNode>(N);
2228 EVT StoreVT = MemSD->getMemoryVT();
2229
2230 // Address Space Setting
2231 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2232
2233 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2234 report_fatal_error("Cannot store to pointer that points to constant "
2235 "memory space");
2236 }
2237
2238 // Volatile Setting
2239 // - .volatile is only availalble for .global and .shared
2240 bool IsVolatile = MemSD->isVolatile();
2241 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2242 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2243 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2244 IsVolatile = false;
2245
2246 // Type Setting: toType + toTypeWidth
2247 // - for integer type, always use 'u'
2248 assert(StoreVT.isSimple() && "Store value is not simple");
2249 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2250 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2251 unsigned ToType;
2252 if (ScalarVT.isFloatingPoint())
2253 ToType = NVPTX::PTXLdStInstCode::Float;
2254 else
2255 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2256
2257 SmallVector<SDValue, 12> StOps;
2258 SDValue N2;
2259 unsigned VecType;
2260
2261 switch (N->getOpcode()) {
2262 case NVPTXISD::StoreV2:
2263 VecType = NVPTX::PTXLdStInstCode::V2;
2264 StOps.push_back(N->getOperand(1));
2265 StOps.push_back(N->getOperand(2));
2266 N2 = N->getOperand(3);
2267 break;
2268 case NVPTXISD::StoreV4:
2269 VecType = NVPTX::PTXLdStInstCode::V4;
2270 StOps.push_back(N->getOperand(1));
2271 StOps.push_back(N->getOperand(2));
2272 StOps.push_back(N->getOperand(3));
2273 StOps.push_back(N->getOperand(4));
2274 N2 = N->getOperand(5);
2275 break;
2276 default:
2277 return nullptr;
2278 }
2279
2280 StOps.push_back(getI32Imm(IsVolatile));
2281 StOps.push_back(getI32Imm(CodeAddrSpace));
2282 StOps.push_back(getI32Imm(VecType));
2283 StOps.push_back(getI32Imm(ToType));
2284 StOps.push_back(getI32Imm(ToTypeWidth));
2285
2286 if (SelectDirectAddr(N2, Addr)) {
2287 switch (N->getOpcode()) {
2288 default:
2289 return nullptr;
2290 case NVPTXISD::StoreV2:
2291 switch (EltVT.getSimpleVT().SimpleTy) {
2292 default:
2293 return nullptr;
2294 case MVT::i8:
2295 Opcode = NVPTX::STV_i8_v2_avar;
2296 break;
2297 case MVT::i16:
2298 Opcode = NVPTX::STV_i16_v2_avar;
2299 break;
2300 case MVT::i32:
2301 Opcode = NVPTX::STV_i32_v2_avar;
2302 break;
2303 case MVT::i64:
2304 Opcode = NVPTX::STV_i64_v2_avar;
2305 break;
2306 case MVT::f32:
2307 Opcode = NVPTX::STV_f32_v2_avar;
2308 break;
2309 case MVT::f64:
2310 Opcode = NVPTX::STV_f64_v2_avar;
2311 break;
2312 }
2313 break;
2314 case NVPTXISD::StoreV4:
2315 switch (EltVT.getSimpleVT().SimpleTy) {
2316 default:
2317 return nullptr;
2318 case MVT::i8:
2319 Opcode = NVPTX::STV_i8_v4_avar;
2320 break;
2321 case MVT::i16:
2322 Opcode = NVPTX::STV_i16_v4_avar;
2323 break;
2324 case MVT::i32:
2325 Opcode = NVPTX::STV_i32_v4_avar;
2326 break;
2327 case MVT::f32:
2328 Opcode = NVPTX::STV_f32_v4_avar;
2329 break;
2330 }
2331 break;
2332 }
2333 StOps.push_back(Addr);
2334 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2335 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2336 switch (N->getOpcode()) {
2337 default:
2338 return nullptr;
2339 case NVPTXISD::StoreV2:
2340 switch (EltVT.getSimpleVT().SimpleTy) {
2341 default:
2342 return nullptr;
2343 case MVT::i8:
2344 Opcode = NVPTX::STV_i8_v2_asi;
2345 break;
2346 case MVT::i16:
2347 Opcode = NVPTX::STV_i16_v2_asi;
2348 break;
2349 case MVT::i32:
2350 Opcode = NVPTX::STV_i32_v2_asi;
2351 break;
2352 case MVT::i64:
2353 Opcode = NVPTX::STV_i64_v2_asi;
2354 break;
2355 case MVT::f32:
2356 Opcode = NVPTX::STV_f32_v2_asi;
2357 break;
2358 case MVT::f64:
2359 Opcode = NVPTX::STV_f64_v2_asi;
2360 break;
2361 }
2362 break;
2363 case NVPTXISD::StoreV4:
2364 switch (EltVT.getSimpleVT().SimpleTy) {
2365 default:
2366 return nullptr;
2367 case MVT::i8:
2368 Opcode = NVPTX::STV_i8_v4_asi;
2369 break;
2370 case MVT::i16:
2371 Opcode = NVPTX::STV_i16_v4_asi;
2372 break;
2373 case MVT::i32:
2374 Opcode = NVPTX::STV_i32_v4_asi;
2375 break;
2376 case MVT::f32:
2377 Opcode = NVPTX::STV_f32_v4_asi;
2378 break;
2379 }
2380 break;
2381 }
2382 StOps.push_back(Base);
2383 StOps.push_back(Offset);
2384 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2385 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2386 if (TM.is64Bit()) {
2387 switch (N->getOpcode()) {
2388 default:
2389 return nullptr;
2390 case NVPTXISD::StoreV2:
2391 switch (EltVT.getSimpleVT().SimpleTy) {
2392 default:
2393 return nullptr;
2394 case MVT::i8:
2395 Opcode = NVPTX::STV_i8_v2_ari_64;
2396 break;
2397 case MVT::i16:
2398 Opcode = NVPTX::STV_i16_v2_ari_64;
2399 break;
2400 case MVT::i32:
2401 Opcode = NVPTX::STV_i32_v2_ari_64;
2402 break;
2403 case MVT::i64:
2404 Opcode = NVPTX::STV_i64_v2_ari_64;
2405 break;
2406 case MVT::f32:
2407 Opcode = NVPTX::STV_f32_v2_ari_64;
2408 break;
2409 case MVT::f64:
2410 Opcode = NVPTX::STV_f64_v2_ari_64;
2411 break;
2412 }
2413 break;
2414 case NVPTXISD::StoreV4:
2415 switch (EltVT.getSimpleVT().SimpleTy) {
2416 default:
2417 return nullptr;
2418 case MVT::i8:
2419 Opcode = NVPTX::STV_i8_v4_ari_64;
2420 break;
2421 case MVT::i16:
2422 Opcode = NVPTX::STV_i16_v4_ari_64;
2423 break;
2424 case MVT::i32:
2425 Opcode = NVPTX::STV_i32_v4_ari_64;
2426 break;
2427 case MVT::f32:
2428 Opcode = NVPTX::STV_f32_v4_ari_64;
2429 break;
2430 }
2431 break;
2432 }
2433 } else {
2434 switch (N->getOpcode()) {
2435 default:
2436 return nullptr;
2437 case NVPTXISD::StoreV2:
2438 switch (EltVT.getSimpleVT().SimpleTy) {
2439 default:
2440 return nullptr;
2441 case MVT::i8:
2442 Opcode = NVPTX::STV_i8_v2_ari;
2443 break;
2444 case MVT::i16:
2445 Opcode = NVPTX::STV_i16_v2_ari;
2446 break;
2447 case MVT::i32:
2448 Opcode = NVPTX::STV_i32_v2_ari;
2449 break;
2450 case MVT::i64:
2451 Opcode = NVPTX::STV_i64_v2_ari;
2452 break;
2453 case MVT::f32:
2454 Opcode = NVPTX::STV_f32_v2_ari;
2455 break;
2456 case MVT::f64:
2457 Opcode = NVPTX::STV_f64_v2_ari;
2458 break;
2459 }
2460 break;
2461 case NVPTXISD::StoreV4:
2462 switch (EltVT.getSimpleVT().SimpleTy) {
2463 default:
2464 return nullptr;
2465 case MVT::i8:
2466 Opcode = NVPTX::STV_i8_v4_ari;
2467 break;
2468 case MVT::i16:
2469 Opcode = NVPTX::STV_i16_v4_ari;
2470 break;
2471 case MVT::i32:
2472 Opcode = NVPTX::STV_i32_v4_ari;
2473 break;
2474 case MVT::f32:
2475 Opcode = NVPTX::STV_f32_v4_ari;
2476 break;
2477 }
2478 break;
2479 }
2480 }
2481 StOps.push_back(Base);
2482 StOps.push_back(Offset);
2483 } else {
2484 if (TM.is64Bit()) {
2485 switch (N->getOpcode()) {
2486 default:
2487 return nullptr;
2488 case NVPTXISD::StoreV2:
2489 switch (EltVT.getSimpleVT().SimpleTy) {
2490 default:
2491 return nullptr;
2492 case MVT::i8:
2493 Opcode = NVPTX::STV_i8_v2_areg_64;
2494 break;
2495 case MVT::i16:
2496 Opcode = NVPTX::STV_i16_v2_areg_64;
2497 break;
2498 case MVT::i32:
2499 Opcode = NVPTX::STV_i32_v2_areg_64;
2500 break;
2501 case MVT::i64:
2502 Opcode = NVPTX::STV_i64_v2_areg_64;
2503 break;
2504 case MVT::f32:
2505 Opcode = NVPTX::STV_f32_v2_areg_64;
2506 break;
2507 case MVT::f64:
2508 Opcode = NVPTX::STV_f64_v2_areg_64;
2509 break;
2510 }
2511 break;
2512 case NVPTXISD::StoreV4:
2513 switch (EltVT.getSimpleVT().SimpleTy) {
2514 default:
2515 return nullptr;
2516 case MVT::i8:
2517 Opcode = NVPTX::STV_i8_v4_areg_64;
2518 break;
2519 case MVT::i16:
2520 Opcode = NVPTX::STV_i16_v4_areg_64;
2521 break;
2522 case MVT::i32:
2523 Opcode = NVPTX::STV_i32_v4_areg_64;
2524 break;
2525 case MVT::f32:
2526 Opcode = NVPTX::STV_f32_v4_areg_64;
2527 break;
2528 }
2529 break;
2530 }
2531 } else {
2532 switch (N->getOpcode()) {
2533 default:
2534 return nullptr;
2535 case NVPTXISD::StoreV2:
2536 switch (EltVT.getSimpleVT().SimpleTy) {
2537 default:
2538 return nullptr;
2539 case MVT::i8:
2540 Opcode = NVPTX::STV_i8_v2_areg;
2541 break;
2542 case MVT::i16:
2543 Opcode = NVPTX::STV_i16_v2_areg;
2544 break;
2545 case MVT::i32:
2546 Opcode = NVPTX::STV_i32_v2_areg;
2547 break;
2548 case MVT::i64:
2549 Opcode = NVPTX::STV_i64_v2_areg;
2550 break;
2551 case MVT::f32:
2552 Opcode = NVPTX::STV_f32_v2_areg;
2553 break;
2554 case MVT::f64:
2555 Opcode = NVPTX::STV_f64_v2_areg;
2556 break;
2557 }
2558 break;
2559 case NVPTXISD::StoreV4:
2560 switch (EltVT.getSimpleVT().SimpleTy) {
2561 default:
2562 return nullptr;
2563 case MVT::i8:
2564 Opcode = NVPTX::STV_i8_v4_areg;
2565 break;
2566 case MVT::i16:
2567 Opcode = NVPTX::STV_i16_v4_areg;
2568 break;
2569 case MVT::i32:
2570 Opcode = NVPTX::STV_i32_v4_areg;
2571 break;
2572 case MVT::f32:
2573 Opcode = NVPTX::STV_f32_v4_areg;
2574 break;
2575 }
2576 break;
2577 }
2578 }
2579 StOps.push_back(N2);
2580 }
2581
2582 StOps.push_back(Chain);
2583
2584 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2585
2586 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2587 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2588 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2589
2590 return ST;
2591 }
2592
SelectLoadParam(SDNode * Node)2593 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2594 SDValue Chain = Node->getOperand(0);
2595 SDValue Offset = Node->getOperand(2);
2596 SDValue Flag = Node->getOperand(3);
2597 SDLoc DL(Node);
2598 MemSDNode *Mem = cast<MemSDNode>(Node);
2599
2600 unsigned VecSize;
2601 switch (Node->getOpcode()) {
2602 default:
2603 return nullptr;
2604 case NVPTXISD::LoadParam:
2605 VecSize = 1;
2606 break;
2607 case NVPTXISD::LoadParamV2:
2608 VecSize = 2;
2609 break;
2610 case NVPTXISD::LoadParamV4:
2611 VecSize = 4;
2612 break;
2613 }
2614
2615 EVT EltVT = Node->getValueType(0);
2616 EVT MemVT = Mem->getMemoryVT();
2617
2618 unsigned Opc = 0;
2619
2620 switch (VecSize) {
2621 default:
2622 return nullptr;
2623 case 1:
2624 switch (MemVT.getSimpleVT().SimpleTy) {
2625 default:
2626 return nullptr;
2627 case MVT::i1:
2628 Opc = NVPTX::LoadParamMemI8;
2629 break;
2630 case MVT::i8:
2631 Opc = NVPTX::LoadParamMemI8;
2632 break;
2633 case MVT::i16:
2634 Opc = NVPTX::LoadParamMemI16;
2635 break;
2636 case MVT::i32:
2637 Opc = NVPTX::LoadParamMemI32;
2638 break;
2639 case MVT::i64:
2640 Opc = NVPTX::LoadParamMemI64;
2641 break;
2642 case MVT::f32:
2643 Opc = NVPTX::LoadParamMemF32;
2644 break;
2645 case MVT::f64:
2646 Opc = NVPTX::LoadParamMemF64;
2647 break;
2648 }
2649 break;
2650 case 2:
2651 switch (MemVT.getSimpleVT().SimpleTy) {
2652 default:
2653 return nullptr;
2654 case MVT::i1:
2655 Opc = NVPTX::LoadParamMemV2I8;
2656 break;
2657 case MVT::i8:
2658 Opc = NVPTX::LoadParamMemV2I8;
2659 break;
2660 case MVT::i16:
2661 Opc = NVPTX::LoadParamMemV2I16;
2662 break;
2663 case MVT::i32:
2664 Opc = NVPTX::LoadParamMemV2I32;
2665 break;
2666 case MVT::i64:
2667 Opc = NVPTX::LoadParamMemV2I64;
2668 break;
2669 case MVT::f32:
2670 Opc = NVPTX::LoadParamMemV2F32;
2671 break;
2672 case MVT::f64:
2673 Opc = NVPTX::LoadParamMemV2F64;
2674 break;
2675 }
2676 break;
2677 case 4:
2678 switch (MemVT.getSimpleVT().SimpleTy) {
2679 default:
2680 return nullptr;
2681 case MVT::i1:
2682 Opc = NVPTX::LoadParamMemV4I8;
2683 break;
2684 case MVT::i8:
2685 Opc = NVPTX::LoadParamMemV4I8;
2686 break;
2687 case MVT::i16:
2688 Opc = NVPTX::LoadParamMemV4I16;
2689 break;
2690 case MVT::i32:
2691 Opc = NVPTX::LoadParamMemV4I32;
2692 break;
2693 case MVT::f32:
2694 Opc = NVPTX::LoadParamMemV4F32;
2695 break;
2696 }
2697 break;
2698 }
2699
2700 SDVTList VTs;
2701 if (VecSize == 1) {
2702 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2703 } else if (VecSize == 2) {
2704 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2705 } else {
2706 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2707 VTs = CurDAG->getVTList(EVTs);
2708 }
2709
2710 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2711
2712 SmallVector<SDValue, 2> Ops;
2713 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2714 Ops.push_back(Chain);
2715 Ops.push_back(Flag);
2716
2717 SDNode *Ret =
2718 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2719 return Ret;
2720 }
2721
SelectStoreRetval(SDNode * N)2722 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2723 SDLoc DL(N);
2724 SDValue Chain = N->getOperand(0);
2725 SDValue Offset = N->getOperand(1);
2726 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2727 MemSDNode *Mem = cast<MemSDNode>(N);
2728
2729 // How many elements do we have?
2730 unsigned NumElts = 1;
2731 switch (N->getOpcode()) {
2732 default:
2733 return nullptr;
2734 case NVPTXISD::StoreRetval:
2735 NumElts = 1;
2736 break;
2737 case NVPTXISD::StoreRetvalV2:
2738 NumElts = 2;
2739 break;
2740 case NVPTXISD::StoreRetvalV4:
2741 NumElts = 4;
2742 break;
2743 }
2744
2745 // Build vector of operands
2746 SmallVector<SDValue, 6> Ops;
2747 for (unsigned i = 0; i < NumElts; ++i)
2748 Ops.push_back(N->getOperand(i + 2));
2749 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2750 Ops.push_back(Chain);
2751
2752 // Determine target opcode
2753 // If we have an i1, use an 8-bit store. The lowering code in
2754 // NVPTXISelLowering will have already emitted an upcast.
2755 unsigned Opcode = 0;
2756 switch (NumElts) {
2757 default:
2758 return nullptr;
2759 case 1:
2760 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2761 default:
2762 return nullptr;
2763 case MVT::i1:
2764 Opcode = NVPTX::StoreRetvalI8;
2765 break;
2766 case MVT::i8:
2767 Opcode = NVPTX::StoreRetvalI8;
2768 break;
2769 case MVT::i16:
2770 Opcode = NVPTX::StoreRetvalI16;
2771 break;
2772 case MVT::i32:
2773 Opcode = NVPTX::StoreRetvalI32;
2774 break;
2775 case MVT::i64:
2776 Opcode = NVPTX::StoreRetvalI64;
2777 break;
2778 case MVT::f32:
2779 Opcode = NVPTX::StoreRetvalF32;
2780 break;
2781 case MVT::f64:
2782 Opcode = NVPTX::StoreRetvalF64;
2783 break;
2784 }
2785 break;
2786 case 2:
2787 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2788 default:
2789 return nullptr;
2790 case MVT::i1:
2791 Opcode = NVPTX::StoreRetvalV2I8;
2792 break;
2793 case MVT::i8:
2794 Opcode = NVPTX::StoreRetvalV2I8;
2795 break;
2796 case MVT::i16:
2797 Opcode = NVPTX::StoreRetvalV2I16;
2798 break;
2799 case MVT::i32:
2800 Opcode = NVPTX::StoreRetvalV2I32;
2801 break;
2802 case MVT::i64:
2803 Opcode = NVPTX::StoreRetvalV2I64;
2804 break;
2805 case MVT::f32:
2806 Opcode = NVPTX::StoreRetvalV2F32;
2807 break;
2808 case MVT::f64:
2809 Opcode = NVPTX::StoreRetvalV2F64;
2810 break;
2811 }
2812 break;
2813 case 4:
2814 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2815 default:
2816 return nullptr;
2817 case MVT::i1:
2818 Opcode = NVPTX::StoreRetvalV4I8;
2819 break;
2820 case MVT::i8:
2821 Opcode = NVPTX::StoreRetvalV4I8;
2822 break;
2823 case MVT::i16:
2824 Opcode = NVPTX::StoreRetvalV4I16;
2825 break;
2826 case MVT::i32:
2827 Opcode = NVPTX::StoreRetvalV4I32;
2828 break;
2829 case MVT::f32:
2830 Opcode = NVPTX::StoreRetvalV4F32;
2831 break;
2832 }
2833 break;
2834 }
2835
2836 SDNode *Ret =
2837 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2838 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2839 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2840 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2841
2842 return Ret;
2843 }
2844
SelectStoreParam(SDNode * N)2845 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2846 SDLoc DL(N);
2847 SDValue Chain = N->getOperand(0);
2848 SDValue Param = N->getOperand(1);
2849 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2850 SDValue Offset = N->getOperand(2);
2851 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2852 MemSDNode *Mem = cast<MemSDNode>(N);
2853 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2854
2855 // How many elements do we have?
2856 unsigned NumElts = 1;
2857 switch (N->getOpcode()) {
2858 default:
2859 return nullptr;
2860 case NVPTXISD::StoreParamU32:
2861 case NVPTXISD::StoreParamS32:
2862 case NVPTXISD::StoreParam:
2863 NumElts = 1;
2864 break;
2865 case NVPTXISD::StoreParamV2:
2866 NumElts = 2;
2867 break;
2868 case NVPTXISD::StoreParamV4:
2869 NumElts = 4;
2870 break;
2871 }
2872
2873 // Build vector of operands
2874 SmallVector<SDValue, 8> Ops;
2875 for (unsigned i = 0; i < NumElts; ++i)
2876 Ops.push_back(N->getOperand(i + 3));
2877 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2878 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2879 Ops.push_back(Chain);
2880 Ops.push_back(Flag);
2881
2882 // Determine target opcode
2883 // If we have an i1, use an 8-bit store. The lowering code in
2884 // NVPTXISelLowering will have already emitted an upcast.
2885 unsigned Opcode = 0;
2886 switch (N->getOpcode()) {
2887 default:
2888 switch (NumElts) {
2889 default:
2890 return nullptr;
2891 case 1:
2892 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2893 default:
2894 return nullptr;
2895 case MVT::i1:
2896 Opcode = NVPTX::StoreParamI8;
2897 break;
2898 case MVT::i8:
2899 Opcode = NVPTX::StoreParamI8;
2900 break;
2901 case MVT::i16:
2902 Opcode = NVPTX::StoreParamI16;
2903 break;
2904 case MVT::i32:
2905 Opcode = NVPTX::StoreParamI32;
2906 break;
2907 case MVT::i64:
2908 Opcode = NVPTX::StoreParamI64;
2909 break;
2910 case MVT::f32:
2911 Opcode = NVPTX::StoreParamF32;
2912 break;
2913 case MVT::f64:
2914 Opcode = NVPTX::StoreParamF64;
2915 break;
2916 }
2917 break;
2918 case 2:
2919 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2920 default:
2921 return nullptr;
2922 case MVT::i1:
2923 Opcode = NVPTX::StoreParamV2I8;
2924 break;
2925 case MVT::i8:
2926 Opcode = NVPTX::StoreParamV2I8;
2927 break;
2928 case MVT::i16:
2929 Opcode = NVPTX::StoreParamV2I16;
2930 break;
2931 case MVT::i32:
2932 Opcode = NVPTX::StoreParamV2I32;
2933 break;
2934 case MVT::i64:
2935 Opcode = NVPTX::StoreParamV2I64;
2936 break;
2937 case MVT::f32:
2938 Opcode = NVPTX::StoreParamV2F32;
2939 break;
2940 case MVT::f64:
2941 Opcode = NVPTX::StoreParamV2F64;
2942 break;
2943 }
2944 break;
2945 case 4:
2946 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2947 default:
2948 return nullptr;
2949 case MVT::i1:
2950 Opcode = NVPTX::StoreParamV4I8;
2951 break;
2952 case MVT::i8:
2953 Opcode = NVPTX::StoreParamV4I8;
2954 break;
2955 case MVT::i16:
2956 Opcode = NVPTX::StoreParamV4I16;
2957 break;
2958 case MVT::i32:
2959 Opcode = NVPTX::StoreParamV4I32;
2960 break;
2961 case MVT::f32:
2962 Opcode = NVPTX::StoreParamV4F32;
2963 break;
2964 }
2965 break;
2966 }
2967 break;
2968 // Special case: if we have a sign-extend/zero-extend node, insert the
2969 // conversion instruction first, and use that as the value operand to
2970 // the selected StoreParam node.
2971 case NVPTXISD::StoreParamU32: {
2972 Opcode = NVPTX::StoreParamI32;
2973 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2974 MVT::i32);
2975 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2976 MVT::i32, Ops[0], CvtNone);
2977 Ops[0] = SDValue(Cvt, 0);
2978 break;
2979 }
2980 case NVPTXISD::StoreParamS32: {
2981 Opcode = NVPTX::StoreParamI32;
2982 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2983 MVT::i32);
2984 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2985 MVT::i32, Ops[0], CvtNone);
2986 Ops[0] = SDValue(Cvt, 0);
2987 break;
2988 }
2989 }
2990
2991 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2992 SDNode *Ret =
2993 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2994 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2995 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2996 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2997
2998 return Ret;
2999 }
3000
SelectTextureIntrinsic(SDNode * N)3001 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3002 SDValue Chain = N->getOperand(0);
3003 SDNode *Ret = nullptr;
3004 unsigned Opc = 0;
3005 SmallVector<SDValue, 8> Ops;
3006
3007 switch (N->getOpcode()) {
3008 default: return nullptr;
3009 case NVPTXISD::Tex1DFloatS32:
3010 Opc = NVPTX::TEX_1D_F32_S32;
3011 break;
3012 case NVPTXISD::Tex1DFloatFloat:
3013 Opc = NVPTX::TEX_1D_F32_F32;
3014 break;
3015 case NVPTXISD::Tex1DFloatFloatLevel:
3016 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3017 break;
3018 case NVPTXISD::Tex1DFloatFloatGrad:
3019 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3020 break;
3021 case NVPTXISD::Tex1DS32S32:
3022 Opc = NVPTX::TEX_1D_S32_S32;
3023 break;
3024 case NVPTXISD::Tex1DS32Float:
3025 Opc = NVPTX::TEX_1D_S32_F32;
3026 break;
3027 case NVPTXISD::Tex1DS32FloatLevel:
3028 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3029 break;
3030 case NVPTXISD::Tex1DS32FloatGrad:
3031 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3032 break;
3033 case NVPTXISD::Tex1DU32S32:
3034 Opc = NVPTX::TEX_1D_U32_S32;
3035 break;
3036 case NVPTXISD::Tex1DU32Float:
3037 Opc = NVPTX::TEX_1D_U32_F32;
3038 break;
3039 case NVPTXISD::Tex1DU32FloatLevel:
3040 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3041 break;
3042 case NVPTXISD::Tex1DU32FloatGrad:
3043 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3044 break;
3045 case NVPTXISD::Tex1DArrayFloatS32:
3046 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3047 break;
3048 case NVPTXISD::Tex1DArrayFloatFloat:
3049 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3050 break;
3051 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3052 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3053 break;
3054 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3055 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3056 break;
3057 case NVPTXISD::Tex1DArrayS32S32:
3058 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3059 break;
3060 case NVPTXISD::Tex1DArrayS32Float:
3061 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3062 break;
3063 case NVPTXISD::Tex1DArrayS32FloatLevel:
3064 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3065 break;
3066 case NVPTXISD::Tex1DArrayS32FloatGrad:
3067 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3068 break;
3069 case NVPTXISD::Tex1DArrayU32S32:
3070 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3071 break;
3072 case NVPTXISD::Tex1DArrayU32Float:
3073 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3074 break;
3075 case NVPTXISD::Tex1DArrayU32FloatLevel:
3076 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3077 break;
3078 case NVPTXISD::Tex1DArrayU32FloatGrad:
3079 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3080 break;
3081 case NVPTXISD::Tex2DFloatS32:
3082 Opc = NVPTX::TEX_2D_F32_S32;
3083 break;
3084 case NVPTXISD::Tex2DFloatFloat:
3085 Opc = NVPTX::TEX_2D_F32_F32;
3086 break;
3087 case NVPTXISD::Tex2DFloatFloatLevel:
3088 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3089 break;
3090 case NVPTXISD::Tex2DFloatFloatGrad:
3091 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3092 break;
3093 case NVPTXISD::Tex2DS32S32:
3094 Opc = NVPTX::TEX_2D_S32_S32;
3095 break;
3096 case NVPTXISD::Tex2DS32Float:
3097 Opc = NVPTX::TEX_2D_S32_F32;
3098 break;
3099 case NVPTXISD::Tex2DS32FloatLevel:
3100 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3101 break;
3102 case NVPTXISD::Tex2DS32FloatGrad:
3103 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3104 break;
3105 case NVPTXISD::Tex2DU32S32:
3106 Opc = NVPTX::TEX_2D_U32_S32;
3107 break;
3108 case NVPTXISD::Tex2DU32Float:
3109 Opc = NVPTX::TEX_2D_U32_F32;
3110 break;
3111 case NVPTXISD::Tex2DU32FloatLevel:
3112 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3113 break;
3114 case NVPTXISD::Tex2DU32FloatGrad:
3115 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3116 break;
3117 case NVPTXISD::Tex2DArrayFloatS32:
3118 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3119 break;
3120 case NVPTXISD::Tex2DArrayFloatFloat:
3121 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3122 break;
3123 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3124 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3125 break;
3126 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3127 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3128 break;
3129 case NVPTXISD::Tex2DArrayS32S32:
3130 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3131 break;
3132 case NVPTXISD::Tex2DArrayS32Float:
3133 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3134 break;
3135 case NVPTXISD::Tex2DArrayS32FloatLevel:
3136 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3137 break;
3138 case NVPTXISD::Tex2DArrayS32FloatGrad:
3139 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3140 break;
3141 case NVPTXISD::Tex2DArrayU32S32:
3142 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3143 break;
3144 case NVPTXISD::Tex2DArrayU32Float:
3145 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3146 break;
3147 case NVPTXISD::Tex2DArrayU32FloatLevel:
3148 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3149 break;
3150 case NVPTXISD::Tex2DArrayU32FloatGrad:
3151 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3152 break;
3153 case NVPTXISD::Tex3DFloatS32:
3154 Opc = NVPTX::TEX_3D_F32_S32;
3155 break;
3156 case NVPTXISD::Tex3DFloatFloat:
3157 Opc = NVPTX::TEX_3D_F32_F32;
3158 break;
3159 case NVPTXISD::Tex3DFloatFloatLevel:
3160 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3161 break;
3162 case NVPTXISD::Tex3DFloatFloatGrad:
3163 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3164 break;
3165 case NVPTXISD::Tex3DS32S32:
3166 Opc = NVPTX::TEX_3D_S32_S32;
3167 break;
3168 case NVPTXISD::Tex3DS32Float:
3169 Opc = NVPTX::TEX_3D_S32_F32;
3170 break;
3171 case NVPTXISD::Tex3DS32FloatLevel:
3172 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3173 break;
3174 case NVPTXISD::Tex3DS32FloatGrad:
3175 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3176 break;
3177 case NVPTXISD::Tex3DU32S32:
3178 Opc = NVPTX::TEX_3D_U32_S32;
3179 break;
3180 case NVPTXISD::Tex3DU32Float:
3181 Opc = NVPTX::TEX_3D_U32_F32;
3182 break;
3183 case NVPTXISD::Tex3DU32FloatLevel:
3184 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3185 break;
3186 case NVPTXISD::Tex3DU32FloatGrad:
3187 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3188 break;
3189 case NVPTXISD::TexCubeFloatFloat:
3190 Opc = NVPTX::TEX_CUBE_F32_F32;
3191 break;
3192 case NVPTXISD::TexCubeFloatFloatLevel:
3193 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3194 break;
3195 case NVPTXISD::TexCubeS32Float:
3196 Opc = NVPTX::TEX_CUBE_S32_F32;
3197 break;
3198 case NVPTXISD::TexCubeS32FloatLevel:
3199 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3200 break;
3201 case NVPTXISD::TexCubeU32Float:
3202 Opc = NVPTX::TEX_CUBE_U32_F32;
3203 break;
3204 case NVPTXISD::TexCubeU32FloatLevel:
3205 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3206 break;
3207 case NVPTXISD::TexCubeArrayFloatFloat:
3208 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3209 break;
3210 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3211 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3212 break;
3213 case NVPTXISD::TexCubeArrayS32Float:
3214 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3215 break;
3216 case NVPTXISD::TexCubeArrayS32FloatLevel:
3217 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3218 break;
3219 case NVPTXISD::TexCubeArrayU32Float:
3220 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3221 break;
3222 case NVPTXISD::TexCubeArrayU32FloatLevel:
3223 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3224 break;
3225 case NVPTXISD::Tld4R2DFloatFloat:
3226 Opc = NVPTX::TLD4_R_2D_F32_F32;
3227 break;
3228 case NVPTXISD::Tld4G2DFloatFloat:
3229 Opc = NVPTX::TLD4_G_2D_F32_F32;
3230 break;
3231 case NVPTXISD::Tld4B2DFloatFloat:
3232 Opc = NVPTX::TLD4_B_2D_F32_F32;
3233 break;
3234 case NVPTXISD::Tld4A2DFloatFloat:
3235 Opc = NVPTX::TLD4_A_2D_F32_F32;
3236 break;
3237 case NVPTXISD::Tld4R2DS64Float:
3238 Opc = NVPTX::TLD4_R_2D_S32_F32;
3239 break;
3240 case NVPTXISD::Tld4G2DS64Float:
3241 Opc = NVPTX::TLD4_G_2D_S32_F32;
3242 break;
3243 case NVPTXISD::Tld4B2DS64Float:
3244 Opc = NVPTX::TLD4_B_2D_S32_F32;
3245 break;
3246 case NVPTXISD::Tld4A2DS64Float:
3247 Opc = NVPTX::TLD4_A_2D_S32_F32;
3248 break;
3249 case NVPTXISD::Tld4R2DU64Float:
3250 Opc = NVPTX::TLD4_R_2D_U32_F32;
3251 break;
3252 case NVPTXISD::Tld4G2DU64Float:
3253 Opc = NVPTX::TLD4_G_2D_U32_F32;
3254 break;
3255 case NVPTXISD::Tld4B2DU64Float:
3256 Opc = NVPTX::TLD4_B_2D_U32_F32;
3257 break;
3258 case NVPTXISD::Tld4A2DU64Float:
3259 Opc = NVPTX::TLD4_A_2D_U32_F32;
3260 break;
3261 case NVPTXISD::TexUnified1DFloatS32:
3262 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3263 break;
3264 case NVPTXISD::TexUnified1DFloatFloat:
3265 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3266 break;
3267 case NVPTXISD::TexUnified1DFloatFloatLevel:
3268 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3269 break;
3270 case NVPTXISD::TexUnified1DFloatFloatGrad:
3271 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3272 break;
3273 case NVPTXISD::TexUnified1DS32S32:
3274 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3275 break;
3276 case NVPTXISD::TexUnified1DS32Float:
3277 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3278 break;
3279 case NVPTXISD::TexUnified1DS32FloatLevel:
3280 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3281 break;
3282 case NVPTXISD::TexUnified1DS32FloatGrad:
3283 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3284 break;
3285 case NVPTXISD::TexUnified1DU32S32:
3286 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3287 break;
3288 case NVPTXISD::TexUnified1DU32Float:
3289 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3290 break;
3291 case NVPTXISD::TexUnified1DU32FloatLevel:
3292 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3293 break;
3294 case NVPTXISD::TexUnified1DU32FloatGrad:
3295 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3296 break;
3297 case NVPTXISD::TexUnified1DArrayFloatS32:
3298 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3299 break;
3300 case NVPTXISD::TexUnified1DArrayFloatFloat:
3301 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3302 break;
3303 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3304 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3305 break;
3306 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3307 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3308 break;
3309 case NVPTXISD::TexUnified1DArrayS32S32:
3310 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3311 break;
3312 case NVPTXISD::TexUnified1DArrayS32Float:
3313 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3314 break;
3315 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3316 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3317 break;
3318 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3319 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3320 break;
3321 case NVPTXISD::TexUnified1DArrayU32S32:
3322 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3323 break;
3324 case NVPTXISD::TexUnified1DArrayU32Float:
3325 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3326 break;
3327 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3328 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3329 break;
3330 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3331 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3332 break;
3333 case NVPTXISD::TexUnified2DFloatS32:
3334 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3335 break;
3336 case NVPTXISD::TexUnified2DFloatFloat:
3337 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3338 break;
3339 case NVPTXISD::TexUnified2DFloatFloatLevel:
3340 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3341 break;
3342 case NVPTXISD::TexUnified2DFloatFloatGrad:
3343 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3344 break;
3345 case NVPTXISD::TexUnified2DS32S32:
3346 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3347 break;
3348 case NVPTXISD::TexUnified2DS32Float:
3349 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3350 break;
3351 case NVPTXISD::TexUnified2DS32FloatLevel:
3352 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3353 break;
3354 case NVPTXISD::TexUnified2DS32FloatGrad:
3355 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3356 break;
3357 case NVPTXISD::TexUnified2DU32S32:
3358 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3359 break;
3360 case NVPTXISD::TexUnified2DU32Float:
3361 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3362 break;
3363 case NVPTXISD::TexUnified2DU32FloatLevel:
3364 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3365 break;
3366 case NVPTXISD::TexUnified2DU32FloatGrad:
3367 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3368 break;
3369 case NVPTXISD::TexUnified2DArrayFloatS32:
3370 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3371 break;
3372 case NVPTXISD::TexUnified2DArrayFloatFloat:
3373 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3374 break;
3375 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3376 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3377 break;
3378 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3379 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3380 break;
3381 case NVPTXISD::TexUnified2DArrayS32S32:
3382 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3383 break;
3384 case NVPTXISD::TexUnified2DArrayS32Float:
3385 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3386 break;
3387 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3388 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3389 break;
3390 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3391 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3392 break;
3393 case NVPTXISD::TexUnified2DArrayU32S32:
3394 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3395 break;
3396 case NVPTXISD::TexUnified2DArrayU32Float:
3397 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3398 break;
3399 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3400 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3401 break;
3402 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3403 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3404 break;
3405 case NVPTXISD::TexUnified3DFloatS32:
3406 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3407 break;
3408 case NVPTXISD::TexUnified3DFloatFloat:
3409 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3410 break;
3411 case NVPTXISD::TexUnified3DFloatFloatLevel:
3412 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3413 break;
3414 case NVPTXISD::TexUnified3DFloatFloatGrad:
3415 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3416 break;
3417 case NVPTXISD::TexUnified3DS32S32:
3418 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3419 break;
3420 case NVPTXISD::TexUnified3DS32Float:
3421 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3422 break;
3423 case NVPTXISD::TexUnified3DS32FloatLevel:
3424 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3425 break;
3426 case NVPTXISD::TexUnified3DS32FloatGrad:
3427 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3428 break;
3429 case NVPTXISD::TexUnified3DU32S32:
3430 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3431 break;
3432 case NVPTXISD::TexUnified3DU32Float:
3433 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3434 break;
3435 case NVPTXISD::TexUnified3DU32FloatLevel:
3436 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3437 break;
3438 case NVPTXISD::TexUnified3DU32FloatGrad:
3439 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3440 break;
3441 case NVPTXISD::TexUnifiedCubeFloatFloat:
3442 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3443 break;
3444 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3445 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3446 break;
3447 case NVPTXISD::TexUnifiedCubeS32Float:
3448 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3449 break;
3450 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3451 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3452 break;
3453 case NVPTXISD::TexUnifiedCubeU32Float:
3454 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3455 break;
3456 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3457 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3458 break;
3459 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3460 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3461 break;
3462 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3463 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3464 break;
3465 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3466 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3467 break;
3468 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3469 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3470 break;
3471 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3472 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3473 break;
3474 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3475 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3476 break;
3477 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3478 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3479 break;
3480 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3481 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3482 break;
3483 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3484 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3485 break;
3486 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3487 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3488 break;
3489 case NVPTXISD::Tld4UnifiedR2DS64Float:
3490 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3491 break;
3492 case NVPTXISD::Tld4UnifiedG2DS64Float:
3493 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3494 break;
3495 case NVPTXISD::Tld4UnifiedB2DS64Float:
3496 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3497 break;
3498 case NVPTXISD::Tld4UnifiedA2DS64Float:
3499 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3500 break;
3501 case NVPTXISD::Tld4UnifiedR2DU64Float:
3502 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3503 break;
3504 case NVPTXISD::Tld4UnifiedG2DU64Float:
3505 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3506 break;
3507 case NVPTXISD::Tld4UnifiedB2DU64Float:
3508 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3509 break;
3510 case NVPTXISD::Tld4UnifiedA2DU64Float:
3511 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3512 break;
3513 }
3514
3515 // Copy over operands
3516 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3517 Ops.push_back(N->getOperand(i));
3518 }
3519
3520 Ops.push_back(Chain);
3521 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3522 return Ret;
3523 }
3524
SelectSurfaceIntrinsic(SDNode * N)3525 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3526 SDValue Chain = N->getOperand(0);
3527 SDValue TexHandle = N->getOperand(1);
3528 SDNode *Ret = nullptr;
3529 unsigned Opc = 0;
3530 SmallVector<SDValue, 8> Ops;
3531 switch (N->getOpcode()) {
3532 default: return nullptr;
3533 case NVPTXISD::Suld1DI8Clamp:
3534 Opc = NVPTX::SULD_1D_I8_CLAMP;
3535 Ops.push_back(TexHandle);
3536 Ops.push_back(N->getOperand(2));
3537 Ops.push_back(Chain);
3538 break;
3539 case NVPTXISD::Suld1DI16Clamp:
3540 Opc = NVPTX::SULD_1D_I16_CLAMP;
3541 Ops.push_back(TexHandle);
3542 Ops.push_back(N->getOperand(2));
3543 Ops.push_back(Chain);
3544 break;
3545 case NVPTXISD::Suld1DI32Clamp:
3546 Opc = NVPTX::SULD_1D_I32_CLAMP;
3547 Ops.push_back(TexHandle);
3548 Ops.push_back(N->getOperand(2));
3549 Ops.push_back(Chain);
3550 break;
3551 case NVPTXISD::Suld1DI64Clamp:
3552 Opc = NVPTX::SULD_1D_I64_CLAMP;
3553 Ops.push_back(TexHandle);
3554 Ops.push_back(N->getOperand(2));
3555 Ops.push_back(Chain);
3556 break;
3557 case NVPTXISD::Suld1DV2I8Clamp:
3558 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3559 Ops.push_back(TexHandle);
3560 Ops.push_back(N->getOperand(2));
3561 Ops.push_back(Chain);
3562 break;
3563 case NVPTXISD::Suld1DV2I16Clamp:
3564 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3565 Ops.push_back(TexHandle);
3566 Ops.push_back(N->getOperand(2));
3567 Ops.push_back(Chain);
3568 break;
3569 case NVPTXISD::Suld1DV2I32Clamp:
3570 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3571 Ops.push_back(TexHandle);
3572 Ops.push_back(N->getOperand(2));
3573 Ops.push_back(Chain);
3574 break;
3575 case NVPTXISD::Suld1DV2I64Clamp:
3576 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3577 Ops.push_back(TexHandle);
3578 Ops.push_back(N->getOperand(2));
3579 Ops.push_back(Chain);
3580 break;
3581 case NVPTXISD::Suld1DV4I8Clamp:
3582 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3583 Ops.push_back(TexHandle);
3584 Ops.push_back(N->getOperand(2));
3585 Ops.push_back(Chain);
3586 break;
3587 case NVPTXISD::Suld1DV4I16Clamp:
3588 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3589 Ops.push_back(TexHandle);
3590 Ops.push_back(N->getOperand(2));
3591 Ops.push_back(Chain);
3592 break;
3593 case NVPTXISD::Suld1DV4I32Clamp:
3594 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3595 Ops.push_back(TexHandle);
3596 Ops.push_back(N->getOperand(2));
3597 Ops.push_back(Chain);
3598 break;
3599 case NVPTXISD::Suld1DArrayI8Clamp:
3600 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3601 Ops.push_back(TexHandle);
3602 Ops.push_back(N->getOperand(2));
3603 Ops.push_back(N->getOperand(3));
3604 Ops.push_back(Chain);
3605 break;
3606 case NVPTXISD::Suld1DArrayI16Clamp:
3607 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3608 Ops.push_back(TexHandle);
3609 Ops.push_back(N->getOperand(2));
3610 Ops.push_back(N->getOperand(3));
3611 Ops.push_back(Chain);
3612 break;
3613 case NVPTXISD::Suld1DArrayI32Clamp:
3614 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3615 Ops.push_back(TexHandle);
3616 Ops.push_back(N->getOperand(2));
3617 Ops.push_back(N->getOperand(3));
3618 Ops.push_back(Chain);
3619 break;
3620 case NVPTXISD::Suld1DArrayI64Clamp:
3621 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3622 Ops.push_back(TexHandle);
3623 Ops.push_back(N->getOperand(2));
3624 Ops.push_back(N->getOperand(3));
3625 Ops.push_back(Chain);
3626 break;
3627 case NVPTXISD::Suld1DArrayV2I8Clamp:
3628 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3629 Ops.push_back(TexHandle);
3630 Ops.push_back(N->getOperand(2));
3631 Ops.push_back(N->getOperand(3));
3632 Ops.push_back(Chain);
3633 break;
3634 case NVPTXISD::Suld1DArrayV2I16Clamp:
3635 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3636 Ops.push_back(TexHandle);
3637 Ops.push_back(N->getOperand(2));
3638 Ops.push_back(N->getOperand(3));
3639 Ops.push_back(Chain);
3640 break;
3641 case NVPTXISD::Suld1DArrayV2I32Clamp:
3642 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3643 Ops.push_back(TexHandle);
3644 Ops.push_back(N->getOperand(2));
3645 Ops.push_back(N->getOperand(3));
3646 Ops.push_back(Chain);
3647 break;
3648 case NVPTXISD::Suld1DArrayV2I64Clamp:
3649 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3650 Ops.push_back(TexHandle);
3651 Ops.push_back(N->getOperand(2));
3652 Ops.push_back(N->getOperand(3));
3653 Ops.push_back(Chain);
3654 break;
3655 case NVPTXISD::Suld1DArrayV4I8Clamp:
3656 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3657 Ops.push_back(TexHandle);
3658 Ops.push_back(N->getOperand(2));
3659 Ops.push_back(N->getOperand(3));
3660 Ops.push_back(Chain);
3661 break;
3662 case NVPTXISD::Suld1DArrayV4I16Clamp:
3663 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3664 Ops.push_back(TexHandle);
3665 Ops.push_back(N->getOperand(2));
3666 Ops.push_back(N->getOperand(3));
3667 Ops.push_back(Chain);
3668 break;
3669 case NVPTXISD::Suld1DArrayV4I32Clamp:
3670 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3671 Ops.push_back(TexHandle);
3672 Ops.push_back(N->getOperand(2));
3673 Ops.push_back(N->getOperand(3));
3674 Ops.push_back(Chain);
3675 break;
3676 case NVPTXISD::Suld2DI8Clamp:
3677 Opc = NVPTX::SULD_2D_I8_CLAMP;
3678 Ops.push_back(TexHandle);
3679 Ops.push_back(N->getOperand(2));
3680 Ops.push_back(N->getOperand(3));
3681 Ops.push_back(Chain);
3682 break;
3683 case NVPTXISD::Suld2DI16Clamp:
3684 Opc = NVPTX::SULD_2D_I16_CLAMP;
3685 Ops.push_back(TexHandle);
3686 Ops.push_back(N->getOperand(2));
3687 Ops.push_back(N->getOperand(3));
3688 Ops.push_back(Chain);
3689 break;
3690 case NVPTXISD::Suld2DI32Clamp:
3691 Opc = NVPTX::SULD_2D_I32_CLAMP;
3692 Ops.push_back(TexHandle);
3693 Ops.push_back(N->getOperand(2));
3694 Ops.push_back(N->getOperand(3));
3695 Ops.push_back(Chain);
3696 break;
3697 case NVPTXISD::Suld2DI64Clamp:
3698 Opc = NVPTX::SULD_2D_I64_CLAMP;
3699 Ops.push_back(TexHandle);
3700 Ops.push_back(N->getOperand(2));
3701 Ops.push_back(N->getOperand(3));
3702 Ops.push_back(Chain);
3703 break;
3704 case NVPTXISD::Suld2DV2I8Clamp:
3705 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3706 Ops.push_back(TexHandle);
3707 Ops.push_back(N->getOperand(2));
3708 Ops.push_back(N->getOperand(3));
3709 Ops.push_back(Chain);
3710 break;
3711 case NVPTXISD::Suld2DV2I16Clamp:
3712 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3713 Ops.push_back(TexHandle);
3714 Ops.push_back(N->getOperand(2));
3715 Ops.push_back(N->getOperand(3));
3716 Ops.push_back(Chain);
3717 break;
3718 case NVPTXISD::Suld2DV2I32Clamp:
3719 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3720 Ops.push_back(TexHandle);
3721 Ops.push_back(N->getOperand(2));
3722 Ops.push_back(N->getOperand(3));
3723 Ops.push_back(Chain);
3724 break;
3725 case NVPTXISD::Suld2DV2I64Clamp:
3726 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3727 Ops.push_back(TexHandle);
3728 Ops.push_back(N->getOperand(2));
3729 Ops.push_back(N->getOperand(3));
3730 Ops.push_back(Chain);
3731 break;
3732 case NVPTXISD::Suld2DV4I8Clamp:
3733 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3734 Ops.push_back(TexHandle);
3735 Ops.push_back(N->getOperand(2));
3736 Ops.push_back(N->getOperand(3));
3737 Ops.push_back(Chain);
3738 break;
3739 case NVPTXISD::Suld2DV4I16Clamp:
3740 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3741 Ops.push_back(TexHandle);
3742 Ops.push_back(N->getOperand(2));
3743 Ops.push_back(N->getOperand(3));
3744 Ops.push_back(Chain);
3745 break;
3746 case NVPTXISD::Suld2DV4I32Clamp:
3747 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3748 Ops.push_back(TexHandle);
3749 Ops.push_back(N->getOperand(2));
3750 Ops.push_back(N->getOperand(3));
3751 Ops.push_back(Chain);
3752 break;
3753 case NVPTXISD::Suld2DArrayI8Clamp:
3754 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3755 Ops.push_back(TexHandle);
3756 Ops.push_back(N->getOperand(2));
3757 Ops.push_back(N->getOperand(3));
3758 Ops.push_back(N->getOperand(4));
3759 Ops.push_back(Chain);
3760 break;
3761 case NVPTXISD::Suld2DArrayI16Clamp:
3762 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3763 Ops.push_back(TexHandle);
3764 Ops.push_back(N->getOperand(2));
3765 Ops.push_back(N->getOperand(3));
3766 Ops.push_back(N->getOperand(4));
3767 Ops.push_back(Chain);
3768 break;
3769 case NVPTXISD::Suld2DArrayI32Clamp:
3770 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3771 Ops.push_back(TexHandle);
3772 Ops.push_back(N->getOperand(2));
3773 Ops.push_back(N->getOperand(3));
3774 Ops.push_back(N->getOperand(4));
3775 Ops.push_back(Chain);
3776 break;
3777 case NVPTXISD::Suld2DArrayI64Clamp:
3778 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3779 Ops.push_back(TexHandle);
3780 Ops.push_back(N->getOperand(2));
3781 Ops.push_back(N->getOperand(3));
3782 Ops.push_back(N->getOperand(4));
3783 Ops.push_back(Chain);
3784 break;
3785 case NVPTXISD::Suld2DArrayV2I8Clamp:
3786 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3787 Ops.push_back(TexHandle);
3788 Ops.push_back(N->getOperand(2));
3789 Ops.push_back(N->getOperand(3));
3790 Ops.push_back(N->getOperand(4));
3791 Ops.push_back(Chain);
3792 break;
3793 case NVPTXISD::Suld2DArrayV2I16Clamp:
3794 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(N->getOperand(4));
3799 Ops.push_back(Chain);
3800 break;
3801 case NVPTXISD::Suld2DArrayV2I32Clamp:
3802 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3803 Ops.push_back(TexHandle);
3804 Ops.push_back(N->getOperand(2));
3805 Ops.push_back(N->getOperand(3));
3806 Ops.push_back(N->getOperand(4));
3807 Ops.push_back(Chain);
3808 break;
3809 case NVPTXISD::Suld2DArrayV2I64Clamp:
3810 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3811 Ops.push_back(TexHandle);
3812 Ops.push_back(N->getOperand(2));
3813 Ops.push_back(N->getOperand(3));
3814 Ops.push_back(N->getOperand(4));
3815 Ops.push_back(Chain);
3816 break;
3817 case NVPTXISD::Suld2DArrayV4I8Clamp:
3818 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3819 Ops.push_back(TexHandle);
3820 Ops.push_back(N->getOperand(2));
3821 Ops.push_back(N->getOperand(3));
3822 Ops.push_back(N->getOperand(4));
3823 Ops.push_back(Chain);
3824 break;
3825 case NVPTXISD::Suld2DArrayV4I16Clamp:
3826 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3827 Ops.push_back(TexHandle);
3828 Ops.push_back(N->getOperand(2));
3829 Ops.push_back(N->getOperand(3));
3830 Ops.push_back(N->getOperand(4));
3831 Ops.push_back(Chain);
3832 break;
3833 case NVPTXISD::Suld2DArrayV4I32Clamp:
3834 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3835 Ops.push_back(TexHandle);
3836 Ops.push_back(N->getOperand(2));
3837 Ops.push_back(N->getOperand(3));
3838 Ops.push_back(N->getOperand(4));
3839 Ops.push_back(Chain);
3840 break;
3841 case NVPTXISD::Suld3DI8Clamp:
3842 Opc = NVPTX::SULD_3D_I8_CLAMP;
3843 Ops.push_back(TexHandle);
3844 Ops.push_back(N->getOperand(2));
3845 Ops.push_back(N->getOperand(3));
3846 Ops.push_back(N->getOperand(4));
3847 Ops.push_back(Chain);
3848 break;
3849 case NVPTXISD::Suld3DI16Clamp:
3850 Opc = NVPTX::SULD_3D_I16_CLAMP;
3851 Ops.push_back(TexHandle);
3852 Ops.push_back(N->getOperand(2));
3853 Ops.push_back(N->getOperand(3));
3854 Ops.push_back(N->getOperand(4));
3855 Ops.push_back(Chain);
3856 break;
3857 case NVPTXISD::Suld3DI32Clamp:
3858 Opc = NVPTX::SULD_3D_I32_CLAMP;
3859 Ops.push_back(TexHandle);
3860 Ops.push_back(N->getOperand(2));
3861 Ops.push_back(N->getOperand(3));
3862 Ops.push_back(N->getOperand(4));
3863 Ops.push_back(Chain);
3864 break;
3865 case NVPTXISD::Suld3DI64Clamp:
3866 Opc = NVPTX::SULD_3D_I64_CLAMP;
3867 Ops.push_back(TexHandle);
3868 Ops.push_back(N->getOperand(2));
3869 Ops.push_back(N->getOperand(3));
3870 Ops.push_back(N->getOperand(4));
3871 Ops.push_back(Chain);
3872 break;
3873 case NVPTXISD::Suld3DV2I8Clamp:
3874 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3875 Ops.push_back(TexHandle);
3876 Ops.push_back(N->getOperand(2));
3877 Ops.push_back(N->getOperand(3));
3878 Ops.push_back(N->getOperand(4));
3879 Ops.push_back(Chain);
3880 break;
3881 case NVPTXISD::Suld3DV2I16Clamp:
3882 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3883 Ops.push_back(TexHandle);
3884 Ops.push_back(N->getOperand(2));
3885 Ops.push_back(N->getOperand(3));
3886 Ops.push_back(N->getOperand(4));
3887 Ops.push_back(Chain);
3888 break;
3889 case NVPTXISD::Suld3DV2I32Clamp:
3890 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3891 Ops.push_back(TexHandle);
3892 Ops.push_back(N->getOperand(2));
3893 Ops.push_back(N->getOperand(3));
3894 Ops.push_back(N->getOperand(4));
3895 Ops.push_back(Chain);
3896 break;
3897 case NVPTXISD::Suld3DV2I64Clamp:
3898 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3899 Ops.push_back(TexHandle);
3900 Ops.push_back(N->getOperand(2));
3901 Ops.push_back(N->getOperand(3));
3902 Ops.push_back(N->getOperand(4));
3903 Ops.push_back(Chain);
3904 break;
3905 case NVPTXISD::Suld3DV4I8Clamp:
3906 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3907 Ops.push_back(TexHandle);
3908 Ops.push_back(N->getOperand(2));
3909 Ops.push_back(N->getOperand(3));
3910 Ops.push_back(N->getOperand(4));
3911 Ops.push_back(Chain);
3912 break;
3913 case NVPTXISD::Suld3DV4I16Clamp:
3914 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3915 Ops.push_back(TexHandle);
3916 Ops.push_back(N->getOperand(2));
3917 Ops.push_back(N->getOperand(3));
3918 Ops.push_back(N->getOperand(4));
3919 Ops.push_back(Chain);
3920 break;
3921 case NVPTXISD::Suld3DV4I32Clamp:
3922 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3923 Ops.push_back(TexHandle);
3924 Ops.push_back(N->getOperand(2));
3925 Ops.push_back(N->getOperand(3));
3926 Ops.push_back(N->getOperand(4));
3927 Ops.push_back(Chain);
3928 break;
3929 case NVPTXISD::Suld1DI8Trap:
3930 Opc = NVPTX::SULD_1D_I8_TRAP;
3931 Ops.push_back(TexHandle);
3932 Ops.push_back(N->getOperand(2));
3933 Ops.push_back(Chain);
3934 break;
3935 case NVPTXISD::Suld1DI16Trap:
3936 Opc = NVPTX::SULD_1D_I16_TRAP;
3937 Ops.push_back(TexHandle);
3938 Ops.push_back(N->getOperand(2));
3939 Ops.push_back(Chain);
3940 break;
3941 case NVPTXISD::Suld1DI32Trap:
3942 Opc = NVPTX::SULD_1D_I32_TRAP;
3943 Ops.push_back(TexHandle);
3944 Ops.push_back(N->getOperand(2));
3945 Ops.push_back(Chain);
3946 break;
3947 case NVPTXISD::Suld1DI64Trap:
3948 Opc = NVPTX::SULD_1D_I64_TRAP;
3949 Ops.push_back(TexHandle);
3950 Ops.push_back(N->getOperand(2));
3951 Ops.push_back(Chain);
3952 break;
3953 case NVPTXISD::Suld1DV2I8Trap:
3954 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3955 Ops.push_back(TexHandle);
3956 Ops.push_back(N->getOperand(2));
3957 Ops.push_back(Chain);
3958 break;
3959 case NVPTXISD::Suld1DV2I16Trap:
3960 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3961 Ops.push_back(TexHandle);
3962 Ops.push_back(N->getOperand(2));
3963 Ops.push_back(Chain);
3964 break;
3965 case NVPTXISD::Suld1DV2I32Trap:
3966 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3967 Ops.push_back(TexHandle);
3968 Ops.push_back(N->getOperand(2));
3969 Ops.push_back(Chain);
3970 break;
3971 case NVPTXISD::Suld1DV2I64Trap:
3972 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3973 Ops.push_back(TexHandle);
3974 Ops.push_back(N->getOperand(2));
3975 Ops.push_back(Chain);
3976 break;
3977 case NVPTXISD::Suld1DV4I8Trap:
3978 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3979 Ops.push_back(TexHandle);
3980 Ops.push_back(N->getOperand(2));
3981 Ops.push_back(Chain);
3982 break;
3983 case NVPTXISD::Suld1DV4I16Trap:
3984 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3985 Ops.push_back(TexHandle);
3986 Ops.push_back(N->getOperand(2));
3987 Ops.push_back(Chain);
3988 break;
3989 case NVPTXISD::Suld1DV4I32Trap:
3990 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3991 Ops.push_back(TexHandle);
3992 Ops.push_back(N->getOperand(2));
3993 Ops.push_back(Chain);
3994 break;
3995 case NVPTXISD::Suld1DArrayI8Trap:
3996 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3997 Ops.push_back(TexHandle);
3998 Ops.push_back(N->getOperand(2));
3999 Ops.push_back(N->getOperand(3));
4000 Ops.push_back(Chain);
4001 break;
4002 case NVPTXISD::Suld1DArrayI16Trap:
4003 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4004 Ops.push_back(TexHandle);
4005 Ops.push_back(N->getOperand(2));
4006 Ops.push_back(N->getOperand(3));
4007 Ops.push_back(Chain);
4008 break;
4009 case NVPTXISD::Suld1DArrayI32Trap:
4010 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4011 Ops.push_back(TexHandle);
4012 Ops.push_back(N->getOperand(2));
4013 Ops.push_back(N->getOperand(3));
4014 Ops.push_back(Chain);
4015 break;
4016 case NVPTXISD::Suld1DArrayI64Trap:
4017 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4018 Ops.push_back(TexHandle);
4019 Ops.push_back(N->getOperand(2));
4020 Ops.push_back(N->getOperand(3));
4021 Ops.push_back(Chain);
4022 break;
4023 case NVPTXISD::Suld1DArrayV2I8Trap:
4024 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4025 Ops.push_back(TexHandle);
4026 Ops.push_back(N->getOperand(2));
4027 Ops.push_back(N->getOperand(3));
4028 Ops.push_back(Chain);
4029 break;
4030 case NVPTXISD::Suld1DArrayV2I16Trap:
4031 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4032 Ops.push_back(TexHandle);
4033 Ops.push_back(N->getOperand(2));
4034 Ops.push_back(N->getOperand(3));
4035 Ops.push_back(Chain);
4036 break;
4037 case NVPTXISD::Suld1DArrayV2I32Trap:
4038 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4039 Ops.push_back(TexHandle);
4040 Ops.push_back(N->getOperand(2));
4041 Ops.push_back(N->getOperand(3));
4042 Ops.push_back(Chain);
4043 break;
4044 case NVPTXISD::Suld1DArrayV2I64Trap:
4045 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4046 Ops.push_back(TexHandle);
4047 Ops.push_back(N->getOperand(2));
4048 Ops.push_back(N->getOperand(3));
4049 Ops.push_back(Chain);
4050 break;
4051 case NVPTXISD::Suld1DArrayV4I8Trap:
4052 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(N->getOperand(3));
4056 Ops.push_back(Chain);
4057 break;
4058 case NVPTXISD::Suld1DArrayV4I16Trap:
4059 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4060 Ops.push_back(TexHandle);
4061 Ops.push_back(N->getOperand(2));
4062 Ops.push_back(N->getOperand(3));
4063 Ops.push_back(Chain);
4064 break;
4065 case NVPTXISD::Suld1DArrayV4I32Trap:
4066 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4067 Ops.push_back(TexHandle);
4068 Ops.push_back(N->getOperand(2));
4069 Ops.push_back(N->getOperand(3));
4070 Ops.push_back(Chain);
4071 break;
4072 case NVPTXISD::Suld2DI8Trap:
4073 Opc = NVPTX::SULD_2D_I8_TRAP;
4074 Ops.push_back(TexHandle);
4075 Ops.push_back(N->getOperand(2));
4076 Ops.push_back(N->getOperand(3));
4077 Ops.push_back(Chain);
4078 break;
4079 case NVPTXISD::Suld2DI16Trap:
4080 Opc = NVPTX::SULD_2D_I16_TRAP;
4081 Ops.push_back(TexHandle);
4082 Ops.push_back(N->getOperand(2));
4083 Ops.push_back(N->getOperand(3));
4084 Ops.push_back(Chain);
4085 break;
4086 case NVPTXISD::Suld2DI32Trap:
4087 Opc = NVPTX::SULD_2D_I32_TRAP;
4088 Ops.push_back(TexHandle);
4089 Ops.push_back(N->getOperand(2));
4090 Ops.push_back(N->getOperand(3));
4091 Ops.push_back(Chain);
4092 break;
4093 case NVPTXISD::Suld2DI64Trap:
4094 Opc = NVPTX::SULD_2D_I64_TRAP;
4095 Ops.push_back(TexHandle);
4096 Ops.push_back(N->getOperand(2));
4097 Ops.push_back(N->getOperand(3));
4098 Ops.push_back(Chain);
4099 break;
4100 case NVPTXISD::Suld2DV2I8Trap:
4101 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4102 Ops.push_back(TexHandle);
4103 Ops.push_back(N->getOperand(2));
4104 Ops.push_back(N->getOperand(3));
4105 Ops.push_back(Chain);
4106 break;
4107 case NVPTXISD::Suld2DV2I16Trap:
4108 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4109 Ops.push_back(TexHandle);
4110 Ops.push_back(N->getOperand(2));
4111 Ops.push_back(N->getOperand(3));
4112 Ops.push_back(Chain);
4113 break;
4114 case NVPTXISD::Suld2DV2I32Trap:
4115 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4116 Ops.push_back(TexHandle);
4117 Ops.push_back(N->getOperand(2));
4118 Ops.push_back(N->getOperand(3));
4119 Ops.push_back(Chain);
4120 break;
4121 case NVPTXISD::Suld2DV2I64Trap:
4122 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4123 Ops.push_back(TexHandle);
4124 Ops.push_back(N->getOperand(2));
4125 Ops.push_back(N->getOperand(3));
4126 Ops.push_back(Chain);
4127 break;
4128 case NVPTXISD::Suld2DV4I8Trap:
4129 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4130 Ops.push_back(TexHandle);
4131 Ops.push_back(N->getOperand(2));
4132 Ops.push_back(N->getOperand(3));
4133 Ops.push_back(Chain);
4134 break;
4135 case NVPTXISD::Suld2DV4I16Trap:
4136 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4137 Ops.push_back(TexHandle);
4138 Ops.push_back(N->getOperand(2));
4139 Ops.push_back(N->getOperand(3));
4140 Ops.push_back(Chain);
4141 break;
4142 case NVPTXISD::Suld2DV4I32Trap:
4143 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4144 Ops.push_back(TexHandle);
4145 Ops.push_back(N->getOperand(2));
4146 Ops.push_back(N->getOperand(3));
4147 Ops.push_back(Chain);
4148 break;
4149 case NVPTXISD::Suld2DArrayI8Trap:
4150 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4151 Ops.push_back(TexHandle);
4152 Ops.push_back(N->getOperand(2));
4153 Ops.push_back(N->getOperand(3));
4154 Ops.push_back(N->getOperand(4));
4155 Ops.push_back(Chain);
4156 break;
4157 case NVPTXISD::Suld2DArrayI16Trap:
4158 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4159 Ops.push_back(TexHandle);
4160 Ops.push_back(N->getOperand(2));
4161 Ops.push_back(N->getOperand(3));
4162 Ops.push_back(N->getOperand(4));
4163 Ops.push_back(Chain);
4164 break;
4165 case NVPTXISD::Suld2DArrayI32Trap:
4166 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4167 Ops.push_back(TexHandle);
4168 Ops.push_back(N->getOperand(2));
4169 Ops.push_back(N->getOperand(3));
4170 Ops.push_back(N->getOperand(4));
4171 Ops.push_back(Chain);
4172 break;
4173 case NVPTXISD::Suld2DArrayI64Trap:
4174 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4175 Ops.push_back(TexHandle);
4176 Ops.push_back(N->getOperand(2));
4177 Ops.push_back(N->getOperand(3));
4178 Ops.push_back(N->getOperand(4));
4179 Ops.push_back(Chain);
4180 break;
4181 case NVPTXISD::Suld2DArrayV2I8Trap:
4182 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4183 Ops.push_back(TexHandle);
4184 Ops.push_back(N->getOperand(2));
4185 Ops.push_back(N->getOperand(3));
4186 Ops.push_back(N->getOperand(4));
4187 Ops.push_back(Chain);
4188 break;
4189 case NVPTXISD::Suld2DArrayV2I16Trap:
4190 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(N->getOperand(4));
4195 Ops.push_back(Chain);
4196 break;
4197 case NVPTXISD::Suld2DArrayV2I32Trap:
4198 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4199 Ops.push_back(TexHandle);
4200 Ops.push_back(N->getOperand(2));
4201 Ops.push_back(N->getOperand(3));
4202 Ops.push_back(N->getOperand(4));
4203 Ops.push_back(Chain);
4204 break;
4205 case NVPTXISD::Suld2DArrayV2I64Trap:
4206 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4207 Ops.push_back(TexHandle);
4208 Ops.push_back(N->getOperand(2));
4209 Ops.push_back(N->getOperand(3));
4210 Ops.push_back(N->getOperand(4));
4211 Ops.push_back(Chain);
4212 break;
4213 case NVPTXISD::Suld2DArrayV4I8Trap:
4214 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4215 Ops.push_back(TexHandle);
4216 Ops.push_back(N->getOperand(2));
4217 Ops.push_back(N->getOperand(3));
4218 Ops.push_back(N->getOperand(4));
4219 Ops.push_back(Chain);
4220 break;
4221 case NVPTXISD::Suld2DArrayV4I16Trap:
4222 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4223 Ops.push_back(TexHandle);
4224 Ops.push_back(N->getOperand(2));
4225 Ops.push_back(N->getOperand(3));
4226 Ops.push_back(N->getOperand(4));
4227 Ops.push_back(Chain);
4228 break;
4229 case NVPTXISD::Suld2DArrayV4I32Trap:
4230 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4231 Ops.push_back(TexHandle);
4232 Ops.push_back(N->getOperand(2));
4233 Ops.push_back(N->getOperand(3));
4234 Ops.push_back(N->getOperand(4));
4235 Ops.push_back(Chain);
4236 break;
4237 case NVPTXISD::Suld3DI8Trap:
4238 Opc = NVPTX::SULD_3D_I8_TRAP;
4239 Ops.push_back(TexHandle);
4240 Ops.push_back(N->getOperand(2));
4241 Ops.push_back(N->getOperand(3));
4242 Ops.push_back(N->getOperand(4));
4243 Ops.push_back(Chain);
4244 break;
4245 case NVPTXISD::Suld3DI16Trap:
4246 Opc = NVPTX::SULD_3D_I16_TRAP;
4247 Ops.push_back(TexHandle);
4248 Ops.push_back(N->getOperand(2));
4249 Ops.push_back(N->getOperand(3));
4250 Ops.push_back(N->getOperand(4));
4251 Ops.push_back(Chain);
4252 break;
4253 case NVPTXISD::Suld3DI32Trap:
4254 Opc = NVPTX::SULD_3D_I32_TRAP;
4255 Ops.push_back(TexHandle);
4256 Ops.push_back(N->getOperand(2));
4257 Ops.push_back(N->getOperand(3));
4258 Ops.push_back(N->getOperand(4));
4259 Ops.push_back(Chain);
4260 break;
4261 case NVPTXISD::Suld3DI64Trap:
4262 Opc = NVPTX::SULD_3D_I64_TRAP;
4263 Ops.push_back(TexHandle);
4264 Ops.push_back(N->getOperand(2));
4265 Ops.push_back(N->getOperand(3));
4266 Ops.push_back(N->getOperand(4));
4267 Ops.push_back(Chain);
4268 break;
4269 case NVPTXISD::Suld3DV2I8Trap:
4270 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4271 Ops.push_back(TexHandle);
4272 Ops.push_back(N->getOperand(2));
4273 Ops.push_back(N->getOperand(3));
4274 Ops.push_back(N->getOperand(4));
4275 Ops.push_back(Chain);
4276 break;
4277 case NVPTXISD::Suld3DV2I16Trap:
4278 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4279 Ops.push_back(TexHandle);
4280 Ops.push_back(N->getOperand(2));
4281 Ops.push_back(N->getOperand(3));
4282 Ops.push_back(N->getOperand(4));
4283 Ops.push_back(Chain);
4284 break;
4285 case NVPTXISD::Suld3DV2I32Trap:
4286 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4287 Ops.push_back(TexHandle);
4288 Ops.push_back(N->getOperand(2));
4289 Ops.push_back(N->getOperand(3));
4290 Ops.push_back(N->getOperand(4));
4291 Ops.push_back(Chain);
4292 break;
4293 case NVPTXISD::Suld3DV2I64Trap:
4294 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4295 Ops.push_back(TexHandle);
4296 Ops.push_back(N->getOperand(2));
4297 Ops.push_back(N->getOperand(3));
4298 Ops.push_back(N->getOperand(4));
4299 Ops.push_back(Chain);
4300 break;
4301 case NVPTXISD::Suld3DV4I8Trap:
4302 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4303 Ops.push_back(TexHandle);
4304 Ops.push_back(N->getOperand(2));
4305 Ops.push_back(N->getOperand(3));
4306 Ops.push_back(N->getOperand(4));
4307 Ops.push_back(Chain);
4308 break;
4309 case NVPTXISD::Suld3DV4I16Trap:
4310 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4311 Ops.push_back(TexHandle);
4312 Ops.push_back(N->getOperand(2));
4313 Ops.push_back(N->getOperand(3));
4314 Ops.push_back(N->getOperand(4));
4315 Ops.push_back(Chain);
4316 break;
4317 case NVPTXISD::Suld3DV4I32Trap:
4318 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4319 Ops.push_back(TexHandle);
4320 Ops.push_back(N->getOperand(2));
4321 Ops.push_back(N->getOperand(3));
4322 Ops.push_back(N->getOperand(4));
4323 Ops.push_back(Chain);
4324 break;
4325 case NVPTXISD::Suld1DI8Zero:
4326 Opc = NVPTX::SULD_1D_I8_ZERO;
4327 Ops.push_back(TexHandle);
4328 Ops.push_back(N->getOperand(2));
4329 Ops.push_back(Chain);
4330 break;
4331 case NVPTXISD::Suld1DI16Zero:
4332 Opc = NVPTX::SULD_1D_I16_ZERO;
4333 Ops.push_back(TexHandle);
4334 Ops.push_back(N->getOperand(2));
4335 Ops.push_back(Chain);
4336 break;
4337 case NVPTXISD::Suld1DI32Zero:
4338 Opc = NVPTX::SULD_1D_I32_ZERO;
4339 Ops.push_back(TexHandle);
4340 Ops.push_back(N->getOperand(2));
4341 Ops.push_back(Chain);
4342 break;
4343 case NVPTXISD::Suld1DI64Zero:
4344 Opc = NVPTX::SULD_1D_I64_ZERO;
4345 Ops.push_back(TexHandle);
4346 Ops.push_back(N->getOperand(2));
4347 Ops.push_back(Chain);
4348 break;
4349 case NVPTXISD::Suld1DV2I8Zero:
4350 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4351 Ops.push_back(TexHandle);
4352 Ops.push_back(N->getOperand(2));
4353 Ops.push_back(Chain);
4354 break;
4355 case NVPTXISD::Suld1DV2I16Zero:
4356 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4357 Ops.push_back(TexHandle);
4358 Ops.push_back(N->getOperand(2));
4359 Ops.push_back(Chain);
4360 break;
4361 case NVPTXISD::Suld1DV2I32Zero:
4362 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4363 Ops.push_back(TexHandle);
4364 Ops.push_back(N->getOperand(2));
4365 Ops.push_back(Chain);
4366 break;
4367 case NVPTXISD::Suld1DV2I64Zero:
4368 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4369 Ops.push_back(TexHandle);
4370 Ops.push_back(N->getOperand(2));
4371 Ops.push_back(Chain);
4372 break;
4373 case NVPTXISD::Suld1DV4I8Zero:
4374 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4375 Ops.push_back(TexHandle);
4376 Ops.push_back(N->getOperand(2));
4377 Ops.push_back(Chain);
4378 break;
4379 case NVPTXISD::Suld1DV4I16Zero:
4380 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4381 Ops.push_back(TexHandle);
4382 Ops.push_back(N->getOperand(2));
4383 Ops.push_back(Chain);
4384 break;
4385 case NVPTXISD::Suld1DV4I32Zero:
4386 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4387 Ops.push_back(TexHandle);
4388 Ops.push_back(N->getOperand(2));
4389 Ops.push_back(Chain);
4390 break;
4391 case NVPTXISD::Suld1DArrayI8Zero:
4392 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4393 Ops.push_back(TexHandle);
4394 Ops.push_back(N->getOperand(2));
4395 Ops.push_back(N->getOperand(3));
4396 Ops.push_back(Chain);
4397 break;
4398 case NVPTXISD::Suld1DArrayI16Zero:
4399 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4400 Ops.push_back(TexHandle);
4401 Ops.push_back(N->getOperand(2));
4402 Ops.push_back(N->getOperand(3));
4403 Ops.push_back(Chain);
4404 break;
4405 case NVPTXISD::Suld1DArrayI32Zero:
4406 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4407 Ops.push_back(TexHandle);
4408 Ops.push_back(N->getOperand(2));
4409 Ops.push_back(N->getOperand(3));
4410 Ops.push_back(Chain);
4411 break;
4412 case NVPTXISD::Suld1DArrayI64Zero:
4413 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4414 Ops.push_back(TexHandle);
4415 Ops.push_back(N->getOperand(2));
4416 Ops.push_back(N->getOperand(3));
4417 Ops.push_back(Chain);
4418 break;
4419 case NVPTXISD::Suld1DArrayV2I8Zero:
4420 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4421 Ops.push_back(TexHandle);
4422 Ops.push_back(N->getOperand(2));
4423 Ops.push_back(N->getOperand(3));
4424 Ops.push_back(Chain);
4425 break;
4426 case NVPTXISD::Suld1DArrayV2I16Zero:
4427 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4428 Ops.push_back(TexHandle);
4429 Ops.push_back(N->getOperand(2));
4430 Ops.push_back(N->getOperand(3));
4431 Ops.push_back(Chain);
4432 break;
4433 case NVPTXISD::Suld1DArrayV2I32Zero:
4434 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4435 Ops.push_back(TexHandle);
4436 Ops.push_back(N->getOperand(2));
4437 Ops.push_back(N->getOperand(3));
4438 Ops.push_back(Chain);
4439 break;
4440 case NVPTXISD::Suld1DArrayV2I64Zero:
4441 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4442 Ops.push_back(TexHandle);
4443 Ops.push_back(N->getOperand(2));
4444 Ops.push_back(N->getOperand(3));
4445 Ops.push_back(Chain);
4446 break;
4447 case NVPTXISD::Suld1DArrayV4I8Zero:
4448 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(N->getOperand(3));
4452 Ops.push_back(Chain);
4453 break;
4454 case NVPTXISD::Suld1DArrayV4I16Zero:
4455 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4456 Ops.push_back(TexHandle);
4457 Ops.push_back(N->getOperand(2));
4458 Ops.push_back(N->getOperand(3));
4459 Ops.push_back(Chain);
4460 break;
4461 case NVPTXISD::Suld1DArrayV4I32Zero:
4462 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4463 Ops.push_back(TexHandle);
4464 Ops.push_back(N->getOperand(2));
4465 Ops.push_back(N->getOperand(3));
4466 Ops.push_back(Chain);
4467 break;
4468 case NVPTXISD::Suld2DI8Zero:
4469 Opc = NVPTX::SULD_2D_I8_ZERO;
4470 Ops.push_back(TexHandle);
4471 Ops.push_back(N->getOperand(2));
4472 Ops.push_back(N->getOperand(3));
4473 Ops.push_back(Chain);
4474 break;
4475 case NVPTXISD::Suld2DI16Zero:
4476 Opc = NVPTX::SULD_2D_I16_ZERO;
4477 Ops.push_back(TexHandle);
4478 Ops.push_back(N->getOperand(2));
4479 Ops.push_back(N->getOperand(3));
4480 Ops.push_back(Chain);
4481 break;
4482 case NVPTXISD::Suld2DI32Zero:
4483 Opc = NVPTX::SULD_2D_I32_ZERO;
4484 Ops.push_back(TexHandle);
4485 Ops.push_back(N->getOperand(2));
4486 Ops.push_back(N->getOperand(3));
4487 Ops.push_back(Chain);
4488 break;
4489 case NVPTXISD::Suld2DI64Zero:
4490 Opc = NVPTX::SULD_2D_I64_ZERO;
4491 Ops.push_back(TexHandle);
4492 Ops.push_back(N->getOperand(2));
4493 Ops.push_back(N->getOperand(3));
4494 Ops.push_back(Chain);
4495 break;
4496 case NVPTXISD::Suld2DV2I8Zero:
4497 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4498 Ops.push_back(TexHandle);
4499 Ops.push_back(N->getOperand(2));
4500 Ops.push_back(N->getOperand(3));
4501 Ops.push_back(Chain);
4502 break;
4503 case NVPTXISD::Suld2DV2I16Zero:
4504 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4505 Ops.push_back(TexHandle);
4506 Ops.push_back(N->getOperand(2));
4507 Ops.push_back(N->getOperand(3));
4508 Ops.push_back(Chain);
4509 break;
4510 case NVPTXISD::Suld2DV2I32Zero:
4511 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4512 Ops.push_back(TexHandle);
4513 Ops.push_back(N->getOperand(2));
4514 Ops.push_back(N->getOperand(3));
4515 Ops.push_back(Chain);
4516 break;
4517 case NVPTXISD::Suld2DV2I64Zero:
4518 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4519 Ops.push_back(TexHandle);
4520 Ops.push_back(N->getOperand(2));
4521 Ops.push_back(N->getOperand(3));
4522 Ops.push_back(Chain);
4523 break;
4524 case NVPTXISD::Suld2DV4I8Zero:
4525 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4526 Ops.push_back(TexHandle);
4527 Ops.push_back(N->getOperand(2));
4528 Ops.push_back(N->getOperand(3));
4529 Ops.push_back(Chain);
4530 break;
4531 case NVPTXISD::Suld2DV4I16Zero:
4532 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4533 Ops.push_back(TexHandle);
4534 Ops.push_back(N->getOperand(2));
4535 Ops.push_back(N->getOperand(3));
4536 Ops.push_back(Chain);
4537 break;
4538 case NVPTXISD::Suld2DV4I32Zero:
4539 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4540 Ops.push_back(TexHandle);
4541 Ops.push_back(N->getOperand(2));
4542 Ops.push_back(N->getOperand(3));
4543 Ops.push_back(Chain);
4544 break;
4545 case NVPTXISD::Suld2DArrayI8Zero:
4546 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4547 Ops.push_back(TexHandle);
4548 Ops.push_back(N->getOperand(2));
4549 Ops.push_back(N->getOperand(3));
4550 Ops.push_back(N->getOperand(4));
4551 Ops.push_back(Chain);
4552 break;
4553 case NVPTXISD::Suld2DArrayI16Zero:
4554 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4555 Ops.push_back(TexHandle);
4556 Ops.push_back(N->getOperand(2));
4557 Ops.push_back(N->getOperand(3));
4558 Ops.push_back(N->getOperand(4));
4559 Ops.push_back(Chain);
4560 break;
4561 case NVPTXISD::Suld2DArrayI32Zero:
4562 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4563 Ops.push_back(TexHandle);
4564 Ops.push_back(N->getOperand(2));
4565 Ops.push_back(N->getOperand(3));
4566 Ops.push_back(N->getOperand(4));
4567 Ops.push_back(Chain);
4568 break;
4569 case NVPTXISD::Suld2DArrayI64Zero:
4570 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4571 Ops.push_back(TexHandle);
4572 Ops.push_back(N->getOperand(2));
4573 Ops.push_back(N->getOperand(3));
4574 Ops.push_back(N->getOperand(4));
4575 Ops.push_back(Chain);
4576 break;
4577 case NVPTXISD::Suld2DArrayV2I8Zero:
4578 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4579 Ops.push_back(TexHandle);
4580 Ops.push_back(N->getOperand(2));
4581 Ops.push_back(N->getOperand(3));
4582 Ops.push_back(N->getOperand(4));
4583 Ops.push_back(Chain);
4584 break;
4585 case NVPTXISD::Suld2DArrayV2I16Zero:
4586 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(N->getOperand(4));
4591 Ops.push_back(Chain);
4592 break;
4593 case NVPTXISD::Suld2DArrayV2I32Zero:
4594 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4595 Ops.push_back(TexHandle);
4596 Ops.push_back(N->getOperand(2));
4597 Ops.push_back(N->getOperand(3));
4598 Ops.push_back(N->getOperand(4));
4599 Ops.push_back(Chain);
4600 break;
4601 case NVPTXISD::Suld2DArrayV2I64Zero:
4602 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4603 Ops.push_back(TexHandle);
4604 Ops.push_back(N->getOperand(2));
4605 Ops.push_back(N->getOperand(3));
4606 Ops.push_back(N->getOperand(4));
4607 Ops.push_back(Chain);
4608 break;
4609 case NVPTXISD::Suld2DArrayV4I8Zero:
4610 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4611 Ops.push_back(TexHandle);
4612 Ops.push_back(N->getOperand(2));
4613 Ops.push_back(N->getOperand(3));
4614 Ops.push_back(N->getOperand(4));
4615 Ops.push_back(Chain);
4616 break;
4617 case NVPTXISD::Suld2DArrayV4I16Zero:
4618 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4619 Ops.push_back(TexHandle);
4620 Ops.push_back(N->getOperand(2));
4621 Ops.push_back(N->getOperand(3));
4622 Ops.push_back(N->getOperand(4));
4623 Ops.push_back(Chain);
4624 break;
4625 case NVPTXISD::Suld2DArrayV4I32Zero:
4626 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4627 Ops.push_back(TexHandle);
4628 Ops.push_back(N->getOperand(2));
4629 Ops.push_back(N->getOperand(3));
4630 Ops.push_back(N->getOperand(4));
4631 Ops.push_back(Chain);
4632 break;
4633 case NVPTXISD::Suld3DI8Zero:
4634 Opc = NVPTX::SULD_3D_I8_ZERO;
4635 Ops.push_back(TexHandle);
4636 Ops.push_back(N->getOperand(2));
4637 Ops.push_back(N->getOperand(3));
4638 Ops.push_back(N->getOperand(4));
4639 Ops.push_back(Chain);
4640 break;
4641 case NVPTXISD::Suld3DI16Zero:
4642 Opc = NVPTX::SULD_3D_I16_ZERO;
4643 Ops.push_back(TexHandle);
4644 Ops.push_back(N->getOperand(2));
4645 Ops.push_back(N->getOperand(3));
4646 Ops.push_back(N->getOperand(4));
4647 Ops.push_back(Chain);
4648 break;
4649 case NVPTXISD::Suld3DI32Zero:
4650 Opc = NVPTX::SULD_3D_I32_ZERO;
4651 Ops.push_back(TexHandle);
4652 Ops.push_back(N->getOperand(2));
4653 Ops.push_back(N->getOperand(3));
4654 Ops.push_back(N->getOperand(4));
4655 Ops.push_back(Chain);
4656 break;
4657 case NVPTXISD::Suld3DI64Zero:
4658 Opc = NVPTX::SULD_3D_I64_ZERO;
4659 Ops.push_back(TexHandle);
4660 Ops.push_back(N->getOperand(2));
4661 Ops.push_back(N->getOperand(3));
4662 Ops.push_back(N->getOperand(4));
4663 Ops.push_back(Chain);
4664 break;
4665 case NVPTXISD::Suld3DV2I8Zero:
4666 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4667 Ops.push_back(TexHandle);
4668 Ops.push_back(N->getOperand(2));
4669 Ops.push_back(N->getOperand(3));
4670 Ops.push_back(N->getOperand(4));
4671 Ops.push_back(Chain);
4672 break;
4673 case NVPTXISD::Suld3DV2I16Zero:
4674 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4675 Ops.push_back(TexHandle);
4676 Ops.push_back(N->getOperand(2));
4677 Ops.push_back(N->getOperand(3));
4678 Ops.push_back(N->getOperand(4));
4679 Ops.push_back(Chain);
4680 break;
4681 case NVPTXISD::Suld3DV2I32Zero:
4682 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4683 Ops.push_back(TexHandle);
4684 Ops.push_back(N->getOperand(2));
4685 Ops.push_back(N->getOperand(3));
4686 Ops.push_back(N->getOperand(4));
4687 Ops.push_back(Chain);
4688 break;
4689 case NVPTXISD::Suld3DV2I64Zero:
4690 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4691 Ops.push_back(TexHandle);
4692 Ops.push_back(N->getOperand(2));
4693 Ops.push_back(N->getOperand(3));
4694 Ops.push_back(N->getOperand(4));
4695 Ops.push_back(Chain);
4696 break;
4697 case NVPTXISD::Suld3DV4I8Zero:
4698 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4699 Ops.push_back(TexHandle);
4700 Ops.push_back(N->getOperand(2));
4701 Ops.push_back(N->getOperand(3));
4702 Ops.push_back(N->getOperand(4));
4703 Ops.push_back(Chain);
4704 break;
4705 case NVPTXISD::Suld3DV4I16Zero:
4706 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4707 Ops.push_back(TexHandle);
4708 Ops.push_back(N->getOperand(2));
4709 Ops.push_back(N->getOperand(3));
4710 Ops.push_back(N->getOperand(4));
4711 Ops.push_back(Chain);
4712 break;
4713 case NVPTXISD::Suld3DV4I32Zero:
4714 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4715 Ops.push_back(TexHandle);
4716 Ops.push_back(N->getOperand(2));
4717 Ops.push_back(N->getOperand(3));
4718 Ops.push_back(N->getOperand(4));
4719 Ops.push_back(Chain);
4720 break;
4721 }
4722 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4723 return Ret;
4724 }
4725
4726
4727 /// SelectBFE - Look for instruction sequences that can be made more efficient
4728 /// by using the 'bfe' (bit-field extract) PTX instruction
SelectBFE(SDNode * N)4729 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4730 SDValue LHS = N->getOperand(0);
4731 SDValue RHS = N->getOperand(1);
4732 SDValue Len;
4733 SDValue Start;
4734 SDValue Val;
4735 bool IsSigned = false;
4736
4737 if (N->getOpcode() == ISD::AND) {
4738 // Canonicalize the operands
4739 // We want 'and %val, %mask'
4740 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4741 std::swap(LHS, RHS);
4742 }
4743
4744 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4745 if (!Mask) {
4746 // We need a constant mask on the RHS of the AND
4747 return NULL;
4748 }
4749
4750 // Extract the mask bits
4751 uint64_t MaskVal = Mask->getZExtValue();
4752 if (!isMask_64(MaskVal)) {
4753 // We *could* handle shifted masks here, but doing so would require an
4754 // 'and' operation to fix up the low-order bits so we would trade
4755 // shr+and for bfe+and, which has the same throughput
4756 return NULL;
4757 }
4758
4759 // How many bits are in our mask?
4760 uint64_t NumBits = countTrailingOnes(MaskVal);
4761 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4762
4763 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4764 // We have a 'srl/and' pair, extract the effective start bit and length
4765 Val = LHS.getNode()->getOperand(0);
4766 Start = LHS.getNode()->getOperand(1);
4767 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4768 if (StartConst) {
4769 uint64_t StartVal = StartConst->getZExtValue();
4770 // How many "good" bits do we have left? "good" is defined here as bits
4771 // that exist in the original value, not shifted in.
4772 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4773 if (NumBits > GoodBits) {
4774 // Do not handle the case where bits have been shifted in. In theory
4775 // we could handle this, but the cost is likely higher than just
4776 // emitting the srl/and pair.
4777 return NULL;
4778 }
4779 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4780 } else {
4781 // Do not handle the case where the shift amount (can be zero if no srl
4782 // was found) is not constant. We could handle this case, but it would
4783 // require run-time logic that would be more expensive than just
4784 // emitting the srl/and pair.
4785 return NULL;
4786 }
4787 } else {
4788 // Do not handle the case where the LHS of the and is not a shift. While
4789 // it would be trivial to handle this case, it would just transform
4790 // 'and' -> 'bfe', but 'and' has higher-throughput.
4791 return NULL;
4792 }
4793 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4794 if (LHS->getOpcode() == ISD::AND) {
4795 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4796 if (!ShiftCnst) {
4797 // Shift amount must be constant
4798 return NULL;
4799 }
4800
4801 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4802
4803 SDValue AndLHS = LHS->getOperand(0);
4804 SDValue AndRHS = LHS->getOperand(1);
4805
4806 // Canonicalize the AND to have the mask on the RHS
4807 if (isa<ConstantSDNode>(AndLHS)) {
4808 std::swap(AndLHS, AndRHS);
4809 }
4810
4811 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4812 if (!MaskCnst) {
4813 // Mask must be constant
4814 return NULL;
4815 }
4816
4817 uint64_t MaskVal = MaskCnst->getZExtValue();
4818 uint64_t NumZeros;
4819 uint64_t NumBits;
4820 if (isMask_64(MaskVal)) {
4821 NumZeros = 0;
4822 // The number of bits in the result bitfield will be the number of
4823 // trailing ones (the AND) minus the number of bits we shift off
4824 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4825 } else if (isShiftedMask_64(MaskVal)) {
4826 NumZeros = countTrailingZeros(MaskVal);
4827 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4828 // The number of bits in the result bitfield will be the number of
4829 // trailing zeros plus the number of set bits in the mask minus the
4830 // number of bits we shift off
4831 NumBits = NumZeros + NumOnes - ShiftAmt;
4832 } else {
4833 // This is not a mask we can handle
4834 return NULL;
4835 }
4836
4837 if (ShiftAmt < NumZeros) {
4838 // Handling this case would require extra logic that would make this
4839 // transformation non-profitable
4840 return NULL;
4841 }
4842
4843 Val = AndLHS;
4844 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4845 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4846 } else if (LHS->getOpcode() == ISD::SHL) {
4847 // Here, we have a pattern like:
4848 //
4849 // (sra (shl val, NN), MM)
4850 // or
4851 // (srl (shl val, NN), MM)
4852 //
4853 // If MM >= NN, we can efficiently optimize this with bfe
4854 Val = LHS->getOperand(0);
4855
4856 SDValue ShlRHS = LHS->getOperand(1);
4857 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4858 if (!ShlCnst) {
4859 // Shift amount must be constant
4860 return NULL;
4861 }
4862 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4863
4864 SDValue ShrRHS = RHS;
4865 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4866 if (!ShrCnst) {
4867 // Shift amount must be constant
4868 return NULL;
4869 }
4870 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4871
4872 // To avoid extra codegen and be profitable, we need Outer >= Inner
4873 if (OuterShiftAmt < InnerShiftAmt) {
4874 return NULL;
4875 }
4876
4877 // If the outer shift is more than the type size, we have no bitfield to
4878 // extract (since we also check that the inner shift is <= the outer shift
4879 // then this also implies that the inner shift is < the type size)
4880 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4881 return NULL;
4882 }
4883
4884 Start =
4885 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4886 Len =
4887 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4888 OuterShiftAmt, MVT::i32);
4889
4890 if (N->getOpcode() == ISD::SRA) {
4891 // If we have a arithmetic right shift, we need to use the signed bfe
4892 // variant
4893 IsSigned = true;
4894 }
4895 } else {
4896 // No can do...
4897 return NULL;
4898 }
4899 } else {
4900 // No can do...
4901 return NULL;
4902 }
4903
4904
4905 unsigned Opc;
4906 // For the BFE operations we form here from "and" and "srl", always use the
4907 // unsigned variants.
4908 if (Val.getValueType() == MVT::i32) {
4909 if (IsSigned) {
4910 Opc = NVPTX::BFE_S32rii;
4911 } else {
4912 Opc = NVPTX::BFE_U32rii;
4913 }
4914 } else if (Val.getValueType() == MVT::i64) {
4915 if (IsSigned) {
4916 Opc = NVPTX::BFE_S64rii;
4917 } else {
4918 Opc = NVPTX::BFE_U64rii;
4919 }
4920 } else {
4921 // We cannot handle this type
4922 return NULL;
4923 }
4924
4925 SDValue Ops[] = {
4926 Val, Start, Len
4927 };
4928
4929 SDNode *Ret =
4930 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4931
4932 return Ret;
4933 }
4934
4935 // SelectDirectAddr - Match a direct address for DAG.
4936 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4938 // Return true if TGA or ES.
4939 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4940 N.getOpcode() == ISD::TargetExternalSymbol) {
4941 Address = N;
4942 return true;
4943 }
4944 if (N.getOpcode() == NVPTXISD::Wrapper) {
4945 Address = N.getOperand(0);
4946 return true;
4947 }
4948 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4949 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4950 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4951 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4952 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
4953 }
4954 return false;
4955 }
4956
4957 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4959 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4960 if (Addr.getOpcode() == ISD::ADD) {
4961 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4962 SDValue base = Addr.getOperand(0);
4963 if (SelectDirectAddr(base, Base)) {
4964 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
4965 return true;
4966 }
4967 }
4968 }
4969 return false;
4970 }
4971
4972 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)4973 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4974 SDValue &Base, SDValue &Offset) {
4975 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
4976 }
4977
4978 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)4979 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4980 SDValue &Base, SDValue &Offset) {
4981 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
4982 }
4983
4984 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)4985 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4986 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4987 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4988 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4989 Offset = CurDAG->getTargetConstant(0, mvt);
4990 return true;
4991 }
4992 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4993 Addr.getOpcode() == ISD::TargetGlobalAddress)
4994 return false; // direct calls.
4995
4996 if (Addr.getOpcode() == ISD::ADD) {
4997 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
4998 return false;
4999 }
5000 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5001 if (FrameIndexSDNode *FIN =
5002 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5003 // Constant offset from frame ref.
5004 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5005 else
5006 Base = Addr.getOperand(0);
5007 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
5008 return true;
5009 }
5010 }
5011 return false;
5012 }
5013
5014 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5015 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5016 SDValue &Base, SDValue &Offset) {
5017 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5018 }
5019
5020 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5021 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5022 SDValue &Base, SDValue &Offset) {
5023 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5024 }
5025
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5026 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5027 unsigned int spN) const {
5028 const Value *Src = nullptr;
5029 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5030 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5031 return true;
5032 Src = mN->getMemOperand()->getValue();
5033 }
5034 if (!Src)
5035 return false;
5036 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5037 return (PT->getAddressSpace() == spN);
5038 return false;
5039 }
5040
5041 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5042 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5043 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5044 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5045 SDValue Op0, Op1;
5046 switch (ConstraintID) {
5047 default:
5048 return true;
5049 case InlineAsm::Constraint_m: // memory
5050 if (SelectDirectAddr(Op, Op0)) {
5051 OutOps.push_back(Op0);
5052 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5053 return false;
5054 }
5055 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5056 OutOps.push_back(Op0);
5057 OutOps.push_back(Op1);
5058 return false;
5059 }
5060 break;
5061 }
5062 return true;
5063 }
5064