1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "tgsi/tgsi_build.h"
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_util.h"
27
28 #include <set>
29
30 #include "codegen/nv50_ir.h"
31 #include "codegen/nv50_ir_util.h"
32 #include "codegen/nv50_ir_build_util.h"
33
34 namespace tgsi {
35
36 class Source;
37
38 static nv50_ir::operation translateOpcode(uint opcode);
39 static nv50_ir::DataFile translateFile(uint file);
40 static nv50_ir::TexTarget translateTexture(uint texTarg);
41 static nv50_ir::SVSemantic translateSysVal(uint sysval);
42 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
43 static nv50_ir::ImgFormat translateImgFormat(uint format);
44
45 class Instruction
46 {
47 public:
Instruction(const struct tgsi_full_instruction * inst)48 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
49
50 class SrcRegister
51 {
52 public:
SrcRegister(const struct tgsi_full_src_register * src)53 SrcRegister(const struct tgsi_full_src_register *src)
54 : reg(src->Register),
55 fsr(src)
56 { }
57
SrcRegister(const struct tgsi_src_register & src)58 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
59
SrcRegister(const struct tgsi_ind_register & ind)60 SrcRegister(const struct tgsi_ind_register& ind)
61 : reg(tgsi_util_get_src_from_ind(&ind)),
62 fsr(NULL)
63 { }
64
offsetToSrc(struct tgsi_texture_offset off)65 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
66 {
67 struct tgsi_src_register reg;
68 memset(®, 0, sizeof(reg));
69 reg.Index = off.Index;
70 reg.File = off.File;
71 reg.SwizzleX = off.SwizzleX;
72 reg.SwizzleY = off.SwizzleY;
73 reg.SwizzleZ = off.SwizzleZ;
74 return reg;
75 }
76
SrcRegister(const struct tgsi_texture_offset & off)77 SrcRegister(const struct tgsi_texture_offset& off) :
78 reg(offsetToSrc(off)),
79 fsr(NULL)
80 { }
81
getFile() const82 uint getFile() const { return reg.File; }
83
is2D() const84 bool is2D() const { return reg.Dimension; }
85
isIndirect(int dim) const86 bool isIndirect(int dim) const
87 {
88 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
89 }
90
getIndex(int dim) const91 int getIndex(int dim) const
92 {
93 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
94 }
95
getSwizzle(int chan) const96 int getSwizzle(int chan) const
97 {
98 return tgsi_util_get_src_register_swizzle(®, chan);
99 }
100
getArrayId() const101 int getArrayId() const
102 {
103 if (isIndirect(0))
104 return fsr->Indirect.ArrayID;
105 return 0;
106 }
107
108 nv50_ir::Modifier getMod(int chan) const;
109
getIndirect(int dim) const110 SrcRegister getIndirect(int dim) const
111 {
112 assert(fsr && isIndirect(dim));
113 if (dim)
114 return SrcRegister(fsr->DimIndirect);
115 return SrcRegister(fsr->Indirect);
116 }
117
getValueU32(int c,const struct nv50_ir_prog_info * info) const118 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
119 {
120 assert(reg.File == TGSI_FILE_IMMEDIATE);
121 assert(!reg.Absolute);
122 assert(!reg.Negate);
123 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
124 }
125
126 private:
127 const struct tgsi_src_register reg;
128 const struct tgsi_full_src_register *fsr;
129 };
130
131 class DstRegister
132 {
133 public:
DstRegister(const struct tgsi_full_dst_register * dst)134 DstRegister(const struct tgsi_full_dst_register *dst)
135 : reg(dst->Register),
136 fdr(dst)
137 { }
138
DstRegister(const struct tgsi_dst_register & dst)139 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
140
getFile() const141 uint getFile() const { return reg.File; }
142
is2D() const143 bool is2D() const { return reg.Dimension; }
144
isIndirect(int dim) const145 bool isIndirect(int dim) const
146 {
147 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
148 }
149
getIndex(int dim) const150 int getIndex(int dim) const
151 {
152 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
153 }
154
getMask() const155 unsigned int getMask() const { return reg.WriteMask; }
156
isMasked(int chan) const157 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
158
getIndirect(int dim) const159 SrcRegister getIndirect(int dim) const
160 {
161 assert(fdr && isIndirect(dim));
162 if (dim)
163 return SrcRegister(fdr->DimIndirect);
164 return SrcRegister(fdr->Indirect);
165 }
166
asSrc()167 struct tgsi_full_src_register asSrc()
168 {
169 assert(fdr);
170 return tgsi_full_src_register_from_dst(fdr);
171 }
172
getArrayId() const173 int getArrayId() const
174 {
175 if (isIndirect(0))
176 return fdr->Indirect.ArrayID;
177 return 0;
178 }
179
180 private:
181 const struct tgsi_dst_register reg;
182 const struct tgsi_full_dst_register *fdr;
183 };
184
getOpcode() const185 inline uint getOpcode() const { return insn->Instruction.Opcode; }
186
srcCount() const187 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
dstCount() const188 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
189
190 // mask of used components of source s
191 unsigned int srcMask(unsigned int s) const;
192 unsigned int texOffsetMask() const;
193
getSrc(unsigned int s) const194 SrcRegister getSrc(unsigned int s) const
195 {
196 assert(s < srcCount());
197 return SrcRegister(&insn->Src[s]);
198 }
199
getDst(unsigned int d) const200 DstRegister getDst(unsigned int d) const
201 {
202 assert(d < dstCount());
203 return DstRegister(&insn->Dst[d]);
204 }
205
getTexOffset(unsigned int i) const206 SrcRegister getTexOffset(unsigned int i) const
207 {
208 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
209 return SrcRegister(insn->TexOffsets[i]);
210 }
211
getNumTexOffsets() const212 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
213
214 bool checkDstSrcAliasing() const;
215
getOP() const216 inline nv50_ir::operation getOP() const {
217 return translateOpcode(getOpcode()); }
218
219 nv50_ir::DataType inferSrcType() const;
220 nv50_ir::DataType inferDstType() const;
221
222 nv50_ir::CondCode getSetCond() const;
223
224 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
225
getImageFormat() const226 const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {
227 return &nv50_ir::TexInstruction::formatTable[
228 translateImgFormat(insn->Memory.Format)];
229 }
230
getImageTarget() const231 nv50_ir::TexTarget getImageTarget() const {
232 return translateTexture(insn->Memory.Texture);
233 }
234
getCacheMode() const235 nv50_ir::CacheMode getCacheMode() const {
236 if (!insn->Instruction.Memory)
237 return nv50_ir::CACHE_CA;
238 return translateCacheMode(insn->Memory.Qualifier);
239 }
240
getLabel()241 inline uint getLabel() { return insn->Label.Label; }
242
getSaturate() const243 unsigned getSaturate() const { return insn->Instruction.Saturate; }
244
print() const245 void print() const
246 {
247 tgsi_dump_instruction(insn, 1);
248 }
249
250 private:
251 const struct tgsi_full_instruction *insn;
252 };
253
texOffsetMask() const254 unsigned int Instruction::texOffsetMask() const
255 {
256 const struct tgsi_instruction_texture *tex = &insn->Texture;
257 assert(insn->Instruction.Texture);
258
259 switch (tex->Texture) {
260 case TGSI_TEXTURE_BUFFER:
261 case TGSI_TEXTURE_1D:
262 case TGSI_TEXTURE_SHADOW1D:
263 case TGSI_TEXTURE_1D_ARRAY:
264 case TGSI_TEXTURE_SHADOW1D_ARRAY:
265 return 0x1;
266 case TGSI_TEXTURE_2D:
267 case TGSI_TEXTURE_SHADOW2D:
268 case TGSI_TEXTURE_2D_ARRAY:
269 case TGSI_TEXTURE_SHADOW2D_ARRAY:
270 case TGSI_TEXTURE_RECT:
271 case TGSI_TEXTURE_SHADOWRECT:
272 case TGSI_TEXTURE_2D_MSAA:
273 case TGSI_TEXTURE_2D_ARRAY_MSAA:
274 return 0x3;
275 case TGSI_TEXTURE_3D:
276 return 0x7;
277 default:
278 assert(!"Unexpected texture target");
279 return 0xf;
280 }
281 }
282
srcMask(unsigned int s) const283 unsigned int Instruction::srcMask(unsigned int s) const
284 {
285 unsigned int mask = insn->Dst[0].Register.WriteMask;
286
287 switch (insn->Instruction.Opcode) {
288 case TGSI_OPCODE_COS:
289 case TGSI_OPCODE_SIN:
290 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
291 case TGSI_OPCODE_DP2:
292 return 0x3;
293 case TGSI_OPCODE_DP3:
294 return 0x7;
295 case TGSI_OPCODE_DP4:
296 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
297 return 0xf;
298 case TGSI_OPCODE_DST:
299 return mask & (s ? 0xa : 0x6);
300 case TGSI_OPCODE_EX2:
301 case TGSI_OPCODE_EXP:
302 case TGSI_OPCODE_LG2:
303 case TGSI_OPCODE_LOG:
304 case TGSI_OPCODE_POW:
305 case TGSI_OPCODE_RCP:
306 case TGSI_OPCODE_RSQ:
307 return 0x1;
308 case TGSI_OPCODE_IF:
309 case TGSI_OPCODE_UIF:
310 return 0x1;
311 case TGSI_OPCODE_LIT:
312 return 0xb;
313 case TGSI_OPCODE_TEX2:
314 case TGSI_OPCODE_TXB2:
315 case TGSI_OPCODE_TXL2:
316 return (s == 0) ? 0xf : 0x3;
317 case TGSI_OPCODE_TEX:
318 case TGSI_OPCODE_TXB:
319 case TGSI_OPCODE_TXD:
320 case TGSI_OPCODE_TXL:
321 case TGSI_OPCODE_TXP:
322 case TGSI_OPCODE_TXF:
323 case TGSI_OPCODE_TG4:
324 case TGSI_OPCODE_TEX_LZ:
325 case TGSI_OPCODE_TXF_LZ:
326 case TGSI_OPCODE_LODQ:
327 {
328 const struct tgsi_instruction_texture *tex = &insn->Texture;
329
330 assert(insn->Instruction.Texture);
331
332 mask = 0x7;
333 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
334 insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
335 insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
336 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
337 mask |= 0x8; /* bias, lod or proj */
338
339 switch (tex->Texture) {
340 case TGSI_TEXTURE_1D:
341 mask &= 0x9;
342 break;
343 case TGSI_TEXTURE_SHADOW1D:
344 mask &= 0xd;
345 break;
346 case TGSI_TEXTURE_1D_ARRAY:
347 case TGSI_TEXTURE_2D:
348 case TGSI_TEXTURE_RECT:
349 mask &= 0xb;
350 break;
351 case TGSI_TEXTURE_CUBE_ARRAY:
352 case TGSI_TEXTURE_SHADOW2D_ARRAY:
353 case TGSI_TEXTURE_SHADOWCUBE:
354 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
355 mask |= 0x8;
356 break;
357 default:
358 break;
359 }
360 }
361 return mask;
362 case TGSI_OPCODE_TXQ:
363 return 1;
364 case TGSI_OPCODE_D2I:
365 case TGSI_OPCODE_D2U:
366 case TGSI_OPCODE_D2F:
367 case TGSI_OPCODE_DSLT:
368 case TGSI_OPCODE_DSGE:
369 case TGSI_OPCODE_DSEQ:
370 case TGSI_OPCODE_DSNE:
371 case TGSI_OPCODE_U64SEQ:
372 case TGSI_OPCODE_U64SNE:
373 case TGSI_OPCODE_I64SLT:
374 case TGSI_OPCODE_U64SLT:
375 case TGSI_OPCODE_I64SGE:
376 case TGSI_OPCODE_U64SGE:
377 case TGSI_OPCODE_I642F:
378 case TGSI_OPCODE_U642F:
379 switch (util_bitcount(mask)) {
380 case 1: return 0x3;
381 case 2: return 0xf;
382 default:
383 assert(!"unexpected mask");
384 return 0xf;
385 }
386 case TGSI_OPCODE_I2D:
387 case TGSI_OPCODE_U2D:
388 case TGSI_OPCODE_F2D: {
389 unsigned int x = 0;
390 if ((mask & 0x3) == 0x3)
391 x |= 1;
392 if ((mask & 0xc) == 0xc)
393 x |= 2;
394 return x;
395 }
396 case TGSI_OPCODE_PK2H:
397 return 0x3;
398 case TGSI_OPCODE_UP2H:
399 return 0x1;
400 default:
401 break;
402 }
403
404 return mask;
405 }
406
getMod(int chan) const407 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
408 {
409 nv50_ir::Modifier m(0);
410
411 if (reg.Absolute)
412 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
413 if (reg.Negate)
414 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
415 return m;
416 }
417
translateFile(uint file)418 static nv50_ir::DataFile translateFile(uint file)
419 {
420 switch (file) {
421 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
422 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
423 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
424 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
425 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
426 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
427 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
428 case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
429 case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;
430 case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
431 case TGSI_FILE_SAMPLER:
432 case TGSI_FILE_NULL:
433 default:
434 return nv50_ir::FILE_NULL;
435 }
436 }
437
translateSysVal(uint sysval)438 static nv50_ir::SVSemantic translateSysVal(uint sysval)
439 {
440 switch (sysval) {
441 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
442 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
443 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
444 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
445 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
446 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
447 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
448 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
449 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
450 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
451 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
452 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
453 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
454 case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
455 case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
456 case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
457 case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
458 case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
459 case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
460 case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
461 case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
462 case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
463 case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
464 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
465 case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
466 case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
467 case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
468 case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
469 default:
470 assert(0);
471 return nv50_ir::SV_CLOCK;
472 }
473 }
474
475 #define NV50_IR_TEX_TARG_CASE(a, b) \
476 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
477
translateTexture(uint tex)478 static nv50_ir::TexTarget translateTexture(uint tex)
479 {
480 switch (tex) {
481 NV50_IR_TEX_TARG_CASE(1D, 1D);
482 NV50_IR_TEX_TARG_CASE(2D, 2D);
483 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
484 NV50_IR_TEX_TARG_CASE(3D, 3D);
485 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
486 NV50_IR_TEX_TARG_CASE(RECT, RECT);
487 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
488 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
489 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
490 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
491 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
492 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
493 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
494 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
495 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
496 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
497 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
498 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
499
500 case TGSI_TEXTURE_UNKNOWN:
501 default:
502 assert(!"invalid texture target");
503 return nv50_ir::TEX_TARGET_2D;
504 }
505 }
506
translateCacheMode(uint qualifier)507 static nv50_ir::CacheMode translateCacheMode(uint qualifier)
508 {
509 if (qualifier & TGSI_MEMORY_VOLATILE)
510 return nv50_ir::CACHE_CV;
511 if (qualifier & TGSI_MEMORY_COHERENT)
512 return nv50_ir::CACHE_CG;
513 return nv50_ir::CACHE_CA;
514 }
515
translateImgFormat(uint format)516 static nv50_ir::ImgFormat translateImgFormat(uint format)
517 {
518
519 #define FMT_CASE(a, b) \
520 case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
521
522 switch (format) {
523 FMT_CASE(NONE, NONE);
524
525 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
526 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
527 FMT_CASE(R32G32_FLOAT, RG32F);
528 FMT_CASE(R16G16_FLOAT, RG16F);
529 FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
530 FMT_CASE(R32_FLOAT, R32F);
531 FMT_CASE(R16_FLOAT, R16F);
532
533 FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
534 FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
535 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
536 FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
537 FMT_CASE(R32G32_UINT, RG32UI);
538 FMT_CASE(R16G16_UINT, RG16UI);
539 FMT_CASE(R8G8_UINT, RG8UI);
540 FMT_CASE(R32_UINT, R32UI);
541 FMT_CASE(R16_UINT, R16UI);
542 FMT_CASE(R8_UINT, R8UI);
543
544 FMT_CASE(R32G32B32A32_SINT, RGBA32I);
545 FMT_CASE(R16G16B16A16_SINT, RGBA16I);
546 FMT_CASE(R8G8B8A8_SINT, RGBA8I);
547 FMT_CASE(R32G32_SINT, RG32I);
548 FMT_CASE(R16G16_SINT, RG16I);
549 FMT_CASE(R8G8_SINT, RG8I);
550 FMT_CASE(R32_SINT, R32I);
551 FMT_CASE(R16_SINT, R16I);
552 FMT_CASE(R8_SINT, R8I);
553
554 FMT_CASE(R16G16B16A16_UNORM, RGBA16);
555 FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
556 FMT_CASE(R8G8B8A8_UNORM, RGBA8);
557 FMT_CASE(R16G16_UNORM, RG16);
558 FMT_CASE(R8G8_UNORM, RG8);
559 FMT_CASE(R16_UNORM, R16);
560 FMT_CASE(R8_UNORM, R8);
561
562 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
563 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
564 FMT_CASE(R16G16_SNORM, RG16_SNORM);
565 FMT_CASE(R8G8_SNORM, RG8_SNORM);
566 FMT_CASE(R16_SNORM, R16_SNORM);
567 FMT_CASE(R8_SNORM, R8_SNORM);
568
569 FMT_CASE(B8G8R8A8_UNORM, BGRA8);
570 }
571
572 assert(!"Unexpected format");
573 return nv50_ir::FMT_NONE;
574 }
575
inferSrcType() const576 nv50_ir::DataType Instruction::inferSrcType() const
577 {
578 switch (getOpcode()) {
579 case TGSI_OPCODE_UIF:
580 case TGSI_OPCODE_AND:
581 case TGSI_OPCODE_OR:
582 case TGSI_OPCODE_XOR:
583 case TGSI_OPCODE_NOT:
584 case TGSI_OPCODE_SHL:
585 case TGSI_OPCODE_U2F:
586 case TGSI_OPCODE_U2D:
587 case TGSI_OPCODE_U2I64:
588 case TGSI_OPCODE_UADD:
589 case TGSI_OPCODE_UDIV:
590 case TGSI_OPCODE_UMOD:
591 case TGSI_OPCODE_UMAD:
592 case TGSI_OPCODE_UMUL:
593 case TGSI_OPCODE_UMUL_HI:
594 case TGSI_OPCODE_UMAX:
595 case TGSI_OPCODE_UMIN:
596 case TGSI_OPCODE_USEQ:
597 case TGSI_OPCODE_USGE:
598 case TGSI_OPCODE_USLT:
599 case TGSI_OPCODE_USNE:
600 case TGSI_OPCODE_USHR:
601 case TGSI_OPCODE_ATOMUADD:
602 case TGSI_OPCODE_ATOMXCHG:
603 case TGSI_OPCODE_ATOMCAS:
604 case TGSI_OPCODE_ATOMAND:
605 case TGSI_OPCODE_ATOMOR:
606 case TGSI_OPCODE_ATOMXOR:
607 case TGSI_OPCODE_ATOMUMIN:
608 case TGSI_OPCODE_ATOMUMAX:
609 case TGSI_OPCODE_UBFE:
610 case TGSI_OPCODE_UMSB:
611 case TGSI_OPCODE_UP2H:
612 case TGSI_OPCODE_VOTE_ALL:
613 case TGSI_OPCODE_VOTE_ANY:
614 case TGSI_OPCODE_VOTE_EQ:
615 return nv50_ir::TYPE_U32;
616 case TGSI_OPCODE_I2F:
617 case TGSI_OPCODE_I2D:
618 case TGSI_OPCODE_I2I64:
619 case TGSI_OPCODE_IDIV:
620 case TGSI_OPCODE_IMUL_HI:
621 case TGSI_OPCODE_IMAX:
622 case TGSI_OPCODE_IMIN:
623 case TGSI_OPCODE_IABS:
624 case TGSI_OPCODE_INEG:
625 case TGSI_OPCODE_ISGE:
626 case TGSI_OPCODE_ISHR:
627 case TGSI_OPCODE_ISLT:
628 case TGSI_OPCODE_ISSG:
629 case TGSI_OPCODE_MOD:
630 case TGSI_OPCODE_UARL:
631 case TGSI_OPCODE_ATOMIMIN:
632 case TGSI_OPCODE_ATOMIMAX:
633 case TGSI_OPCODE_IBFE:
634 case TGSI_OPCODE_IMSB:
635 return nv50_ir::TYPE_S32;
636 case TGSI_OPCODE_D2F:
637 case TGSI_OPCODE_D2I:
638 case TGSI_OPCODE_D2U:
639 case TGSI_OPCODE_D2I64:
640 case TGSI_OPCODE_D2U64:
641 case TGSI_OPCODE_DABS:
642 case TGSI_OPCODE_DNEG:
643 case TGSI_OPCODE_DADD:
644 case TGSI_OPCODE_DMUL:
645 case TGSI_OPCODE_DDIV:
646 case TGSI_OPCODE_DMAX:
647 case TGSI_OPCODE_DMIN:
648 case TGSI_OPCODE_DSLT:
649 case TGSI_OPCODE_DSGE:
650 case TGSI_OPCODE_DSEQ:
651 case TGSI_OPCODE_DSNE:
652 case TGSI_OPCODE_DRCP:
653 case TGSI_OPCODE_DSQRT:
654 case TGSI_OPCODE_DMAD:
655 case TGSI_OPCODE_DFMA:
656 case TGSI_OPCODE_DFRAC:
657 case TGSI_OPCODE_DRSQ:
658 case TGSI_OPCODE_DTRUNC:
659 case TGSI_OPCODE_DCEIL:
660 case TGSI_OPCODE_DFLR:
661 case TGSI_OPCODE_DROUND:
662 return nv50_ir::TYPE_F64;
663 case TGSI_OPCODE_U64SEQ:
664 case TGSI_OPCODE_U64SNE:
665 case TGSI_OPCODE_U64SLT:
666 case TGSI_OPCODE_U64SGE:
667 case TGSI_OPCODE_U64MIN:
668 case TGSI_OPCODE_U64MAX:
669 case TGSI_OPCODE_U64ADD:
670 case TGSI_OPCODE_U64MUL:
671 case TGSI_OPCODE_U64SHL:
672 case TGSI_OPCODE_U64SHR:
673 case TGSI_OPCODE_U64DIV:
674 case TGSI_OPCODE_U64MOD:
675 case TGSI_OPCODE_U642F:
676 case TGSI_OPCODE_U642D:
677 return nv50_ir::TYPE_U64;
678 case TGSI_OPCODE_I64ABS:
679 case TGSI_OPCODE_I64SSG:
680 case TGSI_OPCODE_I64NEG:
681 case TGSI_OPCODE_I64SLT:
682 case TGSI_OPCODE_I64SGE:
683 case TGSI_OPCODE_I64MIN:
684 case TGSI_OPCODE_I64MAX:
685 case TGSI_OPCODE_I64SHR:
686 case TGSI_OPCODE_I64DIV:
687 case TGSI_OPCODE_I64MOD:
688 case TGSI_OPCODE_I642F:
689 case TGSI_OPCODE_I642D:
690 return nv50_ir::TYPE_S64;
691 default:
692 return nv50_ir::TYPE_F32;
693 }
694 }
695
inferDstType() const696 nv50_ir::DataType Instruction::inferDstType() const
697 {
698 switch (getOpcode()) {
699 case TGSI_OPCODE_D2U:
700 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
701 case TGSI_OPCODE_D2I:
702 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
703 case TGSI_OPCODE_FSEQ:
704 case TGSI_OPCODE_FSGE:
705 case TGSI_OPCODE_FSLT:
706 case TGSI_OPCODE_FSNE:
707 case TGSI_OPCODE_DSEQ:
708 case TGSI_OPCODE_DSGE:
709 case TGSI_OPCODE_DSLT:
710 case TGSI_OPCODE_DSNE:
711 case TGSI_OPCODE_I64SLT:
712 case TGSI_OPCODE_I64SGE:
713 case TGSI_OPCODE_U64SEQ:
714 case TGSI_OPCODE_U64SNE:
715 case TGSI_OPCODE_U64SLT:
716 case TGSI_OPCODE_U64SGE:
717 case TGSI_OPCODE_PK2H:
718 return nv50_ir::TYPE_U32;
719 case TGSI_OPCODE_I2F:
720 case TGSI_OPCODE_U2F:
721 case TGSI_OPCODE_D2F:
722 case TGSI_OPCODE_I642F:
723 case TGSI_OPCODE_U642F:
724 case TGSI_OPCODE_UP2H:
725 return nv50_ir::TYPE_F32;
726 case TGSI_OPCODE_I2D:
727 case TGSI_OPCODE_U2D:
728 case TGSI_OPCODE_F2D:
729 case TGSI_OPCODE_I642D:
730 case TGSI_OPCODE_U642D:
731 return nv50_ir::TYPE_F64;
732 case TGSI_OPCODE_I2I64:
733 case TGSI_OPCODE_U2I64:
734 case TGSI_OPCODE_F2I64:
735 case TGSI_OPCODE_D2I64:
736 return nv50_ir::TYPE_S64;
737 case TGSI_OPCODE_F2U64:
738 case TGSI_OPCODE_D2U64:
739 return nv50_ir::TYPE_U64;
740 default:
741 return inferSrcType();
742 }
743 }
744
getSetCond() const745 nv50_ir::CondCode Instruction::getSetCond() const
746 {
747 using namespace nv50_ir;
748
749 switch (getOpcode()) {
750 case TGSI_OPCODE_SLT:
751 case TGSI_OPCODE_ISLT:
752 case TGSI_OPCODE_USLT:
753 case TGSI_OPCODE_FSLT:
754 case TGSI_OPCODE_DSLT:
755 case TGSI_OPCODE_I64SLT:
756 case TGSI_OPCODE_U64SLT:
757 return CC_LT;
758 case TGSI_OPCODE_SLE:
759 return CC_LE;
760 case TGSI_OPCODE_SGE:
761 case TGSI_OPCODE_ISGE:
762 case TGSI_OPCODE_USGE:
763 case TGSI_OPCODE_FSGE:
764 case TGSI_OPCODE_DSGE:
765 case TGSI_OPCODE_I64SGE:
766 case TGSI_OPCODE_U64SGE:
767 return CC_GE;
768 case TGSI_OPCODE_SGT:
769 return CC_GT;
770 case TGSI_OPCODE_SEQ:
771 case TGSI_OPCODE_USEQ:
772 case TGSI_OPCODE_FSEQ:
773 case TGSI_OPCODE_DSEQ:
774 case TGSI_OPCODE_U64SEQ:
775 return CC_EQ;
776 case TGSI_OPCODE_SNE:
777 case TGSI_OPCODE_FSNE:
778 case TGSI_OPCODE_DSNE:
779 case TGSI_OPCODE_U64SNE:
780 return CC_NEU;
781 case TGSI_OPCODE_USNE:
782 return CC_NE;
783 default:
784 return CC_ALWAYS;
785 }
786 }
787
788 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
789
translateOpcode(uint opcode)790 static nv50_ir::operation translateOpcode(uint opcode)
791 {
792 switch (opcode) {
793 NV50_IR_OPCODE_CASE(ARL, SHL);
794 NV50_IR_OPCODE_CASE(MOV, MOV);
795
796 NV50_IR_OPCODE_CASE(RCP, RCP);
797 NV50_IR_OPCODE_CASE(RSQ, RSQ);
798 NV50_IR_OPCODE_CASE(SQRT, SQRT);
799
800 NV50_IR_OPCODE_CASE(MUL, MUL);
801 NV50_IR_OPCODE_CASE(ADD, ADD);
802
803 NV50_IR_OPCODE_CASE(MIN, MIN);
804 NV50_IR_OPCODE_CASE(MAX, MAX);
805 NV50_IR_OPCODE_CASE(SLT, SET);
806 NV50_IR_OPCODE_CASE(SGE, SET);
807 NV50_IR_OPCODE_CASE(MAD, MAD);
808 NV50_IR_OPCODE_CASE(FMA, FMA);
809
810 NV50_IR_OPCODE_CASE(FLR, FLOOR);
811 NV50_IR_OPCODE_CASE(ROUND, CVT);
812 NV50_IR_OPCODE_CASE(EX2, EX2);
813 NV50_IR_OPCODE_CASE(LG2, LG2);
814 NV50_IR_OPCODE_CASE(POW, POW);
815
816 NV50_IR_OPCODE_CASE(COS, COS);
817 NV50_IR_OPCODE_CASE(DDX, DFDX);
818 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
819 NV50_IR_OPCODE_CASE(DDY, DFDY);
820 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
821 NV50_IR_OPCODE_CASE(KILL, DISCARD);
822
823 NV50_IR_OPCODE_CASE(SEQ, SET);
824 NV50_IR_OPCODE_CASE(SGT, SET);
825 NV50_IR_OPCODE_CASE(SIN, SIN);
826 NV50_IR_OPCODE_CASE(SLE, SET);
827 NV50_IR_OPCODE_CASE(SNE, SET);
828 NV50_IR_OPCODE_CASE(TEX, TEX);
829 NV50_IR_OPCODE_CASE(TXD, TXD);
830 NV50_IR_OPCODE_CASE(TXP, TEX);
831
832 NV50_IR_OPCODE_CASE(CAL, CALL);
833 NV50_IR_OPCODE_CASE(RET, RET);
834 NV50_IR_OPCODE_CASE(CMP, SLCT);
835
836 NV50_IR_OPCODE_CASE(TXB, TXB);
837
838 NV50_IR_OPCODE_CASE(DIV, DIV);
839
840 NV50_IR_OPCODE_CASE(TXL, TXL);
841 NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
842
843 NV50_IR_OPCODE_CASE(CEIL, CEIL);
844 NV50_IR_OPCODE_CASE(I2F, CVT);
845 NV50_IR_OPCODE_CASE(NOT, NOT);
846 NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
847 NV50_IR_OPCODE_CASE(SHL, SHL);
848
849 NV50_IR_OPCODE_CASE(AND, AND);
850 NV50_IR_OPCODE_CASE(OR, OR);
851 NV50_IR_OPCODE_CASE(MOD, MOD);
852 NV50_IR_OPCODE_CASE(XOR, XOR);
853 NV50_IR_OPCODE_CASE(TXF, TXF);
854 NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
855 NV50_IR_OPCODE_CASE(TXQ, TXQ);
856 NV50_IR_OPCODE_CASE(TXQS, TXQ);
857 NV50_IR_OPCODE_CASE(TG4, TXG);
858 NV50_IR_OPCODE_CASE(LODQ, TXLQ);
859
860 NV50_IR_OPCODE_CASE(EMIT, EMIT);
861 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
862
863 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
864
865 NV50_IR_OPCODE_CASE(F2I, CVT);
866 NV50_IR_OPCODE_CASE(FSEQ, SET);
867 NV50_IR_OPCODE_CASE(FSGE, SET);
868 NV50_IR_OPCODE_CASE(FSLT, SET);
869 NV50_IR_OPCODE_CASE(FSNE, SET);
870 NV50_IR_OPCODE_CASE(IDIV, DIV);
871 NV50_IR_OPCODE_CASE(IMAX, MAX);
872 NV50_IR_OPCODE_CASE(IMIN, MIN);
873 NV50_IR_OPCODE_CASE(IABS, ABS);
874 NV50_IR_OPCODE_CASE(INEG, NEG);
875 NV50_IR_OPCODE_CASE(ISGE, SET);
876 NV50_IR_OPCODE_CASE(ISHR, SHR);
877 NV50_IR_OPCODE_CASE(ISLT, SET);
878 NV50_IR_OPCODE_CASE(F2U, CVT);
879 NV50_IR_OPCODE_CASE(U2F, CVT);
880 NV50_IR_OPCODE_CASE(UADD, ADD);
881 NV50_IR_OPCODE_CASE(UDIV, DIV);
882 NV50_IR_OPCODE_CASE(UMAD, MAD);
883 NV50_IR_OPCODE_CASE(UMAX, MAX);
884 NV50_IR_OPCODE_CASE(UMIN, MIN);
885 NV50_IR_OPCODE_CASE(UMOD, MOD);
886 NV50_IR_OPCODE_CASE(UMUL, MUL);
887 NV50_IR_OPCODE_CASE(USEQ, SET);
888 NV50_IR_OPCODE_CASE(USGE, SET);
889 NV50_IR_OPCODE_CASE(USHR, SHR);
890 NV50_IR_OPCODE_CASE(USLT, SET);
891 NV50_IR_OPCODE_CASE(USNE, SET);
892
893 NV50_IR_OPCODE_CASE(DABS, ABS);
894 NV50_IR_OPCODE_CASE(DNEG, NEG);
895 NV50_IR_OPCODE_CASE(DADD, ADD);
896 NV50_IR_OPCODE_CASE(DMUL, MUL);
897 NV50_IR_OPCODE_CASE(DDIV, DIV);
898 NV50_IR_OPCODE_CASE(DMAX, MAX);
899 NV50_IR_OPCODE_CASE(DMIN, MIN);
900 NV50_IR_OPCODE_CASE(DSLT, SET);
901 NV50_IR_OPCODE_CASE(DSGE, SET);
902 NV50_IR_OPCODE_CASE(DSEQ, SET);
903 NV50_IR_OPCODE_CASE(DSNE, SET);
904 NV50_IR_OPCODE_CASE(DRCP, RCP);
905 NV50_IR_OPCODE_CASE(DSQRT, SQRT);
906 NV50_IR_OPCODE_CASE(DMAD, MAD);
907 NV50_IR_OPCODE_CASE(DFMA, FMA);
908 NV50_IR_OPCODE_CASE(D2I, CVT);
909 NV50_IR_OPCODE_CASE(D2U, CVT);
910 NV50_IR_OPCODE_CASE(I2D, CVT);
911 NV50_IR_OPCODE_CASE(U2D, CVT);
912 NV50_IR_OPCODE_CASE(DRSQ, RSQ);
913 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
914 NV50_IR_OPCODE_CASE(DCEIL, CEIL);
915 NV50_IR_OPCODE_CASE(DFLR, FLOOR);
916 NV50_IR_OPCODE_CASE(DROUND, CVT);
917
918 NV50_IR_OPCODE_CASE(U64SEQ, SET);
919 NV50_IR_OPCODE_CASE(U64SNE, SET);
920 NV50_IR_OPCODE_CASE(U64SLT, SET);
921 NV50_IR_OPCODE_CASE(U64SGE, SET);
922 NV50_IR_OPCODE_CASE(I64SLT, SET);
923 NV50_IR_OPCODE_CASE(I64SGE, SET);
924 NV50_IR_OPCODE_CASE(I2I64, CVT);
925 NV50_IR_OPCODE_CASE(U2I64, CVT);
926 NV50_IR_OPCODE_CASE(F2I64, CVT);
927 NV50_IR_OPCODE_CASE(F2U64, CVT);
928 NV50_IR_OPCODE_CASE(D2I64, CVT);
929 NV50_IR_OPCODE_CASE(D2U64, CVT);
930 NV50_IR_OPCODE_CASE(I642F, CVT);
931 NV50_IR_OPCODE_CASE(U642F, CVT);
932 NV50_IR_OPCODE_CASE(I642D, CVT);
933 NV50_IR_OPCODE_CASE(U642D, CVT);
934
935 NV50_IR_OPCODE_CASE(I64MIN, MIN);
936 NV50_IR_OPCODE_CASE(U64MIN, MIN);
937 NV50_IR_OPCODE_CASE(I64MAX, MAX);
938 NV50_IR_OPCODE_CASE(U64MAX, MAX);
939 NV50_IR_OPCODE_CASE(I64ABS, ABS);
940 NV50_IR_OPCODE_CASE(I64NEG, NEG);
941 NV50_IR_OPCODE_CASE(U64ADD, ADD);
942 NV50_IR_OPCODE_CASE(U64MUL, MUL);
943 NV50_IR_OPCODE_CASE(U64SHL, SHL);
944 NV50_IR_OPCODE_CASE(I64SHR, SHR);
945 NV50_IR_OPCODE_CASE(U64SHR, SHR);
946
947 NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
948 NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
949
950 NV50_IR_OPCODE_CASE(SAMPLE, TEX);
951 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
952 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
953 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
954 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
955 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
956 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
957 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
958 NV50_IR_OPCODE_CASE(GATHER4, TXG);
959 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
960
961 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
962 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
963 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
964 NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
965 NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
966 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
967 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
968 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
969 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
970 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
971
972 NV50_IR_OPCODE_CASE(TEX2, TEX);
973 NV50_IR_OPCODE_CASE(TXB2, TXB);
974 NV50_IR_OPCODE_CASE(TXL2, TXL);
975
976 NV50_IR_OPCODE_CASE(IBFE, EXTBF);
977 NV50_IR_OPCODE_CASE(UBFE, EXTBF);
978 NV50_IR_OPCODE_CASE(BFI, INSBF);
979 NV50_IR_OPCODE_CASE(BREV, EXTBF);
980 NV50_IR_OPCODE_CASE(POPC, POPCNT);
981 NV50_IR_OPCODE_CASE(LSB, BFIND);
982 NV50_IR_OPCODE_CASE(IMSB, BFIND);
983 NV50_IR_OPCODE_CASE(UMSB, BFIND);
984
985 NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
986 NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
987 NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
988
989 NV50_IR_OPCODE_CASE(BALLOT, VOTE);
990 NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
991 NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
992
993 NV50_IR_OPCODE_CASE(END, EXIT);
994
995 default:
996 return nv50_ir::OP_NOP;
997 }
998 }
999
opcodeToSubOp(uint opcode)1000 static uint16_t opcodeToSubOp(uint opcode)
1001 {
1002 switch (opcode) {
1003 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
1004 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
1005 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
1006 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;
1007 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;
1008 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;
1009 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
1010 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
1011 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
1012 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
1013 case TGSI_OPCODE_IMUL_HI:
1014 case TGSI_OPCODE_UMUL_HI:
1015 return NV50_IR_SUBOP_MUL_HIGH;
1016 case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
1017 case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
1018 case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
1019 default:
1020 return 0;
1021 }
1022 }
1023
checkDstSrcAliasing() const1024 bool Instruction::checkDstSrcAliasing() const
1025 {
1026 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
1027 return false;
1028
1029 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
1030 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
1031 break;
1032 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
1033 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
1034 return true;
1035 }
1036 return false;
1037 }
1038
// Holds the result of scanning a TGSI token stream: a copy of every
// instruction plus the bookkeeping (register usage, declarations,
// properties, immediates) gathered by scanSource() and written partly into
// the nv50_ir_prog_info it was constructed with.
//
// NOTE(review): the destructor FREEs insns and info->immd.{data,type}, and
// no copy constructor / assignment operator is declared here, so copying a
// Source would lead to a double free.
class Source
{
public:
   Source(struct nv50_ir_prog_info *);
   ~Source();

public:
   // Scans all tokens and fills in the members below; returns false on
   // allocation failure or when slot assignment fails.
   bool scanSource();
   // Number of registers used in the given TGSI file (highest index + 1).
   unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }

public:
   struct tgsi_shader_info scan; // filled by tgsi_scan_shader() in scanSource()
   struct tgsi_full_instruction *insns; // MALLOC'd in scanSource(), FREE'd in the destructor
   const struct tgsi_token *tokens; // taken from info->bin.source
   struct nv50_ir_prog_info *info;

   nv50_ir::DynArray tempArrays;
   nv50_ir::DynArray immdArrays;

   typedef nv50_ir::BuildUtil::Location Location;
   // these registers are per-subroutine, cannot be used for parameter passing
   std::set<Location> locals;

   std::set<int> indirectTempArrays; // array IDs of temporaries accessed indirectly
   std::map<int, int> indirectTempOffsets; // array ID -> index adjustment computed in scanSource()
   std::map<int, std::pair<int, int> > tempArrayInfo; // array ID -> (first register, register count)
   std::vector<int> tempArrayId; // array ID of each temporary register

   // Output index of the CLIPVERTEX semantic, or of the first POSITION output
   // seen in a non-fragment shader; -1 when neither was declared.
   int clipVertexOutput;

   struct TextureView {
      uint8_t target; // TGSI_TEXTURE_*
   };
   std::vector<TextureView> textureViews; // indexed by sampler view register

   /*
   struct Resource {
      uint8_t target; // TGSI_TEXTURE_*
      bool raw;
      uint8_t slot; // $surface index
   };
   std::vector<Resource> resources;
   */

   struct MemoryFile {
      uint8_t mem_type; // TGSI_MEMORY_TYPE_*
   };
   std::vector<MemoryFile> memoryFiles; // indexed by memory register

private:
   // Per-token scanners called from scanSource().
   int inferSysValDirection(unsigned sn) const;
   bool scanDeclaration(const struct tgsi_full_declaration *);
   bool scanInstruction(const struct tgsi_full_instruction *);
   void scanInstructionSrc(const Instruction& insn,
                           const Instruction::SrcRegister& src,
                           unsigned mask);
   void scanProperty(const struct tgsi_full_property *);
   void scanImmediate(const struct tgsi_full_immediate *);

   // True for a MOV from an input register into the edge flag output.
   inline bool isEdgeFlagPassthrough(const Instruction&) const;
};
1100
Source(struct nv50_ir_prog_info * prog)1101 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
1102 {
1103 tokens = (const struct tgsi_token *)info->bin.source;
1104
1105 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1106 tgsi_dump(tokens, 0);
1107 }
1108
~Source()1109 Source::~Source()
1110 {
1111 if (insns)
1112 FREE(insns);
1113
1114 if (info->immd.data)
1115 FREE(info->immd.data);
1116 if (info->immd.type)
1117 FREE(info->immd.type);
1118 }
1119
scanSource()1120 bool Source::scanSource()
1121 {
1122 unsigned insnCount = 0;
1123 struct tgsi_parse_context parse;
1124
1125 tgsi_scan_shader(tokens, &scan);
1126
1127 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
1128 sizeof(insns[0]));
1129 if (!insns)
1130 return false;
1131
1132 clipVertexOutput = -1;
1133
1134 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
1135 //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
1136 tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
1137 memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
1138
1139 info->immd.bufSize = 0;
1140
1141 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
1142 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
1143 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
1144
1145 if (info->type == PIPE_SHADER_FRAGMENT) {
1146 info->prop.fp.writesDepth = scan.writes_z;
1147 info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
1148 } else
1149 if (info->type == PIPE_SHADER_GEOMETRY) {
1150 info->prop.gp.instanceCount = 1; // default value
1151 }
1152
1153 info->io.viewportId = -1;
1154
1155 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
1156 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
1157
1158 tgsi_parse_init(&parse, tokens);
1159 while (!tgsi_parse_end_of_tokens(&parse)) {
1160 tgsi_parse_token(&parse);
1161
1162 switch (parse.FullToken.Token.Type) {
1163 case TGSI_TOKEN_TYPE_IMMEDIATE:
1164 scanImmediate(&parse.FullToken.FullImmediate);
1165 break;
1166 case TGSI_TOKEN_TYPE_DECLARATION:
1167 scanDeclaration(&parse.FullToken.FullDeclaration);
1168 break;
1169 case TGSI_TOKEN_TYPE_INSTRUCTION:
1170 insns[insnCount++] = parse.FullToken.FullInstruction;
1171 scanInstruction(&parse.FullToken.FullInstruction);
1172 break;
1173 case TGSI_TOKEN_TYPE_PROPERTY:
1174 scanProperty(&parse.FullToken.FullProperty);
1175 break;
1176 default:
1177 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
1178 break;
1179 }
1180 }
1181 tgsi_parse_free(&parse);
1182
1183 if (indirectTempArrays.size()) {
1184 int tempBase = 0;
1185 for (std::set<int>::const_iterator it = indirectTempArrays.begin();
1186 it != indirectTempArrays.end(); ++it) {
1187 std::pair<int, int>& info = tempArrayInfo[*it];
1188 indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
1189 tempBase += info.second;
1190 }
1191 info->bin.tlsSpace += tempBase * 16;
1192 }
1193
1194 if (info->io.genUserClip > 0) {
1195 info->io.clipDistances = info->io.genUserClip;
1196
1197 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1198
1199 for (unsigned int n = 0; n < nOut; ++n) {
1200 unsigned int i = info->numOutputs++;
1201 info->out[i].id = i;
1202 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1203 info->out[i].si = n;
1204 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1205 }
1206 }
1207
1208 return info->assignSlots(info) == 0;
1209 }
1210
scanProperty(const struct tgsi_full_property * prop)1211 void Source::scanProperty(const struct tgsi_full_property *prop)
1212 {
1213 switch (prop->Property.PropertyName) {
1214 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1215 info->prop.gp.outputPrim = prop->u[0].Data;
1216 break;
1217 case TGSI_PROPERTY_GS_INPUT_PRIM:
1218 info->prop.gp.inputPrim = prop->u[0].Data;
1219 break;
1220 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1221 info->prop.gp.maxVertices = prop->u[0].Data;
1222 break;
1223 case TGSI_PROPERTY_GS_INVOCATIONS:
1224 info->prop.gp.instanceCount = prop->u[0].Data;
1225 break;
1226 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1227 info->prop.fp.separateFragData = true;
1228 break;
1229 case TGSI_PROPERTY_FS_COORD_ORIGIN:
1230 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1231 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
1232 // we don't care
1233 break;
1234 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1235 info->io.genUserClip = -1;
1236 break;
1237 case TGSI_PROPERTY_TCS_VERTICES_OUT:
1238 info->prop.tp.outputPatchSize = prop->u[0].Data;
1239 break;
1240 case TGSI_PROPERTY_TES_PRIM_MODE:
1241 info->prop.tp.domain = prop->u[0].Data;
1242 break;
1243 case TGSI_PROPERTY_TES_SPACING:
1244 info->prop.tp.partitioning = prop->u[0].Data;
1245 break;
1246 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
1247 info->prop.tp.winding = prop->u[0].Data;
1248 break;
1249 case TGSI_PROPERTY_TES_POINT_MODE:
1250 if (prop->u[0].Data)
1251 info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1252 else
1253 info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
1254 break;
1255 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
1256 info->prop.cp.numThreads[0] = prop->u[0].Data;
1257 break;
1258 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
1259 info->prop.cp.numThreads[1] = prop->u[0].Data;
1260 break;
1261 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
1262 info->prop.cp.numThreads[2] = prop->u[0].Data;
1263 break;
1264 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
1265 info->io.clipDistances = prop->u[0].Data;
1266 break;
1267 case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
1268 info->io.cullDistances = prop->u[0].Data;
1269 break;
1270 case TGSI_PROPERTY_NEXT_SHADER:
1271 /* Do not need to know the next shader stage. */
1272 break;
1273 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
1274 info->prop.fp.earlyFragTests = prop->u[0].Data;
1275 break;
1276 case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
1277 info->prop.fp.postDepthCoverage = prop->u[0].Data;
1278 break;
1279 case TGSI_PROPERTY_MUL_ZERO_WINS:
1280 info->io.mul_zero_wins = prop->u[0].Data;
1281 break;
1282 default:
1283 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
1284 break;
1285 }
1286 }
1287
scanImmediate(const struct tgsi_full_immediate * imm)1288 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
1289 {
1290 const unsigned n = info->immd.count++;
1291
1292 assert(n < scan.immediate_count);
1293
1294 for (int c = 0; c < 4; ++c)
1295 info->immd.data[n * 4 + c] = imm->u[c].Uint;
1296
1297 info->immd.type[n] = imm->Immediate.DataType;
1298 }
1299
inferSysValDirection(unsigned sn) const1300 int Source::inferSysValDirection(unsigned sn) const
1301 {
1302 switch (sn) {
1303 case TGSI_SEMANTIC_INSTANCEID:
1304 case TGSI_SEMANTIC_VERTEXID:
1305 return 1;
1306 case TGSI_SEMANTIC_LAYER:
1307 #if 0
1308 case TGSI_SEMANTIC_VIEWPORTINDEX:
1309 return 0;
1310 #endif
1311 case TGSI_SEMANTIC_PRIMID:
1312 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
1313 default:
1314 return 0;
1315 }
1316 }
1317
scanDeclaration(const struct tgsi_full_declaration * decl)1318 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
1319 {
1320 unsigned i, c;
1321 unsigned sn = TGSI_SEMANTIC_GENERIC;
1322 unsigned si = 0;
1323 const unsigned first = decl->Range.First, last = decl->Range.Last;
1324 const int arrayId = decl->Array.ArrayID;
1325
1326 if (decl->Declaration.Semantic) {
1327 sn = decl->Semantic.Name;
1328 si = decl->Semantic.Index;
1329 }
1330
1331 if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
1332 for (i = first; i <= last; ++i) {
1333 for (c = 0; c < 4; ++c) {
1334 locals.insert(
1335 Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
1336 }
1337 }
1338 }
1339
1340 switch (decl->Declaration.File) {
1341 case TGSI_FILE_INPUT:
1342 if (info->type == PIPE_SHADER_VERTEX) {
1343 // all vertex attributes are equal
1344 for (i = first; i <= last; ++i) {
1345 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
1346 info->in[i].si = i;
1347 }
1348 } else {
1349 for (i = first; i <= last; ++i, ++si) {
1350 info->in[i].id = i;
1351 info->in[i].sn = sn;
1352 info->in[i].si = si;
1353 if (info->type == PIPE_SHADER_FRAGMENT) {
1354 // translate interpolation mode
1355 switch (decl->Interp.Interpolate) {
1356 case TGSI_INTERPOLATE_CONSTANT:
1357 info->in[i].flat = 1;
1358 break;
1359 case TGSI_INTERPOLATE_COLOR:
1360 info->in[i].sc = 1;
1361 break;
1362 case TGSI_INTERPOLATE_LINEAR:
1363 info->in[i].linear = 1;
1364 break;
1365 default:
1366 break;
1367 }
1368 if (decl->Interp.Location)
1369 info->in[i].centroid = 1;
1370 }
1371
1372 if (sn == TGSI_SEMANTIC_PATCH)
1373 info->in[i].patch = 1;
1374 if (sn == TGSI_SEMANTIC_PATCH)
1375 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1376 }
1377 }
1378 break;
1379 case TGSI_FILE_OUTPUT:
1380 for (i = first; i <= last; ++i, ++si) {
1381 switch (sn) {
1382 case TGSI_SEMANTIC_POSITION:
1383 if (info->type == PIPE_SHADER_FRAGMENT)
1384 info->io.fragDepth = i;
1385 else
1386 if (clipVertexOutput < 0)
1387 clipVertexOutput = i;
1388 break;
1389 case TGSI_SEMANTIC_COLOR:
1390 if (info->type == PIPE_SHADER_FRAGMENT)
1391 info->prop.fp.numColourResults++;
1392 break;
1393 case TGSI_SEMANTIC_EDGEFLAG:
1394 info->io.edgeFlagOut = i;
1395 break;
1396 case TGSI_SEMANTIC_CLIPVERTEX:
1397 clipVertexOutput = i;
1398 break;
1399 case TGSI_SEMANTIC_CLIPDIST:
1400 info->io.genUserClip = -1;
1401 break;
1402 case TGSI_SEMANTIC_SAMPLEMASK:
1403 info->io.sampleMask = i;
1404 break;
1405 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1406 info->io.viewportId = i;
1407 break;
1408 case TGSI_SEMANTIC_PATCH:
1409 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1410 /* fallthrough */
1411 case TGSI_SEMANTIC_TESSOUTER:
1412 case TGSI_SEMANTIC_TESSINNER:
1413 info->out[i].patch = 1;
1414 break;
1415 default:
1416 break;
1417 }
1418 info->out[i].id = i;
1419 info->out[i].sn = sn;
1420 info->out[i].si = si;
1421 }
1422 break;
1423 case TGSI_FILE_SYSTEM_VALUE:
1424 switch (sn) {
1425 case TGSI_SEMANTIC_INSTANCEID:
1426 info->io.instanceId = first;
1427 break;
1428 case TGSI_SEMANTIC_VERTEXID:
1429 info->io.vertexId = first;
1430 break;
1431 case TGSI_SEMANTIC_BASEVERTEX:
1432 case TGSI_SEMANTIC_BASEINSTANCE:
1433 case TGSI_SEMANTIC_DRAWID:
1434 info->prop.vp.usesDrawParameters = true;
1435 break;
1436 case TGSI_SEMANTIC_SAMPLEID:
1437 case TGSI_SEMANTIC_SAMPLEPOS:
1438 info->prop.fp.persampleInvocation = true;
1439 break;
1440 case TGSI_SEMANTIC_SAMPLEMASK:
1441 info->prop.fp.usesSampleMaskIn = true;
1442 break;
1443 default:
1444 break;
1445 }
1446 for (i = first; i <= last; ++i, ++si) {
1447 info->sv[i].sn = sn;
1448 info->sv[i].si = si;
1449 info->sv[i].input = inferSysValDirection(sn);
1450
1451 switch (sn) {
1452 case TGSI_SEMANTIC_TESSOUTER:
1453 case TGSI_SEMANTIC_TESSINNER:
1454 info->sv[i].patch = 1;
1455 break;
1456 }
1457 }
1458 break;
1459 /*
1460 case TGSI_FILE_RESOURCE:
1461 for (i = first; i <= last; ++i) {
1462 resources[i].target = decl->Resource.Resource;
1463 resources[i].raw = decl->Resource.Raw;
1464 resources[i].slot = i;
1465 }
1466 break;
1467 */
1468 case TGSI_FILE_SAMPLER_VIEW:
1469 for (i = first; i <= last; ++i)
1470 textureViews[i].target = decl->SamplerView.Resource;
1471 break;
1472 case TGSI_FILE_MEMORY:
1473 for (i = first; i <= last; ++i)
1474 memoryFiles[i].mem_type = decl->Declaration.MemType;
1475 break;
1476 case TGSI_FILE_NULL:
1477 case TGSI_FILE_TEMPORARY:
1478 for (i = first; i <= last; ++i)
1479 tempArrayId[i] = arrayId;
1480 if (arrayId)
1481 tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
1482 first, last - first + 1)));
1483 break;
1484 case TGSI_FILE_ADDRESS:
1485 case TGSI_FILE_CONSTANT:
1486 case TGSI_FILE_IMMEDIATE:
1487 case TGSI_FILE_SAMPLER:
1488 case TGSI_FILE_BUFFER:
1489 case TGSI_FILE_IMAGE:
1490 break;
1491 default:
1492 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
1493 return false;
1494 }
1495 return true;
1496 }
1497
isEdgeFlagPassthrough(const Instruction & insn) const1498 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1499 {
1500 return insn.getOpcode() == TGSI_OPCODE_MOV &&
1501 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1502 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1503 }
1504
scanInstructionSrc(const Instruction & insn,const Instruction::SrcRegister & src,unsigned mask)1505 void Source::scanInstructionSrc(const Instruction& insn,
1506 const Instruction::SrcRegister& src,
1507 unsigned mask)
1508 {
1509 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1510 if (src.isIndirect(0))
1511 indirectTempArrays.insert(src.getArrayId());
1512 } else
1513 if (src.getFile() == TGSI_FILE_OUTPUT) {
1514 if (src.isIndirect(0)) {
1515 // We don't know which one is accessed, just mark everything for
1516 // reading. This is an extremely unlikely occurrence.
1517 for (unsigned i = 0; i < info->numOutputs; ++i)
1518 info->out[i].oread = 1;
1519 } else {
1520 info->out[src.getIndex(0)].oread = 1;
1521 }
1522 }
1523 if (src.getFile() != TGSI_FILE_INPUT)
1524 return;
1525
1526 if (src.isIndirect(0)) {
1527 for (unsigned i = 0; i < info->numInputs; ++i)
1528 info->in[i].mask = 0xf;
1529 } else {
1530 const int i = src.getIndex(0);
1531 for (unsigned c = 0; c < 4; ++c) {
1532 if (!(mask & (1 << c)))
1533 continue;
1534 int k = src.getSwizzle(c);
1535 if (k <= TGSI_SWIZZLE_W)
1536 info->in[i].mask |= 1 << k;
1537 }
1538 switch (info->in[i].sn) {
1539 case TGSI_SEMANTIC_PSIZE:
1540 case TGSI_SEMANTIC_PRIMID:
1541 case TGSI_SEMANTIC_FOG:
1542 info->in[i].mask &= 0x1;
1543 break;
1544 case TGSI_SEMANTIC_PCOORD:
1545 info->in[i].mask &= 0x3;
1546 break;
1547 default:
1548 break;
1549 }
1550 }
1551 }
1552
scanInstruction(const struct tgsi_full_instruction * inst)1553 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1554 {
1555 Instruction insn(inst);
1556
1557 if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1558 info->numBarriers = 1;
1559
1560 if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
1561 info->prop.fp.readsFramebuffer = true;
1562
1563 if (insn.dstCount()) {
1564 Instruction::DstRegister dst = insn.getDst(0);
1565
1566 if (dst.getFile() == TGSI_FILE_OUTPUT) {
1567 if (dst.isIndirect(0))
1568 for (unsigned i = 0; i < info->numOutputs; ++i)
1569 info->out[i].mask = 0xf;
1570 else
1571 info->out[dst.getIndex(0)].mask |= dst.getMask();
1572
1573 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1574 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1575 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
1576 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1577 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1578 info->out[dst.getIndex(0)].mask &= 1;
1579
1580 if (isEdgeFlagPassthrough(insn))
1581 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1582 } else
1583 if (dst.getFile() != TGSI_FILE_MEMORY &&
1584 insn.getOpcode() == TGSI_OPCODE_STORE) {
1585 info->io.globalAccess |= 0x2;
1586 } else
1587 if (dst.getFile() == TGSI_FILE_TEMPORARY) {
1588 if (dst.isIndirect(0))
1589 indirectTempArrays.insert(dst.getArrayId());
1590 } else
1591 if (dst.getFile() == TGSI_FILE_BUFFER ||
1592 dst.getFile() == TGSI_FILE_IMAGE ||
1593 (dst.getFile() == TGSI_FILE_MEMORY &&
1594 memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
1595 info->io.globalAccess |= 0x2;
1596 }
1597 }
1598
1599 if (insn.srcCount() && (
1600 insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
1601 memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
1602 TGSI_MEMORY_TYPE_GLOBAL)) {
1603 switch (insn.getOpcode()) {
1604 case TGSI_OPCODE_ATOMUADD:
1605 case TGSI_OPCODE_ATOMXCHG:
1606 case TGSI_OPCODE_ATOMCAS:
1607 case TGSI_OPCODE_ATOMAND:
1608 case TGSI_OPCODE_ATOMOR:
1609 case TGSI_OPCODE_ATOMXOR:
1610 case TGSI_OPCODE_ATOMUMIN:
1611 case TGSI_OPCODE_ATOMIMIN:
1612 case TGSI_OPCODE_ATOMUMAX:
1613 case TGSI_OPCODE_ATOMIMAX:
1614 case TGSI_OPCODE_LOAD:
1615 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1616 0x1 : 0x2;
1617 break;
1618 }
1619 }
1620
1621
1622 for (unsigned s = 0; s < insn.srcCount(); ++s)
1623 scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
1624
1625 for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
1626 scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
1627
1628 return true;
1629 }
1630
1631 nv50_ir::TexInstruction::Target
getTexture(const tgsi::Source * code,int s) const1632 Instruction::getTexture(const tgsi::Source *code, int s) const
1633 {
1634 // XXX: indirect access
1635 unsigned int r;
1636
1637 switch (getSrc(s).getFile()) {
1638 /*
1639 case TGSI_FILE_RESOURCE:
1640 r = getSrc(s).getIndex(0);
1641 return translateTexture(code->resources.at(r).target);
1642 */
1643 case TGSI_FILE_SAMPLER_VIEW:
1644 r = getSrc(s).getIndex(0);
1645 return translateTexture(code->textureViews.at(r).target);
1646 default:
1647 return translateTexture(insn->Texture.Texture);
1648 }
1649 }
1650
1651 } // namespace tgsi
1652
1653 namespace {
1654
1655 using namespace nv50_ir;
1656
// Translates a scanned TGSI shader (tgsi::Source) into nv50 IR, using the
// instruction building helpers inherited from BuildUtil.
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // An IR function together with its value map.
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   // Shifts an index left by 4 into an address register value; NULL stays NULL.
   Value *shiftAddress(Value *);
   // Per-source base values, cached in vtxBase[] / vtxBaseValid.
   Value *getVertexBase(int s);
   Value *getOutputBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   // Fetch component c of source s / destination d of the current instruction.
   Value *fetchSrc(int s, int c);
   Value *fetchDst(int d, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   // Redirects an index into the indirect temporary storage when the array
   // has an entry in Source::indirectTempOffsets.
   void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
   // Applies the ABS / NEG modifiers of source s to val.
   Value *applySrcMod(Value *, int s, int c);

   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   // True for the TGSI_SEMANTIC_SUBGROUP_{EQ,LT,LE,GT,GE}_MASK semantics.
   bool isSubGroupMask(uint8_t semantic);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R, int L_M);
   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
   void handleFBFETCH(Value *dst0[4]);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   // Symbol *getResourceBase(int r);
   void getImageCoords(std::vector<Value *>&, int s);

   void handleLOAD(Value *dst0[4]);
   void handleSTORE();
   void handleATOM(Value *dst0[4], DataType, uint16_t subOp);

   void handleINTERP(Value *dst0[4]);

   // Derives the NV50_IR_INTERP_* mode of a varying and selects
   // OP_PINTERP or OP_LINTERP through 'op'.
   uint8_t translateInterpMode(const struct nv50_ir_varying *var,
                               operation& op);
   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   Value *buildDot(int dim);

   class BindArgumentsPass : public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      inline const Location *getValueLocation(Subroutine *, Value *);

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      // basic blocks are not visited by this pass
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const tgsi::Source *code;
   const struct nv50_ir_prog_info *info;

   struct {
      std::map<unsigned, Subroutine> map;
      Subroutine *cur;
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi;

   DataType dstTy;
   DataType srcTy;

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)

   Value *zero;
   Value *fragCoord[4];
   Value *clipVtx[4];

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid; // bit s set when vtxBase[s] has been computed

   Value *outBase; // base address of vertex out patch (for TCP)

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop

   Value *viewport;
};
1789
1790 Symbol *
srcToSym(tgsi::Instruction::SrcRegister src,int c)1791 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1792 {
1793 const int swz = src.getSwizzle(c);
1794
1795 /* TODO: Use Array ID when it's available for the index */
1796 return makeSym(src.getFile(),
1797 src.is2D() ? src.getIndex(1) : 0,
1798 src.getIndex(0), swz,
1799 src.getIndex(0) * 16 + swz * 4);
1800 }
1801
1802 Symbol *
dstToSym(tgsi::Instruction::DstRegister dst,int c)1803 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1804 {
1805 /* TODO: Use Array ID when it's available for the index */
1806 return makeSym(dst.getFile(),
1807 dst.is2D() ? dst.getIndex(1) : 0,
1808 dst.getIndex(0), c,
1809 dst.getIndex(0) * 16 + c * 4);
1810 }
1811
1812 Symbol *
makeSym(uint tgsiFile,int fileIdx,int idx,int c,uint32_t address)1813 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1814 {
1815 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1816
1817 sym->reg.fileIndex = fileIdx;
1818
1819 if (tgsiFile == TGSI_FILE_MEMORY) {
1820 switch (code->memoryFiles[fileIdx].mem_type) {
1821 case TGSI_MEMORY_TYPE_GLOBAL:
1822 /* No-op this is the default for TGSI_FILE_MEMORY */
1823 sym->setFile(FILE_MEMORY_GLOBAL);
1824 break;
1825 case TGSI_MEMORY_TYPE_SHARED:
1826 sym->setFile(FILE_MEMORY_SHARED);
1827 break;
1828 case TGSI_MEMORY_TYPE_INPUT:
1829 assert(prog->getType() == Program::TYPE_COMPUTE);
1830 assert(idx == -1);
1831 sym->setFile(FILE_SHADER_INPUT);
1832 address += info->prop.cp.inputOffset;
1833 break;
1834 default:
1835 assert(0); /* TODO: Add support for global and private memory */
1836 }
1837 }
1838
1839 if (idx >= 0) {
1840 if (sym->reg.file == FILE_SHADER_INPUT)
1841 sym->setOffset(info->in[idx].slot[c] * 4);
1842 else
1843 if (sym->reg.file == FILE_SHADER_OUTPUT)
1844 sym->setOffset(info->out[idx].slot[c] * 4);
1845 else
1846 if (sym->reg.file == FILE_SYSTEM_VALUE)
1847 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1848 else
1849 sym->setOffset(address);
1850 } else {
1851 sym->setOffset(address);
1852 }
1853 return sym;
1854 }
1855
1856 uint8_t
translateInterpMode(const struct nv50_ir_varying * var,operation & op)1857 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1858 {
1859 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1860
1861 if (var->flat)
1862 mode = NV50_IR_INTERP_FLAT;
1863 else
1864 if (var->linear)
1865 mode = NV50_IR_INTERP_LINEAR;
1866 else
1867 if (var->sc)
1868 mode = NV50_IR_INTERP_SC;
1869
1870 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1871 ? OP_PINTERP : OP_LINTERP;
1872
1873 if (var->centroid)
1874 mode |= NV50_IR_INTERP_CENTROID;
1875
1876 return mode;
1877 }
1878
1879 Value *
interpolate(tgsi::Instruction::SrcRegister src,int c,Value * ptr)1880 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1881 {
1882 operation op;
1883
1884 // XXX: no way to know interpolation mode if we don't know what's accessed
1885 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1886 src.getIndex(0)], op);
1887
1888 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1889
1890 insn->setDef(0, getScratch());
1891 insn->setSrc(0, srcToSym(src, c));
1892 if (op == OP_PINTERP)
1893 insn->setSrc(1, fragCoord[3]);
1894 if (ptr)
1895 insn->setIndirect(0, 0, ptr);
1896
1897 insn->setInterpolate(mode);
1898
1899 bb->insertTail(insn);
1900 return insn->getDef(0);
1901 }
1902
1903 Value *
applySrcMod(Value * val,int s,int c)1904 Converter::applySrcMod(Value *val, int s, int c)
1905 {
1906 Modifier m = tgsi.getSrc(s).getMod(c);
1907 DataType ty = tgsi.inferSrcType();
1908
1909 if (m & Modifier(NV50_IR_MOD_ABS))
1910 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1911
1912 if (m & Modifier(NV50_IR_MOD_NEG))
1913 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1914
1915 return val;
1916 }
1917
1918 Value *
getVertexBase(int s)1919 Converter::getVertexBase(int s)
1920 {
1921 assert(s < 5);
1922 if (!(vtxBaseValid & (1 << s))) {
1923 const int index = tgsi.getSrc(s).getIndex(1);
1924 Value *rel = NULL;
1925 if (tgsi.getSrc(s).isIndirect(1))
1926 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1927 vtxBaseValid |= 1 << s;
1928 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1929 mkImm(index), rel);
1930 }
1931 return vtxBase[s];
1932 }
1933
1934 Value *
getOutputBase(int s)1935 Converter::getOutputBase(int s)
1936 {
1937 assert(s < 5);
1938 if (!(vtxBaseValid & (1 << s))) {
1939 Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
1940 if (tgsi.getSrc(s).isIndirect(1))
1941 offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
1942 fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
1943 offset);
1944 vtxBaseValid |= 1 << s;
1945 vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
1946 }
1947 return vtxBase[s];
1948 }
1949
1950 Value *
fetchSrc(int s,int c)1951 Converter::fetchSrc(int s, int c)
1952 {
1953 Value *res;
1954 Value *ptr = NULL, *dimRel = NULL;
1955
1956 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1957
1958 if (src.isIndirect(0))
1959 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1960
1961 if (src.is2D()) {
1962 switch (src.getFile()) {
1963 case TGSI_FILE_OUTPUT:
1964 dimRel = getOutputBase(s);
1965 break;
1966 case TGSI_FILE_INPUT:
1967 dimRel = getVertexBase(s);
1968 break;
1969 case TGSI_FILE_CONSTANT:
1970 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1971 if (src.isIndirect(1))
1972 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1973 break;
1974 default:
1975 break;
1976 }
1977 }
1978
1979 res = fetchSrc(src, c, ptr);
1980
1981 if (dimRel)
1982 res->getInsn()->setIndirect(0, 1, dimRel);
1983
1984 return applySrcMod(res, s, c);
1985 }
1986
1987 Value *
fetchDst(int d,int c)1988 Converter::fetchDst(int d, int c)
1989 {
1990 Value *res;
1991 Value *ptr = NULL, *dimRel = NULL;
1992
1993 tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1994
1995 if (dst.isIndirect(0))
1996 ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
1997
1998 if (dst.is2D()) {
1999 switch (dst.getFile()) {
2000 case TGSI_FILE_OUTPUT:
2001 assert(0); // TODO
2002 dimRel = NULL;
2003 break;
2004 case TGSI_FILE_INPUT:
2005 assert(0); // TODO
2006 dimRel = NULL;
2007 break;
2008 case TGSI_FILE_CONSTANT:
2009 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
2010 if (dst.isIndirect(1))
2011 dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
2012 break;
2013 default:
2014 break;
2015 }
2016 }
2017
2018 struct tgsi_full_src_register fsr = dst.asSrc();
2019 tgsi::Instruction::SrcRegister src(&fsr);
2020 res = fetchSrc(src, c, ptr);
2021
2022 if (dimRel)
2023 res->getInsn()->setIndirect(0, 1, dimRel);
2024
2025 return res;
2026 }
2027
2028 Converter::DataArray *
getArrayForFile(unsigned file,int idx)2029 Converter::getArrayForFile(unsigned file, int idx)
2030 {
2031 switch (file) {
2032 case TGSI_FILE_TEMPORARY:
2033 return idx == 0 ? &tData : &lData;
2034 case TGSI_FILE_ADDRESS:
2035 return &aData;
2036 case TGSI_FILE_OUTPUT:
2037 assert(prog->getType() == Program::TYPE_FRAGMENT);
2038 return &oData;
2039 default:
2040 assert(!"invalid/unhandled TGSI source file");
2041 return NULL;
2042 }
2043 }
2044
2045 Value *
shiftAddress(Value * index)2046 Converter::shiftAddress(Value *index)
2047 {
2048 if (!index)
2049 return NULL;
2050 return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
2051 }
2052
2053 void
adjustTempIndex(int arrayId,int & idx,int & idx2d) const2054 Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
2055 {
2056 std::map<int, int>::const_iterator it =
2057 code->indirectTempOffsets.find(arrayId);
2058 if (it == code->indirectTempOffsets.end())
2059 return;
2060
2061 idx2d = 1;
2062 idx += it->second;
2063 }
2064
2065 bool
isSubGroupMask(uint8_t semantic)2066 Converter::isSubGroupMask(uint8_t semantic)
2067 {
2068 switch (semantic) {
2069 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
2070 case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
2071 case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
2072 case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
2073 case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
2074 return true;
2075 default:
2076 return false;
2077 }
2078 }
2079
2080 Value *
fetchSrc(tgsi::Instruction::SrcRegister src,int c,Value * ptr)2081 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
2082 {
2083 int idx2d = src.is2D() ? src.getIndex(1) : 0;
2084 int idx = src.getIndex(0);
2085 const int swz = src.getSwizzle(c);
2086 Instruction *ld;
2087
2088 switch (src.getFile()) {
2089 case TGSI_FILE_IMMEDIATE:
2090 assert(!ptr);
2091 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
2092 case TGSI_FILE_CONSTANT:
2093 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
2094 case TGSI_FILE_INPUT:
2095 if (prog->getType() == Program::TYPE_FRAGMENT) {
2096 // don't load masked inputs, won't be assigned a slot
2097 if (!ptr && !(info->in[idx].mask & (1 << swz)))
2098 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
2099 return interpolate(src, c, shiftAddress(ptr));
2100 } else
2101 if (prog->getType() == Program::TYPE_GEOMETRY) {
2102 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
2103 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
2104 // XXX: This is going to be a problem with scalar arrays, i.e. when
2105 // we cannot assume that the address is given in units of vec4.
2106 //
2107 // nv50 and nvc0 need different things here, so let the lowering
2108 // passes decide what to do with the address
2109 if (ptr)
2110 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
2111 }
2112 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2113 ld->perPatch = info->in[idx].patch;
2114 return ld->getDef(0);
2115 case TGSI_FILE_OUTPUT:
2116 assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
2117 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2118 ld->perPatch = info->out[idx].patch;
2119 return ld->getDef(0);
2120 case TGSI_FILE_SYSTEM_VALUE:
2121 assert(!ptr);
2122 if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
2123 info->prop.cp.numThreads[swz] == 1)
2124 return loadImm(NULL, 0u);
2125 if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
2126 return loadImm(NULL, 0u);
2127 if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
2128 return loadImm(NULL, 32u);
2129 ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
2130 ld->perPatch = info->sv[idx].patch;
2131 return ld->getDef(0);
2132 case TGSI_FILE_TEMPORARY: {
2133 int arrayid = src.getArrayId();
2134 if (!arrayid)
2135 arrayid = code->tempArrayId[idx];
2136 adjustTempIndex(arrayid, idx, idx2d);
2137 }
2138 /* fallthrough */
2139 default:
2140 return getArrayForFile(src.getFile(), idx2d)->load(
2141 sub.cur->values, idx, swz, shiftAddress(ptr));
2142 }
2143 }
2144
2145 Value *
acquireDst(int d,int c)2146 Converter::acquireDst(int d, int c)
2147 {
2148 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2149 const unsigned f = dst.getFile();
2150 int idx = dst.getIndex(0);
2151 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2152
2153 if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
2154 f == TGSI_FILE_IMAGE)
2155 return NULL;
2156
2157 if (dst.isIndirect(0) ||
2158 f == TGSI_FILE_SYSTEM_VALUE ||
2159 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
2160 return getScratch();
2161
2162 if (f == TGSI_FILE_TEMPORARY) {
2163 int arrayid = dst.getArrayId();
2164 if (!arrayid)
2165 arrayid = code->tempArrayId[idx];
2166 adjustTempIndex(arrayid, idx, idx2d);
2167 }
2168
2169 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
2170 }
2171
2172 void
storeDst(int d,int c,Value * val)2173 Converter::storeDst(int d, int c, Value *val)
2174 {
2175 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2176
2177 if (tgsi.getSaturate()) {
2178 mkOp1(OP_SAT, dstTy, val, val);
2179 }
2180
2181 Value *ptr = NULL;
2182 if (dst.isIndirect(0))
2183 ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
2184
2185 if (info->io.genUserClip > 0 &&
2186 dst.getFile() == TGSI_FILE_OUTPUT &&
2187 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
2188 mkMov(clipVtx[c], val);
2189 val = clipVtx[c];
2190 }
2191
2192 storeDst(dst, c, val, ptr);
2193 }
2194
2195 void
storeDst(const tgsi::Instruction::DstRegister dst,int c,Value * val,Value * ptr)2196 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
2197 Value *val, Value *ptr)
2198 {
2199 const unsigned f = dst.getFile();
2200 int idx = dst.getIndex(0);
2201 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2202
2203 if (f == TGSI_FILE_SYSTEM_VALUE) {
2204 assert(!ptr);
2205 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
2206 } else
2207 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
2208
2209 if (ptr || (info->out[idx].mask & (1 << c))) {
2210 /* Save the viewport index into a scratch register so that it can be
2211 exported at EMIT time */
2212 if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
2213 prog->getType() == Program::TYPE_GEOMETRY &&
2214 viewport != NULL)
2215 mkOp1(OP_MOV, TYPE_U32, viewport, val);
2216 else
2217 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
2218 info->out[idx].patch;
2219 }
2220 } else
2221 if (f == TGSI_FILE_TEMPORARY ||
2222 f == TGSI_FILE_ADDRESS ||
2223 f == TGSI_FILE_OUTPUT) {
2224 if (f == TGSI_FILE_TEMPORARY) {
2225 int arrayid = dst.getArrayId();
2226 if (!arrayid)
2227 arrayid = code->tempArrayId[idx];
2228 adjustTempIndex(arrayid, idx, idx2d);
2229 }
2230
2231 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
2232 } else {
2233 assert(!"invalid dst file");
2234 }
2235 }
2236
// Run the statement that follows once for every channel 0..3 that is not
// masked in destination d of inst, with chan set to the channel number.
// chan must be an assignable integer lvalue declared by the user; it is 4
// once the loop has finished.
//
// The parameters are parenthesized so that expression arguments (e.g. *p)
// expand correctly. Note the expansion ends in a bare `if`: do not attach an
// `else` to the statement that follows.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)  \
   for ((chan) = 0; (chan) < 4; ++(chan))            \
      if (!(inst).getDst(d).isMasked(chan))
2240
2241 Value *
buildDot(int dim)2242 Converter::buildDot(int dim)
2243 {
2244 assert(dim > 0);
2245
2246 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
2247 Value *dotp = getScratch();
2248
2249 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
2250 ->dnz = info->io.mul_zero_wins;
2251
2252 for (int c = 1; c < dim; ++c) {
2253 src0 = fetchSrc(0, c);
2254 src1 = fetchSrc(1, c);
2255 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
2256 ->dnz = info->io.mul_zero_wins;
2257 }
2258 return dotp;
2259 }
2260
2261 void
insertConvergenceOps(BasicBlock * conv,BasicBlock * fork)2262 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
2263 {
2264 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
2265 join->fixed = 1;
2266 conv->insertHead(join);
2267
2268 assert(!fork->joinAt);
2269 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
2270 fork->insertBefore(fork->getExit(), fork->joinAt);
2271 }
2272
2273 void
setTexRS(TexInstruction * tex,unsigned int & s,int R,int S)2274 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
2275 {
2276 unsigned rIdx = 0, sIdx = 0;
2277
2278 if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
2279 // This is the bindless case. We have to get the actual value and pass
2280 // it in. This will be the complete handle.
2281 tex->tex.rIndirectSrc = s;
2282 tex->setSrc(s++, fetchSrc(R, 0));
2283 tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
2284 tex->tex.bindless = true;
2285 return;
2286 }
2287
2288 if (R >= 0)
2289 rIdx = tgsi.getSrc(R).getIndex(0);
2290 if (S >= 0)
2291 sIdx = tgsi.getSrc(S).getIndex(0);
2292
2293 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
2294
2295 if (tgsi.getSrc(R).isIndirect(0)) {
2296 tex->tex.rIndirectSrc = s;
2297 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
2298 }
2299 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
2300 tex->tex.sIndirectSrc = s;
2301 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
2302 }
2303 }
2304
2305 void
handleTXQ(Value * dst0[4],enum TexQuery query,int R)2306 Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
2307 {
2308 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
2309 tex->tex.query = query;
2310 unsigned int c, d;
2311
2312 for (d = 0, c = 0; c < 4; ++c) {
2313 if (!dst0[c])
2314 continue;
2315 tex->tex.mask |= 1 << c;
2316 tex->setDef(d++, dst0[c]);
2317 }
2318 if (query == TXQ_DIMS)
2319 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
2320 else
2321 tex->setSrc((c = 0), zero);
2322
2323 setTexRS(tex, ++c, R, -1);
2324
2325 bb->insertTail(tex);
2326 }
2327
2328 void
loadProjTexCoords(Value * dst[4],Value * src[4],unsigned int mask)2329 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
2330 {
2331 Value *proj = fetchSrc(0, 3);
2332 Instruction *insn = proj->getUniqueInsn();
2333 int c;
2334
2335 if (insn->op == OP_PINTERP) {
2336 bb->insertTail(insn = cloneForward(func, insn));
2337 insn->op = OP_LINTERP;
2338 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
2339 insn->setSrc(1, NULL);
2340 proj = insn->getDef(0);
2341 }
2342 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
2343
2344 for (c = 0; c < 4; ++c) {
2345 if (!(mask & (1 << c)))
2346 continue;
2347 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
2348 continue;
2349 mask &= ~(1 << c);
2350
2351 bb->insertTail(insn = cloneForward(func, insn));
2352 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
2353 insn->setSrc(1, proj);
2354 dst[c] = insn->getDef(0);
2355 }
2356 if (!mask)
2357 return;
2358
2359 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
2360
2361 for (c = 0; c < 4; ++c)
2362 if (mask & (1 << c))
2363 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
2364 }
2365
2366 // order of nv50 ir sources: x y z layer lod/bias shadow
2367 // order of TGSI TEX sources: x y z layer shadow lod/bias
2368 // lowering will finally set the hw specific order (like array first on nvc0)
2369 void
handleTEX(Value * dst[4],int R,int S,int L,int C,int Dx,int Dy)2370 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
2371 {
2372 Value *arg[4], *src[8];
2373 Value *lod = NULL, *shd = NULL;
2374 unsigned int s, c, d;
2375 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2376
2377 TexInstruction::Target tgt = tgsi.getTexture(code, R);
2378
2379 for (s = 0; s < tgt.getArgCount(); ++s)
2380 arg[s] = src[s] = fetchSrc(0, s);
2381
2382 if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
2383 lod = loadImm(NULL, 0);
2384 else if (texi->op == OP_TXL || texi->op == OP_TXB)
2385 lod = fetchSrc(L >> 4, L & 3);
2386
2387 if (C == 0x0f)
2388 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
2389
2390 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
2391 tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
2392 shd = fetchSrc(1, 0);
2393 else if (tgt.isShadow())
2394 shd = fetchSrc(C >> 4, C & 3);
2395
2396 if (texi->op == OP_TXD) {
2397 for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
2398 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
2399 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
2400 }
2401 }
2402
2403 // cube textures don't care about projection value, it's divided out
2404 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
2405 unsigned int n = tgt.getDim();
2406 if (shd) {
2407 arg[n] = shd;
2408 ++n;
2409 assert(tgt.getDim() == tgt.getArgCount());
2410 }
2411 loadProjTexCoords(src, arg, (1 << n) - 1);
2412 if (shd)
2413 shd = src[n - 1];
2414 }
2415
2416 for (c = 0, d = 0; c < 4; ++c) {
2417 if (dst[c]) {
2418 texi->setDef(d++, dst[c]);
2419 texi->tex.mask |= 1 << c;
2420 } else {
2421 // NOTE: maybe hook up def too, for CSE
2422 }
2423 }
2424 for (s = 0; s < tgt.getArgCount(); ++s)
2425 texi->setSrc(s, src[s]);
2426 if (lod)
2427 texi->setSrc(s++, lod);
2428 if (shd)
2429 texi->setSrc(s++, shd);
2430
2431 setTexRS(texi, s, R, S);
2432
2433 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
2434 texi->tex.levelZero = true;
2435 if (prog->getType() != Program::TYPE_FRAGMENT &&
2436 (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
2437 tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
2438 tgsi.getOpcode() == TGSI_OPCODE_TXP))
2439 texi->tex.levelZero = true;
2440 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
2441 texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
2442
2443 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2444 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2445 for (c = 0; c < 3; ++c) {
2446 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2447 texi->offset[s][c].setInsn(texi);
2448 }
2449 }
2450
2451 bb->insertTail(texi);
2452 }
2453
2454 // 1st source: xyz = coordinates, w = lod/sample
2455 // 2nd source: offset
2456 void
handleTXF(Value * dst[4],int R,int L_M)2457 Converter::handleTXF(Value *dst[4], int R, int L_M)
2458 {
2459 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2460 int ms;
2461 unsigned int c, d, s;
2462
2463 texi->tex.target = tgsi.getTexture(code, R);
2464
2465 ms = texi->tex.target.isMS() ? 1 : 0;
2466 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
2467
2468 for (c = 0, d = 0; c < 4; ++c) {
2469 if (dst[c]) {
2470 texi->setDef(d++, dst[c]);
2471 texi->tex.mask |= 1 << c;
2472 }
2473 }
2474 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
2475 texi->setSrc(c, fetchSrc(0, c));
2476 if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
2477 texi->setSrc(c++, loadImm(NULL, 0));
2478 else
2479 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
2480
2481 setTexRS(texi, c, R, -1);
2482
2483 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2484 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2485 for (c = 0; c < 3; ++c) {
2486 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2487 texi->offset[s][c].setInsn(texi);
2488 }
2489 }
2490
2491 bb->insertTail(texi);
2492 }
2493
2494 void
handleFBFETCH(Value * dst[4])2495 Converter::handleFBFETCH(Value *dst[4])
2496 {
2497 TexInstruction *texi = new_TexInstruction(func, OP_TXF);
2498 unsigned int c, d;
2499
2500 texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
2501 texi->tex.levelZero = 1;
2502 texi->tex.useOffsets = 0;
2503
2504 for (c = 0, d = 0; c < 4; ++c) {
2505 if (dst[c]) {
2506 texi->setDef(d++, dst[c]);
2507 texi->tex.mask |= 1 << c;
2508 }
2509 }
2510
2511 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
2512 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
2513 Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
2514 Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
2515
2516 mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
2517 mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
2518 texi->setSrc(0, x);
2519 texi->setSrc(1, y);
2520 texi->setSrc(2, z);
2521 texi->setSrc(3, ms);
2522
2523 texi->tex.r = texi->tex.s = -1;
2524
2525 bb->insertTail(texi);
2526 }
2527
2528 void
handleLIT(Value * dst0[4])2529 Converter::handleLIT(Value *dst0[4])
2530 {
2531 Value *val0 = NULL;
2532 unsigned int mask = tgsi.getDst(0).getMask();
2533
2534 if (mask & (1 << 0))
2535 loadImm(dst0[0], 1.0f);
2536
2537 if (mask & (1 << 3))
2538 loadImm(dst0[3], 1.0f);
2539
2540 if (mask & (3 << 1)) {
2541 val0 = getScratch();
2542 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
2543 if (mask & (1 << 1))
2544 mkMov(dst0[1], val0);
2545 }
2546
2547 if (mask & (1 << 2)) {
2548 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
2549 Value *val1 = getScratch(), *val3 = getScratch();
2550
2551 Value *pos128 = loadImm(NULL, +127.999999f);
2552 Value *neg128 = loadImm(NULL, -127.999999f);
2553
2554 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
2555 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
2556 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
2557 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
2558
2559 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
2560 }
2561 }
2562
2563 /* Keep this around for now as reference when adding img support
2564 static inline bool
2565 isResourceSpecial(const int r)
2566 {
2567 return (r == TGSI_RESOURCE_GLOBAL ||
2568 r == TGSI_RESOURCE_LOCAL ||
2569 r == TGSI_RESOURCE_PRIVATE ||
2570 r == TGSI_RESOURCE_INPUT);
2571 }
2572
2573 static inline bool
2574 isResourceRaw(const tgsi::Source *code, const int r)
2575 {
2576 return isResourceSpecial(r) || code->resources[r].raw;
2577 }
2578
2579 static inline nv50_ir::TexTarget
2580 getResourceTarget(const tgsi::Source *code, int r)
2581 {
2582 if (isResourceSpecial(r))
2583 return nv50_ir::TEX_TARGET_BUFFER;
2584 return tgsi::translateTexture(code->resources.at(r).target);
2585 }
2586
2587 Symbol *
2588 Converter::getResourceBase(const int r)
2589 {
2590 Symbol *sym = NULL;
2591
2592 switch (r) {
2593 case TGSI_RESOURCE_GLOBAL:
2594 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
2595 info->io.auxCBSlot);
2596 break;
2597 case TGSI_RESOURCE_LOCAL:
2598 assert(prog->getType() == Program::TYPE_COMPUTE);
2599 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
2600 info->prop.cp.sharedOffset);
2601 break;
2602 case TGSI_RESOURCE_PRIVATE:
2603 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
2604 info->bin.tlsSpace);
2605 break;
2606 case TGSI_RESOURCE_INPUT:
2607 assert(prog->getType() == Program::TYPE_COMPUTE);
2608 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
2609 info->prop.cp.inputOffset);
2610 break;
2611 default:
2612 sym = new_Symbol(prog,
2613 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
2614 break;
2615 }
2616 return sym;
2617 }
2618
2619 void
2620 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
2621 {
2622 const int arg =
2623 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
2624
2625 for (int c = 0; c < arg; ++c)
2626 coords.push_back(fetchSrc(s, c));
2627
2628 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2629 if (r == TGSI_RESOURCE_LOCAL ||
2630 r == TGSI_RESOURCE_PRIVATE ||
2631 r == TGSI_RESOURCE_INPUT)
2632 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2633 coords[0]);
2634 }
2635 */
// Split a component mask into at most two runs of consecutive components.
//
// comp[i] receives the index of the first component of run i and size[i]
// the number of components in it. Both arrays must be zero-initialized by
// the caller. A run of exactly three components is split in two so that the
// first piece has one component if the run starts at component 1 and two
// components otherwise. Returns the number of runs written (1 or 2).
//
// The mask is expected to describe at most two runs (always true for a
// 4-bit mask); bits beyond that are not handled.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // Gap right after the first run: the second run starts no
            // earlier than one past the gap. The first run's own start
            // offset has to be included, otherwise masks such as 0b1010
            // yield a wrong start for the second run.
            comp[n = 1] = comp[0] + size[0] + 1;
         } else {
            // Still skipping leading holes of the current run.
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
2660
2661 void
getImageCoords(std::vector<Value * > & coords,int s)2662 Converter::getImageCoords(std::vector<Value *> &coords, int s)
2663 {
2664 TexInstruction::Target t =
2665 TexInstruction::Target(tgsi.getImageTarget());
2666 const int arg = t.getDim() + (t.isArray() || t.isCube());
2667
2668 for (int c = 0; c < arg; ++c)
2669 coords.push_back(fetchSrc(s, c));
2670
2671 if (t.isMS())
2672 coords.push_back(fetchSrc(s, 3));
2673 }
2674
2675 // For raw loads, granularity is 4 byte.
2676 // Usage of the texture read mask on OP_SULDP is not allowed.
2677 void
handleLOAD(Value * dst0[4])2678 Converter::handleLOAD(Value *dst0[4])
2679 {
2680 const int r = tgsi.getSrc(0).getIndex(0);
2681 int c;
2682 std::vector<Value *> off, src, ldv, def;
2683 Value *ind = NULL;
2684
2685 if (tgsi.getSrc(0).isIndirect(0))
2686 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2687
2688 switch (tgsi.getSrc(0).getFile()) {
2689 case TGSI_FILE_BUFFER:
2690 case TGSI_FILE_MEMORY:
2691 for (c = 0; c < 4; ++c) {
2692 if (!dst0[c])
2693 continue;
2694
2695 Value *off;
2696 Symbol *sym;
2697 uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
2698
2699 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
2700 off = NULL;
2701 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2702 tgsi.getSrc(1).getValueU32(0, info) +
2703 src0_component_offset);
2704 } else {
2705 // yzw are ignored for buffers
2706 off = fetchSrc(1, 0);
2707 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2708 src0_component_offset);
2709 }
2710
2711 Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
2712 ld->cache = tgsi.getCacheMode();
2713 if (ind)
2714 ld->setIndirect(0, 1, ind);
2715 }
2716 break;
2717 default: {
2718 getImageCoords(off, 1);
2719 def.resize(4);
2720
2721 for (c = 0; c < 4; ++c) {
2722 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2723 def[c] = getScratch();
2724 else
2725 def[c] = dst0[c];
2726 }
2727
2728 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2729 if (bindless)
2730 ind = fetchSrc(0, 0);
2731
2732 TexInstruction *ld =
2733 mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
2734 ld->tex.mask = tgsi.getDst(0).getMask();
2735 ld->tex.format = tgsi.getImageFormat();
2736 ld->cache = tgsi.getCacheMode();
2737 ld->tex.bindless = bindless;
2738 if (!bindless)
2739 ld->tex.r = r;
2740 if (ind)
2741 ld->setIndirectR(ind);
2742
2743 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2744 if (dst0[c] != def[c])
2745 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2746 break;
2747 }
2748 }
2749
2750
2751 /* Keep this around for now as reference when adding img support
2752 getResourceCoords(off, r, 1);
2753
2754 if (isResourceRaw(code, r)) {
2755 uint8_t mask = 0;
2756 uint8_t comp[2] = { 0, 0 };
2757 uint8_t size[2] = { 0, 0 };
2758
2759 Symbol *base = getResourceBase(r);
2760
2761 // determine the base and size of the at most 2 load ops
2762 for (c = 0; c < 4; ++c)
2763 if (!tgsi.getDst(0).isMasked(c))
2764 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2765
2766 int n = partitionLoadStore(comp, size, mask);
2767
2768 src = off;
2769
2770 def.resize(4); // index by component, the ones we need will be non-NULL
2771 for (c = 0; c < 4; ++c) {
2772 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2773 def[c] = dst0[c];
2774 else
2775 if (mask & (1 << c))
2776 def[c] = getScratch();
2777 }
2778
2779 const bool useLd = isResourceSpecial(r) ||
2780 (info->io.nv50styleSurfaces &&
2781 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2782
2783 for (int i = 0; i < n; ++i) {
2784 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2785
2786 if (comp[i]) // adjust x component of source address if necessary
2787 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2788 off[0], mkImm(comp[i] * 4));
2789 else
2790 src[0] = off[0];
2791
2792 if (useLd) {
2793 Instruction *ld =
2794 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2795 for (size_t c = 1; c < ldv.size(); ++c)
2796 ld->setDef(c, ldv[c]);
2797 } else {
2798 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2799 0, ldv, src)->dType = typeOfSize(size[i] * 4);
2800 }
2801 }
2802 } else {
2803 def.resize(4);
2804 for (c = 0; c < 4; ++c) {
2805 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2806 def[c] = getScratch();
2807 else
2808 def[c] = dst0[c];
2809 }
2810
2811 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2812 def, off);
2813 }
2814 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2815 if (dst0[c] != def[c])
2816 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2817 */
2818 }
2819
2820 // For formatted stores, the write mask on OP_SUSTP can be used.
2821 // Raw stores have to be split.
2822 void
handleSTORE()2823 Converter::handleSTORE()
2824 {
2825 const int r = tgsi.getDst(0).getIndex(0);
2826 int c;
2827 std::vector<Value *> off, src, dummy;
2828 Value *ind = NULL;
2829
2830 if (tgsi.getDst(0).isIndirect(0))
2831 ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
2832
2833 switch (tgsi.getDst(0).getFile()) {
2834 case TGSI_FILE_BUFFER:
2835 case TGSI_FILE_MEMORY:
2836 for (c = 0; c < 4; ++c) {
2837 if (!(tgsi.getDst(0).getMask() & (1 << c)))
2838 continue;
2839
2840 Symbol *sym;
2841 Value *off;
2842 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
2843 off = NULL;
2844 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
2845 tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
2846 } else {
2847 // yzw are ignored for buffers
2848 off = fetchSrc(0, 0);
2849 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
2850 }
2851
2852 Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
2853 st->cache = tgsi.getCacheMode();
2854 if (ind)
2855 st->setIndirect(0, 1, ind);
2856 }
2857 break;
2858 default: {
2859 getImageCoords(off, 0);
2860 src = off;
2861
2862 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2863 src.push_back(fetchSrc(1, c));
2864
2865 bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
2866 if (bindless)
2867 ind = fetchDst(0, 0);
2868
2869 TexInstruction *st =
2870 mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
2871 st->tex.mask = tgsi.getDst(0).getMask();
2872 st->tex.format = tgsi.getImageFormat();
2873 st->cache = tgsi.getCacheMode();
2874 st->tex.bindless = bindless;
2875 if (!bindless)
2876 st->tex.r = r;
2877 if (ind)
2878 st->setIndirectR(ind);
2879
2880 break;
2881 }
2882 }
2883
2884 /* Keep this around for now as reference when adding img support
2885 getResourceCoords(off, r, 0);
2886 src = off;
2887 const int s = src.size();
2888
2889 if (isResourceRaw(code, r)) {
2890 uint8_t comp[2] = { 0, 0 };
2891 uint8_t size[2] = { 0, 0 };
2892
2893 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2894
2895 Symbol *base = getResourceBase(r);
2896
2897 const bool useSt = isResourceSpecial(r) ||
2898 (info->io.nv50styleSurfaces &&
2899 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2900
2901 for (int i = 0; i < n; ++i) {
2902 if (comp[i]) // adjust x component of source address if necessary
2903 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2904 off[0], mkImm(comp[i] * 4));
2905 else
2906 src[0] = off[0];
2907
2908 const DataType stTy = typeOfSize(size[i] * 4);
2909
2910 if (useSt) {
2911 Instruction *st =
2912 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2913 for (c = 1; c < size[i]; ++c)
2914 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2915 st->setIndirect(0, 0, src[0]);
2916 } else {
2917 // attach values to be stored
2918 src.resize(s + size[i]);
2919 for (c = 0; c < size[i]; ++c)
2920 src[s + c] = fetchSrc(1, comp[i] + c);
2921 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2922 0, dummy, src)->setType(stTy);
2923 }
2924 }
2925 } else {
2926 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2927 src.push_back(fetchSrc(1, c));
2928
2929 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2930 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2931 }
2932 */
2933 }
2934
2935 // XXX: These only work on resources with the single-component u32/s32 formats.
2936 // Therefore the result is replicated. This might not be intended by TGSI, but
2937 // operating on more than 1 component would produce undefined results because
2938 // they do not exist.
2939 void
handleATOM(Value * dst0[4],DataType ty,uint16_t subOp)2940 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2941 {
2942 const int r = tgsi.getSrc(0).getIndex(0);
2943 std::vector<Value *> srcv;
2944 std::vector<Value *> defv;
2945 LValue *dst = getScratch();
2946 Value *ind = NULL;
2947
2948 if (tgsi.getSrc(0).isIndirect(0))
2949 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2950
2951 switch (tgsi.getSrc(0).getFile()) {
2952 case TGSI_FILE_BUFFER:
2953 case TGSI_FILE_MEMORY:
2954 for (int c = 0; c < 4; ++c) {
2955 if (!dst0[c])
2956 continue;
2957
2958 Instruction *insn;
2959 Value *off = fetchSrc(1, c);
2960 Value *sym;
2961 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
2962 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2963 tgsi.getSrc(1).getValueU32(c, info));
2964 else
2965 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
2966 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2967 insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
2968 else
2969 insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
2970 if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
2971 insn->setIndirect(0, 0, off);
2972 if (ind)
2973 insn->setIndirect(0, 1, ind);
2974 insn->subOp = subOp;
2975 }
2976 for (int c = 0; c < 4; ++c)
2977 if (dst0[c])
2978 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2979 break;
2980 default: {
2981 getImageCoords(srcv, 1);
2982 defv.push_back(dst);
2983 srcv.push_back(fetchSrc(2, 0));
2984
2985 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2986 srcv.push_back(fetchSrc(3, 0));
2987
2988 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2989 if (bindless)
2990 ind = fetchSrc(0, 0);
2991
2992 TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
2993 0, 0, defv, srcv);
2994 tex->subOp = subOp;
2995 tex->tex.mask = 1;
2996 tex->tex.format = tgsi.getImageFormat();
2997 tex->setType(ty);
2998 tex->tex.bindless = bindless;
2999 if (!bindless)
3000 tex->tex.r = r;
3001 if (ind)
3002 tex->setIndirectR(ind);
3003
3004 for (int c = 0; c < 4; ++c)
3005 if (dst0[c])
3006 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3007 break;
3008 }
3009 }
3010
3011 /* Keep this around for now as reference when adding img support
3012 getResourceCoords(srcv, r, 1);
3013
3014 if (isResourceSpecial(r)) {
3015 assert(r != TGSI_RESOURCE_INPUT);
3016 Instruction *insn;
3017 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
3018 insn->subOp = subOp;
3019 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3020 insn->setSrc(2, fetchSrc(3, 0));
3021 insn->setIndirect(0, 0, srcv.at(0));
3022 } else {
3023 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
3024 TexTarget targ = getResourceTarget(code, r);
3025 int idx = code->resources[r].slot;
3026 defv.push_back(dst);
3027 srcv.push_back(fetchSrc(2, 0));
3028 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3029 srcv.push_back(fetchSrc(3, 0));
3030 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
3031 tex->subOp = subOp;
3032 tex->tex.mask = 1;
3033 tex->setType(ty);
3034 }
3035
3036 for (int c = 0; c < 4; ++c)
3037 if (dst0[c])
3038 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3039 */
3040 }
3041
3042 void
handleINTERP(Value * dst[4])3043 Converter::handleINTERP(Value *dst[4])
3044 {
3045 // Check whether the input is linear. All other attributes ignored.
3046 Instruction *insn;
3047 Value *offset = NULL, *ptr = NULL, *w = NULL;
3048 Symbol *sym[4] = { NULL };
3049 bool linear;
3050 operation op = OP_NOP;
3051 int c, mode = 0;
3052
3053 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
3054
3055 // In some odd cases, in large part due to varying packing, the source
3056 // might not actually be an input. This is illegal TGSI, but it's easier to
3057 // account for it here than it is to fix it where the TGSI is being
3058 // generated. In that case, it's going to be a straight up mov (or sequence
3059 // of mov's) from the input in question. We follow the mov chain to see
3060 // which input we need to use.
3061 if (src.getFile() != TGSI_FILE_INPUT) {
3062 if (src.isIndirect(0)) {
3063 ERROR("Ignoring indirect input interpolation\n");
3064 return;
3065 }
3066 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3067 Value *val = fetchSrc(0, c);
3068 assert(val->defs.size() == 1);
3069 insn = val->getInsn();
3070 while (insn->op == OP_MOV) {
3071 assert(insn->getSrc(0)->defs.size() == 1);
3072 insn = insn->getSrc(0)->getInsn();
3073 if (!insn) {
3074 ERROR("Miscompiling shader due to unhandled INTERP\n");
3075 return;
3076 }
3077 }
3078 if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
3079 ERROR("Trying to interpolate non-input, this is not allowed.\n");
3080 return;
3081 }
3082 sym[c] = insn->getSrc(0)->asSym();
3083 assert(sym[c]);
3084 op = insn->op;
3085 mode = insn->ipa;
3086 }
3087 } else {
3088 if (src.isIndirect(0))
3089 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
3090
3091 // We can assume that the fixed index will point to an input of the same
3092 // interpolation type in case of an indirect.
3093 // TODO: Make use of ArrayID.
3094 linear = info->in[src.getIndex(0)].linear;
3095 if (linear) {
3096 op = OP_LINTERP;
3097 mode = NV50_IR_INTERP_LINEAR;
3098 } else {
3099 op = OP_PINTERP;
3100 mode = NV50_IR_INTERP_PERSPECTIVE;
3101 }
3102 }
3103
3104 switch (tgsi.getOpcode()) {
3105 case TGSI_OPCODE_INTERP_CENTROID:
3106 mode |= NV50_IR_INTERP_CENTROID;
3107 break;
3108 case TGSI_OPCODE_INTERP_SAMPLE:
3109 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
3110 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
3111 mode |= NV50_IR_INTERP_OFFSET;
3112 break;
3113 case TGSI_OPCODE_INTERP_OFFSET: {
3114 // The input in src1.xy is float, but we need a single 32-bit value
3115 // where the upper and lower 16 bits are encoded in S0.12 format. We need
3116 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
3117 // and then convert to s32.
3118 Value *offs[2];
3119 for (c = 0; c < 2; c++) {
3120 offs[c] = getScratch();
3121 mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
3122 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
3123 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
3124 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
3125 }
3126 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
3127 offs[1], mkImm(0x1010), offs[0]);
3128 mode |= NV50_IR_INTERP_OFFSET;
3129 break;
3130 }
3131 }
3132
3133 if (op == OP_PINTERP) {
3134 if (offset) {
3135 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
3136 mkOp1(OP_RCP, TYPE_F32, w, w);
3137 } else {
3138 w = fragCoord[3];
3139 }
3140 }
3141
3142
3143 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3144 insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
3145 if (op == OP_PINTERP)
3146 insn->setSrc(1, w);
3147 if (ptr)
3148 insn->setIndirect(0, 0, ptr);
3149 if (offset)
3150 insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
3151
3152 insn->setInterpolate(mode);
3153 }
3154 }
3155
3156 Converter::Subroutine *
getSubroutine(unsigned ip)3157 Converter::getSubroutine(unsigned ip)
3158 {
3159 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3160
3161 if (it == sub.map.end())
3162 it = sub.map.insert(std::make_pair(
3163 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
3164
3165 return &it->second;
3166 }
3167
3168 Converter::Subroutine *
getSubroutine(Function * f)3169 Converter::getSubroutine(Function *f)
3170 {
3171 unsigned ip = f->getLabel();
3172 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3173
3174 if (it == sub.map.end())
3175 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
3176
3177 return &it->second;
3178 }
3179
3180 bool
isEndOfSubroutine(uint ip)3181 Converter::isEndOfSubroutine(uint ip)
3182 {
3183 assert(ip < code->scan.num_instructions);
3184 tgsi::Instruction insn(&code->insns[ip]);
3185 return (insn.getOpcode() == TGSI_OPCODE_END ||
3186 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
3187 // does END occur at end of main or the very end ?
3188 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
3189 }
3190
3191 bool
handleInstruction(const struct tgsi_full_instruction * insn)3192 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
3193 {
3194 Instruction *geni;
3195
3196 Value *dst0[4], *rDst0[4];
3197 Value *src0, *src1, *src2, *src3;
3198 Value *val0, *val1;
3199 int c;
3200
3201 tgsi = tgsi::Instruction(insn);
3202
3203 bool useScratchDst = tgsi.checkDstSrcAliasing();
3204
3205 operation op = tgsi.getOP();
3206 dstTy = tgsi.inferDstType();
3207 srcTy = tgsi.inferSrcType();
3208
3209 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
3210
3211 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
3212 for (c = 0; c < 4; ++c) {
3213 rDst0[c] = acquireDst(0, c);
3214 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
3215 }
3216 }
3217
3218 switch (tgsi.getOpcode()) {
3219 case TGSI_OPCODE_ADD:
3220 case TGSI_OPCODE_UADD:
3221 case TGSI_OPCODE_AND:
3222 case TGSI_OPCODE_DIV:
3223 case TGSI_OPCODE_IDIV:
3224 case TGSI_OPCODE_UDIV:
3225 case TGSI_OPCODE_MAX:
3226 case TGSI_OPCODE_MIN:
3227 case TGSI_OPCODE_IMAX:
3228 case TGSI_OPCODE_IMIN:
3229 case TGSI_OPCODE_UMAX:
3230 case TGSI_OPCODE_UMIN:
3231 case TGSI_OPCODE_MOD:
3232 case TGSI_OPCODE_UMOD:
3233 case TGSI_OPCODE_MUL:
3234 case TGSI_OPCODE_UMUL:
3235 case TGSI_OPCODE_IMUL_HI:
3236 case TGSI_OPCODE_UMUL_HI:
3237 case TGSI_OPCODE_OR:
3238 case TGSI_OPCODE_SHL:
3239 case TGSI_OPCODE_ISHR:
3240 case TGSI_OPCODE_USHR:
3241 case TGSI_OPCODE_XOR:
3242 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3243 src0 = fetchSrc(0, c);
3244 src1 = fetchSrc(1, c);
3245 geni = mkOp2(op, dstTy, dst0[c], src0, src1);
3246 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3247 if (op == OP_MUL && dstTy == TYPE_F32)
3248 geni->dnz = info->io.mul_zero_wins;
3249 geni->precise = insn->Instruction.Precise;
3250 }
3251 break;
3252 case TGSI_OPCODE_MAD:
3253 case TGSI_OPCODE_UMAD:
3254 case TGSI_OPCODE_FMA:
3255 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3256 src0 = fetchSrc(0, c);
3257 src1 = fetchSrc(1, c);
3258 src2 = fetchSrc(2, c);
3259 geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
3260 if (dstTy == TYPE_F32)
3261 geni->dnz = info->io.mul_zero_wins;
3262 geni->precise = insn->Instruction.Precise;
3263 }
3264 break;
3265 case TGSI_OPCODE_MOV:
3266 case TGSI_OPCODE_CEIL:
3267 case TGSI_OPCODE_FLR:
3268 case TGSI_OPCODE_TRUNC:
3269 case TGSI_OPCODE_RCP:
3270 case TGSI_OPCODE_SQRT:
3271 case TGSI_OPCODE_IABS:
3272 case TGSI_OPCODE_INEG:
3273 case TGSI_OPCODE_NOT:
3274 case TGSI_OPCODE_DDX:
3275 case TGSI_OPCODE_DDY:
3276 case TGSI_OPCODE_DDX_FINE:
3277 case TGSI_OPCODE_DDY_FINE:
3278 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3279 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
3280 break;
3281 case TGSI_OPCODE_RSQ:
3282 src0 = fetchSrc(0, 0);
3283 val0 = getScratch();
3284 mkOp1(OP_ABS, TYPE_F32, val0, src0);
3285 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
3286 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3287 mkMov(dst0[c], val0);
3288 break;
3289 case TGSI_OPCODE_ARL:
3290 case TGSI_OPCODE_ARR:
3291 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3292 const RoundMode rnd =
3293 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
3294 src0 = fetchSrc(0, c);
3295 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
3296 }
3297 break;
3298 case TGSI_OPCODE_UARL:
3299 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3300 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
3301 break;
3302 case TGSI_OPCODE_POW:
3303 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
3304 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3305 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3306 break;
3307 case TGSI_OPCODE_EX2:
3308 case TGSI_OPCODE_LG2:
3309 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
3310 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3311 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3312 break;
3313 case TGSI_OPCODE_COS:
3314 case TGSI_OPCODE_SIN:
3315 val0 = getScratch();
3316 if (mask & 7) {
3317 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
3318 mkOp1(op, TYPE_F32, val0, val0);
3319 for (c = 0; c < 3; ++c)
3320 if (dst0[c])
3321 mkMov(dst0[c], val0);
3322 }
3323 if (dst0[3]) {
3324 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
3325 mkOp1(op, TYPE_F32, dst0[3], val0);
3326 }
3327 break;
3328 case TGSI_OPCODE_EXP:
3329 src0 = fetchSrc(0, 0);
3330 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
3331 if (dst0[1])
3332 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
3333 if (dst0[0])
3334 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
3335 if (dst0[2])
3336 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
3337 if (dst0[3])
3338 loadImm(dst0[3], 1.0f);
3339 break;
3340 case TGSI_OPCODE_LOG:
3341 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
3342 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
3343 if (dst0[0] || dst0[1])
3344 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
3345 if (dst0[1]) {
3346 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
3347 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
3348 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
3349 ->dnz = info->io.mul_zero_wins;
3350 }
3351 if (dst0[3])
3352 loadImm(dst0[3], 1.0f);
3353 break;
3354 case TGSI_OPCODE_DP2:
3355 val0 = buildDot(2);
3356 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3357 mkMov(dst0[c], val0);
3358 break;
3359 case TGSI_OPCODE_DP3:
3360 val0 = buildDot(3);
3361 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3362 mkMov(dst0[c], val0);
3363 break;
3364 case TGSI_OPCODE_DP4:
3365 val0 = buildDot(4);
3366 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3367 mkMov(dst0[c], val0);
3368 break;
3369 case TGSI_OPCODE_DST:
3370 if (dst0[0])
3371 loadImm(dst0[0], 1.0f);
3372 if (dst0[1]) {
3373 src0 = fetchSrc(0, 1);
3374 src1 = fetchSrc(1, 1);
3375 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
3376 ->dnz = info->io.mul_zero_wins;
3377 }
3378 if (dst0[2])
3379 mkMov(dst0[2], fetchSrc(0, 2));
3380 if (dst0[3])
3381 mkMov(dst0[3], fetchSrc(1, 3));
3382 break;
3383 case TGSI_OPCODE_LRP:
3384 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3385 src0 = fetchSrc(0, c);
3386 src1 = fetchSrc(1, c);
3387 src2 = fetchSrc(2, c);
3388 mkOp3(OP_MAD, TYPE_F32, dst0[c],
3389 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
3390 ->dnz = info->io.mul_zero_wins;
3391 }
3392 break;
3393 case TGSI_OPCODE_LIT:
3394 handleLIT(dst0);
3395 break;
3396 case TGSI_OPCODE_ISSG:
3397 case TGSI_OPCODE_SSG:
3398 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3399 src0 = fetchSrc(0, c);
3400 val0 = getScratch();
3401 val1 = getScratch();
3402 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
3403 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
3404 if (srcTy == TYPE_F32)
3405 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
3406 else
3407 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
3408 }
3409 break;
3410 case TGSI_OPCODE_UCMP:
3411 srcTy = TYPE_U32;
3412 /* fallthrough */
3413 case TGSI_OPCODE_CMP:
3414 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3415 src0 = fetchSrc(0, c);
3416 src1 = fetchSrc(1, c);
3417 src2 = fetchSrc(2, c);
3418 if (src1 == src2)
3419 mkMov(dst0[c], src1);
3420 else
3421 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
3422 srcTy, dst0[c], srcTy, src1, src2, src0);
3423 }
3424 break;
3425 case TGSI_OPCODE_FRC:
3426 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3427 src0 = fetchSrc(0, c);
3428 val0 = getScratch();
3429 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
3430 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
3431 }
3432 break;
3433 case TGSI_OPCODE_ROUND:
3434 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3435 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
3436 ->rnd = ROUND_NI;
3437 break;
3438 case TGSI_OPCODE_SLT:
3439 case TGSI_OPCODE_SGE:
3440 case TGSI_OPCODE_SEQ:
3441 case TGSI_OPCODE_SGT:
3442 case TGSI_OPCODE_SLE:
3443 case TGSI_OPCODE_SNE:
3444 case TGSI_OPCODE_FSEQ:
3445 case TGSI_OPCODE_FSGE:
3446 case TGSI_OPCODE_FSLT:
3447 case TGSI_OPCODE_FSNE:
3448 case TGSI_OPCODE_ISGE:
3449 case TGSI_OPCODE_ISLT:
3450 case TGSI_OPCODE_USEQ:
3451 case TGSI_OPCODE_USGE:
3452 case TGSI_OPCODE_USLT:
3453 case TGSI_OPCODE_USNE:
3454 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3455 src0 = fetchSrc(0, c);
3456 src1 = fetchSrc(1, c);
3457 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3458 }
3459 break;
3460 case TGSI_OPCODE_VOTE_ALL:
3461 case TGSI_OPCODE_VOTE_ANY:
3462 case TGSI_OPCODE_VOTE_EQ:
3463 val0 = new_LValue(func, FILE_PREDICATE);
3464 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3465 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
3466 mkOp1(op, dstTy, val0, val0)
3467 ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3468 mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
3469 }
3470 break;
3471 case TGSI_OPCODE_BALLOT:
3472 if (!tgsi.getDst(0).isMasked(0)) {
3473 val0 = new_LValue(func, FILE_PREDICATE);
3474 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
3475 mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
3476 }
3477 if (!tgsi.getDst(0).isMasked(1))
3478 mkMov(dst0[1], zero, TYPE_U32);
3479 break;
3480 case TGSI_OPCODE_READ_FIRST:
3481 // ReadFirstInvocationARB(src) is implemented as
3482 // ReadInvocationARB(src, findLSB(ballot(true)))
3483 val0 = getScratch();
3484 mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
3485 mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
3486 ->subOp = NV50_IR_SUBOP_EXTBF_REV;
3487 mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3488 src1 = val0;
3489 /* fallthrough */
3490 case TGSI_OPCODE_READ_INVOC:
3491 if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
3492 src1 = fetchSrc(1, 0);
3493 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3494 geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
3495 geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
3496 }
3497 break;
3498 case TGSI_OPCODE_CLOCK:
3499 // Stick the 32-bit clock into the high dword of the logical result.
3500 if (!tgsi.getDst(0).isMasked(0))
3501 mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
3502 if (!tgsi.getDst(0).isMasked(1))
3503 mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
3504 break;
3505 case TGSI_OPCODE_KILL_IF:
3506 val0 = new_LValue(func, FILE_PREDICATE);
3507 mask = 0;
3508 for (c = 0; c < 4; ++c) {
3509 const int s = tgsi.getSrc(0).getSwizzle(c);
3510 if (mask & (1 << s))
3511 continue;
3512 mask |= 1 << s;
3513 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
3514 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
3515 }
3516 break;
3517 case TGSI_OPCODE_KILL:
3518 mkOp(OP_DISCARD, TYPE_NONE, NULL);
3519 break;
3520 case TGSI_OPCODE_TEX:
3521 case TGSI_OPCODE_TEX_LZ:
3522 case TGSI_OPCODE_TXB:
3523 case TGSI_OPCODE_TXL:
3524 case TGSI_OPCODE_TXP:
3525 case TGSI_OPCODE_LODQ:
3526 // R S L C Dx Dy
3527 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
3528 break;
3529 case TGSI_OPCODE_TXD:
3530 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
3531 break;
3532 case TGSI_OPCODE_TG4:
3533 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
3534 break;
3535 case TGSI_OPCODE_TEX2:
3536 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
3537 break;
3538 case TGSI_OPCODE_TXB2:
3539 case TGSI_OPCODE_TXL2:
3540 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
3541 break;
3542 case TGSI_OPCODE_SAMPLE:
3543 case TGSI_OPCODE_SAMPLE_B:
3544 case TGSI_OPCODE_SAMPLE_D:
3545 case TGSI_OPCODE_SAMPLE_L:
3546 case TGSI_OPCODE_SAMPLE_C:
3547 case TGSI_OPCODE_SAMPLE_C_LZ:
3548 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
3549 break;
3550 case TGSI_OPCODE_TXF_LZ:
3551 case TGSI_OPCODE_TXF:
3552 handleTXF(dst0, 1, 0x03);
3553 break;
3554 case TGSI_OPCODE_SAMPLE_I:
3555 handleTXF(dst0, 1, 0x03);
3556 break;
3557 case TGSI_OPCODE_SAMPLE_I_MS:
3558 handleTXF(dst0, 1, 0x20);
3559 break;
3560 case TGSI_OPCODE_TXQ:
3561 case TGSI_OPCODE_SVIEWINFO:
3562 handleTXQ(dst0, TXQ_DIMS, 1);
3563 break;
3564 case TGSI_OPCODE_TXQS:
3565 // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
3566 // be in .x
3567 dst0[1] = dst0[2] = dst0[3] = NULL;
3568 std::swap(dst0[0], dst0[2]);
3569 handleTXQ(dst0, TXQ_TYPE, 0);
3570 std::swap(dst0[0], dst0[2]);
3571 break;
3572 case TGSI_OPCODE_FBFETCH:
3573 handleFBFETCH(dst0);
3574 break;
3575 case TGSI_OPCODE_F2I:
3576 case TGSI_OPCODE_F2U:
3577 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3578 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
3579 break;
3580 case TGSI_OPCODE_I2F:
3581 case TGSI_OPCODE_U2F:
3582 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3583 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
3584 break;
3585 case TGSI_OPCODE_PK2H:
3586 val0 = getScratch();
3587 val1 = getScratch();
3588 mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
3589 mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
3590 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3591 mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
3592 break;
3593 case TGSI_OPCODE_UP2H:
3594 src0 = fetchSrc(0, 0);
3595 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3596 geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
3597 geni->subOp = c & 1;
3598 }
3599 break;
3600 case TGSI_OPCODE_EMIT:
3601 /* export the saved viewport index */
3602 if (viewport != NULL) {
3603 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
3604 info->out[info->io.viewportId].slot[0] * 4);
3605 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
3606 }
3607 /* fallthrough */
3608 case TGSI_OPCODE_ENDPRIM:
3609 {
3610 // get vertex stream (must be immediate)
3611 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
3612 if (stream && op == OP_RESTART)
3613 break;
3614 if (info->prop.gp.maxVertices == 0)
3615 break;
3616 src0 = mkImm(stream);
3617 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
3618 break;
3619 }
3620 case TGSI_OPCODE_IF:
3621 case TGSI_OPCODE_UIF:
3622 {
3623 BasicBlock *ifBB = new BasicBlock(func);
3624
3625 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
3626 condBBs.push(bb);
3627 joinBBs.push(bb);
3628
3629 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
3630
3631 setPosition(ifBB, true);
3632 }
3633 break;
3634 case TGSI_OPCODE_ELSE:
3635 {
3636 BasicBlock *elseBB = new BasicBlock(func);
3637 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3638
3639 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
3640 condBBs.push(bb);
3641
3642 forkBB->getExit()->asFlow()->target.bb = elseBB;
3643 if (!bb->isTerminated())
3644 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
3645
3646 setPosition(elseBB, true);
3647 }
3648 break;
3649 case TGSI_OPCODE_ENDIF:
3650 {
3651 BasicBlock *convBB = new BasicBlock(func);
3652 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3653 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
3654
3655 if (!bb->isTerminated()) {
3656 // we only want join if none of the clauses ended with CONT/BREAK/RET
3657 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
3658 insertConvergenceOps(convBB, forkBB);
3659 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
3660 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3661 }
3662
3663 if (prevBB->getExit()->op == OP_BRA) {
3664 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3665 prevBB->getExit()->asFlow()->target.bb = convBB;
3666 }
3667 setPosition(convBB, true);
3668 }
3669 break;
3670 case TGSI_OPCODE_BGNLOOP:
3671 {
3672 BasicBlock *lbgnBB = new BasicBlock(func);
3673 BasicBlock *lbrkBB = new BasicBlock(func);
3674
3675 loopBBs.push(lbgnBB);
3676 breakBBs.push(lbrkBB);
3677 if (loopBBs.getSize() > func->loopNestingBound)
3678 func->loopNestingBound++;
3679
3680 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
3681
3682 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
3683 setPosition(lbgnBB, true);
3684 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
3685 }
3686 break;
3687 case TGSI_OPCODE_ENDLOOP:
3688 {
3689 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
3690
3691 if (!bb->isTerminated()) {
3692 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
3693 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
3694 }
3695 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
3696
3697 // If the loop never breaks (e.g. only has RET's inside), then there
3698 // will be no way to get to the break bb. However BGNLOOP will have
3699 // already made a PREBREAK to it, so it must be in the CFG.
3700 if (getBB()->cfg.incidentCount() == 0)
3701 loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
3702 }
3703 break;
3704 case TGSI_OPCODE_BRK:
3705 {
3706 if (bb->isTerminated())
3707 break;
3708 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
3709 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
3710 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
3711 }
3712 break;
3713 case TGSI_OPCODE_CONT:
3714 {
3715 if (bb->isTerminated())
3716 break;
3717 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
3718 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
3719 contBB->explicitCont = true;
3720 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
3721 }
3722 break;
3723 case TGSI_OPCODE_BGNSUB:
3724 {
3725 Subroutine *s = getSubroutine(ip);
3726 BasicBlock *entry = new BasicBlock(s->f);
3727 BasicBlock *leave = new BasicBlock(s->f);
3728
3729 // multiple entrypoints possible, keep the graph connected
3730 if (prog->getType() == Program::TYPE_COMPUTE)
3731 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
3732
3733 sub.cur = s;
3734 s->f->setEntry(entry);
3735 s->f->setExit(leave);
3736 setPosition(entry, true);
3737 return true;
3738 }
3739 case TGSI_OPCODE_ENDSUB:
3740 {
3741 sub.cur = getSubroutine(prog->main);
3742 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
3743 return true;
3744 }
3745 case TGSI_OPCODE_CAL:
3746 {
3747 Subroutine *s = getSubroutine(tgsi.getLabel());
3748 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
3749 func->call.attach(&s->f->call, Graph::Edge::TREE);
3750 return true;
3751 }
3752 case TGSI_OPCODE_RET:
3753 {
3754 if (bb->isTerminated())
3755 return true;
3756 BasicBlock *leave = BasicBlock::get(func->cfgExit);
3757
3758 if (!isEndOfSubroutine(ip + 1)) {
3759 // insert a PRERET at the entry if this is an early return
3760 // (only needed for sharing code in the epilogue)
3761 BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
3762 if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
3763 BasicBlock *pos = getBB();
3764 setPosition(root, false);
3765 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
3766 setPosition(pos, true);
3767 }
3768 }
3769 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
3770 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
3771 }
3772 break;
3773 case TGSI_OPCODE_END:
3774 {
3775 // attach and generate epilogue code
3776 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
3777 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
3778 setPosition(epilogue, true);
3779 if (prog->getType() == Program::TYPE_FRAGMENT)
3780 exportOutputs();
3781 if (info->io.genUserClip > 0)
3782 handleUserClipPlanes();
3783 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
3784 }
3785 break;
3786 case TGSI_OPCODE_SWITCH:
3787 case TGSI_OPCODE_CASE:
3788 ERROR("switch/case opcode encountered, should have been lowered\n");
3789 abort();
3790 break;
3791 case TGSI_OPCODE_LOAD:
3792 handleLOAD(dst0);
3793 break;
3794 case TGSI_OPCODE_STORE:
3795 handleSTORE();
3796 break;
3797 case TGSI_OPCODE_BARRIER:
3798 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
3799 geni->fixed = 1;
3800 geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
3801 break;
3802 case TGSI_OPCODE_MEMBAR:
3803 {
3804 uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
3805 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3806 geni->fixed = 1;
3807 if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
3808 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
3809 else
3810 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
3811 }
3812 break;
3813 case TGSI_OPCODE_ATOMUADD:
3814 case TGSI_OPCODE_ATOMXCHG:
3815 case TGSI_OPCODE_ATOMCAS:
3816 case TGSI_OPCODE_ATOMAND:
3817 case TGSI_OPCODE_ATOMOR:
3818 case TGSI_OPCODE_ATOMXOR:
3819 case TGSI_OPCODE_ATOMUMIN:
3820 case TGSI_OPCODE_ATOMIMIN:
3821 case TGSI_OPCODE_ATOMUMAX:
3822 case TGSI_OPCODE_ATOMIMAX:
3823 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
3824 break;
3825 case TGSI_OPCODE_RESQ:
3826 if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
3827 Value *ind = NULL;
3828 if (tgsi.getSrc(0).isIndirect(0))
3829 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
3830 geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
3831 makeSym(tgsi.getSrc(0).getFile(),
3832 tgsi.getSrc(0).getIndex(0), -1, 0, 0));
3833 if (ind)
3834 geni->setIndirect(0, 1, ind);
3835 } else {
3836 TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
3837 for (int c = 0, d = 0; c < 4; ++c) {
3838 if (dst0[c]) {
3839 texi->setDef(d++, dst0[c]);
3840 texi->tex.mask |= 1 << c;
3841 }
3842 }
3843 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
3844 texi->tex.r = tgsi.getSrc(0).getIndex(0);
3845 if (tgsi.getSrc(0).isIndirect(0))
3846 texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
3847 } else {
3848 texi->tex.bindless = true;
3849 texi->setIndirectR(fetchSrc(0, 0));
3850 }
3851 texi->tex.target = tgsi.getImageTarget();
3852
3853 bb->insertTail(texi);
3854 }
3855 break;
3856 case TGSI_OPCODE_IBFE:
3857 case TGSI_OPCODE_UBFE:
3858 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3859 src0 = fetchSrc(0, c);
3860 val0 = getScratch();
3861 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
3862 tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
3863 loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
3864 tgsi.getSrc(1).getValueU32(c, info));
3865 } else {
3866 src1 = fetchSrc(1, c);
3867 src2 = fetchSrc(2, c);
3868 mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
3869 }
3870 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
3871 }
3872 break;
3873 case TGSI_OPCODE_BFI:
3874 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3875 src0 = fetchSrc(0, c);
3876 src1 = fetchSrc(1, c);
3877 src2 = fetchSrc(2, c);
3878 src3 = fetchSrc(3, c);
3879 val0 = getScratch();
3880 mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
3881 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
3882 }
3883 break;
3884 case TGSI_OPCODE_LSB:
3885 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3886 src0 = fetchSrc(0, c);
3887 val0 = getScratch();
3888 geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
3889 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3890 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
3891 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3892 }
3893 break;
3894 case TGSI_OPCODE_IMSB:
3895 case TGSI_OPCODE_UMSB:
3896 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3897 src0 = fetchSrc(0, c);
3898 mkOp1(OP_BFIND, srcTy, dst0[c], src0);
3899 }
3900 break;
3901 case TGSI_OPCODE_BREV:
3902 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3903 src0 = fetchSrc(0, c);
3904 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
3905 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3906 }
3907 break;
3908 case TGSI_OPCODE_POPC:
3909 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3910 src0 = fetchSrc(0, c);
3911 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
3912 }
3913 break;
3914 case TGSI_OPCODE_INTERP_CENTROID:
3915 case TGSI_OPCODE_INTERP_SAMPLE:
3916 case TGSI_OPCODE_INTERP_OFFSET:
3917 handleINTERP(dst0);
3918 break;
3919 case TGSI_OPCODE_I642F:
3920 case TGSI_OPCODE_U642F:
3921 case TGSI_OPCODE_D2I:
3922 case TGSI_OPCODE_D2U:
3923 case TGSI_OPCODE_D2F: {
3924 int pos = 0;
3925 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3926 Value *dreg = getSSA(8);
3927 src0 = fetchSrc(0, pos);
3928 src1 = fetchSrc(0, pos + 1);
3929 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
3930 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
3931 if (!isFloatType(dstTy))
3932 cvt->rnd = ROUND_Z;
3933 pos += 2;
3934 }
3935 break;
3936 }
3937 case TGSI_OPCODE_I2I64:
3938 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3939 dst0[c] = fetchSrc(0, c / 2);
3940 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
3941 c++;
3942 }
3943 break;
3944 case TGSI_OPCODE_U2I64:
3945 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3946 dst0[c] = fetchSrc(0, c / 2);
3947 dst0[c + 1] = zero;
3948 c++;
3949 }
3950 break;
3951 case TGSI_OPCODE_F2I64:
3952 case TGSI_OPCODE_F2U64:
3953 case TGSI_OPCODE_I2D:
3954 case TGSI_OPCODE_U2D:
3955 case TGSI_OPCODE_F2D:
3956 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3957 Value *dreg = getSSA(8);
3958 Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
3959 if (!isFloatType(dstTy))
3960 cvt->rnd = ROUND_Z;
3961 mkSplit(&dst0[c], 4, dreg);
3962 c++;
3963 }
3964 break;
3965 case TGSI_OPCODE_D2I64:
3966 case TGSI_OPCODE_D2U64:
3967 case TGSI_OPCODE_I642D:
3968 case TGSI_OPCODE_U642D:
3969 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3970 src0 = getSSA(8);
3971 Value *dst = getSSA(8), *tmp[2];
3972 tmp[0] = fetchSrc(0, c);
3973 tmp[1] = fetchSrc(0, c + 1);
3974 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3975 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
3976 if (!isFloatType(dstTy))
3977 cvt->rnd = ROUND_Z;
3978 mkSplit(&dst0[c], 4, dst);
3979 c++;
3980 }
3981 break;
3982 case TGSI_OPCODE_I64NEG:
3983 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3984 src0 = getSSA(8);
3985 Value *dst = getSSA(8), *tmp[2];
3986 tmp[0] = fetchSrc(0, c);
3987 tmp[1] = fetchSrc(0, c + 1);
3988 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3989 mkOp2(OP_SUB, dstTy, dst, zero, src0);
3990 mkSplit(&dst0[c], 4, dst);
3991 c++;
3992 }
3993 break;
3994 case TGSI_OPCODE_I64ABS:
3995 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3996 src0 = getSSA(8);
3997 Value *neg = getSSA(8), *srcComp[2], *negComp[2];
3998 srcComp[0] = fetchSrc(0, c);
3999 srcComp[1] = fetchSrc(0, c + 1);
4000 mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
4001 mkOp2(OP_SUB, dstTy, neg, zero, src0);
4002 mkSplit(negComp, 4, neg);
4003 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
4004 negComp[0], srcComp[0], srcComp[1]);
4005 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
4006 negComp[1], srcComp[1], srcComp[1]);
4007 c++;
4008 }
4009 break;
4010 case TGSI_OPCODE_DABS:
4011 case TGSI_OPCODE_DNEG:
4012 case TGSI_OPCODE_DRCP:
4013 case TGSI_OPCODE_DSQRT:
4014 case TGSI_OPCODE_DRSQ:
4015 case TGSI_OPCODE_DTRUNC:
4016 case TGSI_OPCODE_DCEIL:
4017 case TGSI_OPCODE_DFLR:
4018 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4019 src0 = getSSA(8);
4020 Value *dst = getSSA(8), *tmp[2];
4021 tmp[0] = fetchSrc(0, c);
4022 tmp[1] = fetchSrc(0, c + 1);
4023 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4024 mkOp1(op, dstTy, dst, src0);
4025 mkSplit(&dst0[c], 4, dst);
4026 c++;
4027 }
4028 break;
4029 case TGSI_OPCODE_DFRAC:
4030 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4031 src0 = getSSA(8);
4032 Value *dst = getSSA(8), *tmp[2];
4033 tmp[0] = fetchSrc(0, c);
4034 tmp[1] = fetchSrc(0, c + 1);
4035 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4036 mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
4037 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
4038 mkSplit(&dst0[c], 4, dst);
4039 c++;
4040 }
4041 break;
4042 case TGSI_OPCODE_U64SEQ:
4043 case TGSI_OPCODE_U64SNE:
4044 case TGSI_OPCODE_U64SLT:
4045 case TGSI_OPCODE_U64SGE:
4046 case TGSI_OPCODE_I64SLT:
4047 case TGSI_OPCODE_I64SGE:
4048 case TGSI_OPCODE_DSLT:
4049 case TGSI_OPCODE_DSGE:
4050 case TGSI_OPCODE_DSEQ:
4051 case TGSI_OPCODE_DSNE: {
4052 int pos = 0;
4053 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4054 Value *tmp[2];
4055
4056 src0 = getSSA(8);
4057 src1 = getSSA(8);
4058 tmp[0] = fetchSrc(0, pos);
4059 tmp[1] = fetchSrc(0, pos + 1);
4060 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4061 tmp[0] = fetchSrc(1, pos);
4062 tmp[1] = fetchSrc(1, pos + 1);
4063 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4064 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
4065 pos += 2;
4066 }
4067 break;
4068 }
4069 case TGSI_OPCODE_U64MIN:
4070 case TGSI_OPCODE_U64MAX:
4071 case TGSI_OPCODE_I64MIN:
4072 case TGSI_OPCODE_I64MAX: {
4073 dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
4074 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4075 Value *flag = getSSA(1, FILE_FLAGS);
4076 src0 = fetchSrc(0, c + 1);
4077 src1 = fetchSrc(1, c + 1);
4078 geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
4079 geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
4080 geni->setFlagsDef(1, flag);
4081
4082 src0 = fetchSrc(0, c);
4083 src1 = fetchSrc(1, c);
4084 geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
4085 geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
4086 geni->setFlagsSrc(2, flag);
4087
4088 c++;
4089 }
4090 break;
4091 }
4092 case TGSI_OPCODE_U64SHL:
4093 case TGSI_OPCODE_I64SHR:
4094 case TGSI_OPCODE_U64SHR:
4095 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4096 src0 = getSSA(8);
4097 Value *dst = getSSA(8), *tmp[2];
4098 tmp[0] = fetchSrc(0, c);
4099 tmp[1] = fetchSrc(0, c + 1);
4100 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4101 // Theoretically src1 is a 64-bit value but in practice only the low
4102 // bits matter. The IR expects this to be a 32-bit value.
4103 src1 = fetchSrc(1, c);
4104 mkOp2(op, dstTy, dst, src0, src1);
4105 mkSplit(&dst0[c], 4, dst);
4106 c++;
4107 }
4108 break;
4109 case TGSI_OPCODE_U64ADD:
4110 case TGSI_OPCODE_U64MUL:
4111 case TGSI_OPCODE_DADD:
4112 case TGSI_OPCODE_DMUL:
4113 case TGSI_OPCODE_DDIV:
4114 case TGSI_OPCODE_DMAX:
4115 case TGSI_OPCODE_DMIN:
4116 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4117 src0 = getSSA(8);
4118 src1 = getSSA(8);
4119 Value *dst = getSSA(8), *tmp[2];
4120 tmp[0] = fetchSrc(0, c);
4121 tmp[1] = fetchSrc(0, c + 1);
4122 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4123 tmp[0] = fetchSrc(1, c);
4124 tmp[1] = fetchSrc(1, c + 1);
4125 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4126 mkOp2(op, dstTy, dst, src0, src1);
4127 mkSplit(&dst0[c], 4, dst);
4128 c++;
4129 }
4130 break;
4131 case TGSI_OPCODE_DMAD:
4132 case TGSI_OPCODE_DFMA:
4133 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4134 src0 = getSSA(8);
4135 src1 = getSSA(8);
4136 src2 = getSSA(8);
4137 Value *dst = getSSA(8), *tmp[2];
4138 tmp[0] = fetchSrc(0, c);
4139 tmp[1] = fetchSrc(0, c + 1);
4140 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4141 tmp[0] = fetchSrc(1, c);
4142 tmp[1] = fetchSrc(1, c + 1);
4143 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4144 tmp[0] = fetchSrc(2, c);
4145 tmp[1] = fetchSrc(2, c + 1);
4146 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
4147 mkOp3(op, dstTy, dst, src0, src1, src2);
4148 mkSplit(&dst0[c], 4, dst);
4149 c++;
4150 }
4151 break;
4152 case TGSI_OPCODE_DROUND:
4153 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4154 src0 = getSSA(8);
4155 Value *dst = getSSA(8), *tmp[2];
4156 tmp[0] = fetchSrc(0, c);
4157 tmp[1] = fetchSrc(0, c + 1);
4158 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4159 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
4160 ->rnd = ROUND_NI;
4161 mkSplit(&dst0[c], 4, dst);
4162 c++;
4163 }
4164 break;
4165 case TGSI_OPCODE_DSSG:
4166 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4167 src0 = getSSA(8);
4168 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
4169 tmp[0] = fetchSrc(0, c);
4170 tmp[1] = fetchSrc(0, c + 1);
4171 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4172
4173 val0 = getScratch();
4174 val1 = getScratch();
4175 // The zero is wrong here since it's only 32-bit, but it works out in
4176 // the end since it gets replaced with $r63.
4177 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
4178 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
4179 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
4180 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
4181 mkSplit(&dst0[c], 4, dst);
4182 c++;
4183 }
4184 break;
4185 case TGSI_OPCODE_I64SSG:
4186 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4187 src0 = getSSA(8);
4188 Value *tmp[2];
4189 tmp[0] = fetchSrc(0, c);
4190 tmp[1] = fetchSrc(0, c + 1);
4191 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4192
4193 val0 = getScratch();
4194 val1 = getScratch();
4195 mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
4196 mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
4197 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
4198 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
4199 c++;
4200 }
4201 break;
4202 default:
4203 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
4204 assert(0);
4205 break;
4206 }
4207
4208 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
4209 for (c = 0; c < 4; ++c) {
4210 if (!dst0[c])
4211 continue;
4212 if (dst0[c] != rDst0[c])
4213 mkMov(rDst0[c], dst0[c]);
4214 storeDst(0, c, rDst0[c]);
4215 }
4216 }
4217 vtxBaseValid = 0;
4218
4219 return true;
4220 }
4221
4222 void
handleUserClipPlanes()4223 Converter::handleUserClipPlanes()
4224 {
4225 Value *res[8];
4226 int n, i, c;
4227
4228 for (c = 0; c < 4; ++c) {
4229 for (i = 0; i < info->io.genUserClip; ++i) {
4230 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4231 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
4232 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
4233 if (c == 0)
4234 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
4235 else
4236 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
4237 }
4238 }
4239
4240 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
4241
4242 for (i = 0; i < info->io.genUserClip; ++i) {
4243 n = i / 4 + first;
4244 c = i % 4;
4245 Symbol *sym =
4246 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
4247 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
4248 }
4249 }
4250
4251 void
exportOutputs()4252 Converter::exportOutputs()
4253 {
4254 if (info->io.alphaRefBase) {
4255 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4256 if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
4257 info->out[i].si != 0)
4258 continue;
4259 const unsigned int c = 3;
4260 if (!oData.exists(sub.cur->values, i, c))
4261 continue;
4262 Value *val = oData.load(sub.cur->values, i, c, NULL);
4263 if (!val)
4264 continue;
4265
4266 Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4267 TYPE_U32, info->io.alphaRefBase);
4268 Value *pred = new_LValue(func, FILE_PREDICATE);
4269 mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
4270 mkLoadv(TYPE_U32, ref, NULL))
4271 ->subOp = 1;
4272 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
4273 }
4274 }
4275
4276 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4277 for (unsigned int c = 0; c < 4; ++c) {
4278 if (!oData.exists(sub.cur->values, i, c))
4279 continue;
4280 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
4281 info->out[i].slot[c] * 4);
4282 Value *val = oData.load(sub.cur->values, i, c, NULL);
4283 if (val) {
4284 if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
4285 mkOp1(OP_SAT, TYPE_F32, val, val);
4286 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
4287 }
4288 }
4289 }
4290 }
4291
Converter(Program * ir,const tgsi::Source * code)4292 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
4293 code(code),
4294 tgsi(NULL),
4295 tData(this), lData(this), aData(this), oData(this)
4296 {
4297 info = code->info;
4298
4299 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
4300 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
4301 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
4302
4303 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
4304 lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
4305 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
4306 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
4307
4308 zero = mkImm((uint32_t)0);
4309
4310 vtxBaseValid = 0;
4311 }
4312
~Converter()4313 Converter::~Converter()
4314 {
4315 }
4316
4317 inline const Converter::Location *
getValueLocation(Subroutine * s,Value * v)4318 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
4319 {
4320 ValueMap::l_iterator it = s->values.l.find(v);
4321 return it == s->values.l.end() ? NULL : &it->second;
4322 }
4323
4324 template<typename T> inline void
updateCallArgs(Instruction * i,void (Instruction::* setArg)(int,Value *),T (Function::* proto))4325 Converter::BindArgumentsPass::updateCallArgs(
4326 Instruction *i, void (Instruction::*setArg)(int, Value *),
4327 T (Function::*proto))
4328 {
4329 Function *g = i->asFlow()->target.fn;
4330 Subroutine *subg = conv.getSubroutine(g);
4331
4332 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
4333 Value *v = (g->*proto)[a].get();
4334 const Converter::Location &l = *getValueLocation(subg, v);
4335 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
4336
4337 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
4338 }
4339 }
4340
4341 template<typename T> inline void
updatePrototype(BitSet * set,void (Function::* updateSet)(),T (Function::* proto))4342 Converter::BindArgumentsPass::updatePrototype(
4343 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
4344 {
4345 (func->*updateSet)();
4346
4347 for (unsigned i = 0; i < set->getSize(); ++i) {
4348 Value *v = func->getLValue(i);
4349 const Converter::Location *l = getValueLocation(sub, v);
4350
4351 // only include values with a matching TGSI register
4352 if (set->test(i) && l && !conv.code->locals.count(*l))
4353 (func->*proto).push_back(v);
4354 }
4355 }
4356
4357 bool
visit(Function * f)4358 Converter::BindArgumentsPass::visit(Function *f)
4359 {
4360 sub = conv.getSubroutine(f);
4361
4362 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
4363 !bi.end(); bi.next()) {
4364 for (Instruction *i = BasicBlock::get(bi)->getFirst();
4365 i; i = i->next) {
4366 if (i->op == OP_CALL && !i->asFlow()->builtin) {
4367 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
4368 updateCallArgs(i, &Instruction::setDef, &Function::outs);
4369 }
4370 }
4371 }
4372
4373 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
4374 return true;
4375 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
4376 &Function::buildLiveSets, &Function::ins);
4377 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
4378 &Function::buildDefSets, &Function::outs);
4379
4380 return true;
4381 }
4382
4383 bool
run()4384 Converter::run()
4385 {
4386 BasicBlock *entry = new BasicBlock(prog->main);
4387 BasicBlock *leave = new BasicBlock(prog->main);
4388
4389 prog->main->setEntry(entry);
4390 prog->main->setExit(leave);
4391
4392 setPosition(entry, true);
4393 sub.cur = getSubroutine(prog->main);
4394
4395 if (info->io.genUserClip > 0) {
4396 for (int c = 0; c < 4; ++c)
4397 clipVtx[c] = getScratch();
4398 }
4399
4400 switch (prog->getType()) {
4401 case Program::TYPE_TESSELLATION_CONTROL:
4402 outBase = mkOp2v(
4403 OP_SUB, TYPE_U32, getSSA(),
4404 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
4405 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
4406 break;
4407 case Program::TYPE_FRAGMENT: {
4408 Symbol *sv = mkSysVal(SV_POSITION, 3);
4409 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
4410 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
4411 break;
4412 }
4413 default:
4414 break;
4415 }
4416
4417 if (info->io.viewportId >= 0)
4418 viewport = getScratch();
4419 else
4420 viewport = NULL;
4421
4422 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
4423 if (!handleInstruction(&code->insns[ip]))
4424 return false;
4425 }
4426
4427 if (!BindArgumentsPass(*this).run(prog))
4428 return false;
4429
4430 return true;
4431 }
4432
4433 } // unnamed namespace
4434
4435 namespace nv50_ir {
4436
4437 bool
makeFromTGSI(struct nv50_ir_prog_info * info)4438 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
4439 {
4440 tgsi::Source src(info);
4441 if (!src.scanSource())
4442 return false;
4443 tlsSize = info->bin.tlsSpace;
4444
4445 Converter builder(this, &src);
4446 return builder.run();
4447 }
4448
4449 } // namespace nv50_ir
4450