1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 extern "C" {
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 }
27 
28 #include "nv50_ir.h"
29 #include "nv50_ir_util.h"
30 #include "nv50_ir_build_util.h"
31 
32 namespace tgsi {
33 
34 class Source;
35 
36 static nv50_ir::operation translateOpcode(uint opcode);
37 static nv50_ir::DataFile translateFile(uint file);
38 static nv50_ir::TexTarget translateTexture(uint texTarg);
39 static nv50_ir::SVSemantic translateSysVal(uint sysval);
40 
41 class Instruction
42 {
43 public:
Instruction(const struct tgsi_full_instruction * inst)44    Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
45 
46    class SrcRegister
47    {
48    public:
SrcRegister(const struct tgsi_full_src_register * src)49       SrcRegister(const struct tgsi_full_src_register *src)
50          : reg(src->Register),
51            fsr(src)
52       { }
53 
SrcRegister(const struct tgsi_src_register & src)54       SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
55 
offsetToSrc(struct tgsi_texture_offset off)56       struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
57       {
58          struct tgsi_src_register reg;
59          memset(&reg, 0, sizeof(reg));
60          reg.Index = off.Index;
61          reg.File = off.File;
62          reg.SwizzleX = off.SwizzleX;
63          reg.SwizzleY = off.SwizzleY;
64          reg.SwizzleZ = off.SwizzleZ;
65          return reg;
66       }
67 
SrcRegister(const struct tgsi_texture_offset & off)68       SrcRegister(const struct tgsi_texture_offset& off) :
69          reg(offsetToSrc(off)),
70          fsr(NULL)
71       { }
72 
getFile() const73       uint getFile() const { return reg.File; }
74 
is2D() const75       bool is2D() const { return reg.Dimension; }
76 
isIndirect(int dim) const77       bool isIndirect(int dim) const
78       {
79          return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
80       }
81 
getIndex(int dim) const82       int getIndex(int dim) const
83       {
84          return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
85       }
86 
getSwizzle(int chan) const87       int getSwizzle(int chan) const
88       {
89          return tgsi_util_get_src_register_swizzle(&reg, chan);
90       }
91 
92       nv50_ir::Modifier getMod(int chan) const;
93 
getIndirect(int dim) const94       SrcRegister getIndirect(int dim) const
95       {
96          assert(fsr && isIndirect(dim));
97          if (dim)
98             return SrcRegister(fsr->DimIndirect);
99          return SrcRegister(fsr->Indirect);
100       }
101 
getValueU32(int c,const struct nv50_ir_prog_info * info) const102       uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
103       {
104          assert(reg.File == TGSI_FILE_IMMEDIATE);
105          assert(!reg.Absolute);
106          assert(!reg.Negate);
107          return info->immd.data[reg.Index * 4 + getSwizzle(c)];
108       }
109 
110    private:
111       const struct tgsi_src_register reg;
112       const struct tgsi_full_src_register *fsr;
113    };
114 
115    class DstRegister
116    {
117    public:
DstRegister(const struct tgsi_full_dst_register * dst)118       DstRegister(const struct tgsi_full_dst_register *dst)
119          : reg(dst->Register),
120            fdr(dst)
121       { }
122 
DstRegister(const struct tgsi_dst_register & dst)123       DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
124 
getFile() const125       uint getFile() const { return reg.File; }
126 
is2D() const127       bool is2D() const { return reg.Dimension; }
128 
isIndirect(int dim) const129       bool isIndirect(int dim) const
130       {
131          return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
132       }
133 
getIndex(int dim) const134       int getIndex(int dim) const
135       {
136          return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
137       }
138 
getMask() const139       unsigned int getMask() const { return reg.WriteMask; }
140 
isMasked(int chan) const141       bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
142 
getIndirect(int dim) const143       SrcRegister getIndirect(int dim) const
144       {
145          assert(fdr && isIndirect(dim));
146          if (dim)
147             return SrcRegister(fdr->DimIndirect);
148          return SrcRegister(fdr->Indirect);
149       }
150 
151    private:
152       const struct tgsi_dst_register reg;
153       const struct tgsi_full_dst_register *fdr;
154    };
155 
getOpcode() const156    inline uint getOpcode() const { return insn->Instruction.Opcode; }
157 
srcCount() const158    unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
dstCount() const159    unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
160 
161    // mask of used components of source s
162    unsigned int srcMask(unsigned int s) const;
163 
getSrc(unsigned int s) const164    SrcRegister getSrc(unsigned int s) const
165    {
166       assert(s < srcCount());
167       return SrcRegister(&insn->Src[s]);
168    }
169 
getDst(unsigned int d) const170    DstRegister getDst(unsigned int d) const
171    {
172       assert(d < dstCount());
173       return DstRegister(&insn->Dst[d]);
174    }
175 
getTexOffset(unsigned int i) const176    SrcRegister getTexOffset(unsigned int i) const
177    {
178       assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
179       return SrcRegister(insn->TexOffsets[i]);
180    }
181 
getNumTexOffsets() const182    unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
183 
184    bool checkDstSrcAliasing() const;
185 
getOP() const186    inline nv50_ir::operation getOP() const {
187       return translateOpcode(getOpcode()); }
188 
189    nv50_ir::DataType inferSrcType() const;
190    nv50_ir::DataType inferDstType() const;
191 
192    nv50_ir::CondCode getSetCond() const;
193 
194    nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
195 
getLabel()196    inline uint getLabel() { return insn->Label.Label; }
197 
getSaturate() const198    unsigned getSaturate() const { return insn->Instruction.Saturate; }
199 
print() const200    void print() const
201    {
202       tgsi_dump_instruction(insn, 1);
203    }
204 
205 private:
206    const struct tgsi_full_instruction *insn;
207 };
208 
srcMask(unsigned int s) const209 unsigned int Instruction::srcMask(unsigned int s) const
210 {
211    unsigned int mask = insn->Dst[0].Register.WriteMask;
212 
213    switch (insn->Instruction.Opcode) {
214    case TGSI_OPCODE_COS:
215    case TGSI_OPCODE_SIN:
216       return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
217    case TGSI_OPCODE_DP2:
218       return 0x3;
219    case TGSI_OPCODE_DP3:
220       return 0x7;
221    case TGSI_OPCODE_DP4:
222    case TGSI_OPCODE_DPH:
223    case TGSI_OPCODE_KIL: /* WriteMask ignored */
224       return 0xf;
225    case TGSI_OPCODE_DST:
226       return mask & (s ? 0xa : 0x6);
227    case TGSI_OPCODE_EX2:
228    case TGSI_OPCODE_EXP:
229    case TGSI_OPCODE_LG2:
230    case TGSI_OPCODE_LOG:
231    case TGSI_OPCODE_POW:
232    case TGSI_OPCODE_RCP:
233    case TGSI_OPCODE_RSQ:
234    case TGSI_OPCODE_SCS:
235       return 0x1;
236    case TGSI_OPCODE_IF:
237       return 0x1;
238    case TGSI_OPCODE_LIT:
239       return 0xb;
240    case TGSI_OPCODE_TEX:
241    case TGSI_OPCODE_TXB:
242    case TGSI_OPCODE_TXD:
243    case TGSI_OPCODE_TXL:
244    case TGSI_OPCODE_TXP:
245    {
246       const struct tgsi_instruction_texture *tex = &insn->Texture;
247 
248       assert(insn->Instruction.Texture);
249 
250       mask = 0x7;
251       if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
252           insn->Instruction.Opcode != TGSI_OPCODE_TXD)
253          mask |= 0x8; /* bias, lod or proj */
254 
255       switch (tex->Texture) {
256       case TGSI_TEXTURE_1D:
257          mask &= 0x9;
258          break;
259       case TGSI_TEXTURE_SHADOW1D:
260          mask &= 0xd;
261          break;
262       case TGSI_TEXTURE_1D_ARRAY:
263       case TGSI_TEXTURE_2D:
264       case TGSI_TEXTURE_RECT:
265          mask &= 0xb;
266          break;
267       default:
268          break;
269       }
270    }
271       return mask;
272    case TGSI_OPCODE_XPD:
273    {
274       unsigned int x = 0;
275       if (mask & 1) x |= 0x6;
276       if (mask & 2) x |= 0x5;
277       if (mask & 4) x |= 0x3;
278       return x;
279    }
280    default:
281       break;
282    }
283 
284    return mask;
285 }
286 
getMod(int chan) const287 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
288 {
289    nv50_ir::Modifier m(0);
290 
291    if (reg.Absolute)
292       m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
293    if (reg.Negate)
294       m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
295    return m;
296 }
297 
translateFile(uint file)298 static nv50_ir::DataFile translateFile(uint file)
299 {
300    switch (file) {
301    case TGSI_FILE_CONSTANT:        return nv50_ir::FILE_MEMORY_CONST;
302    case TGSI_FILE_INPUT:           return nv50_ir::FILE_SHADER_INPUT;
303    case TGSI_FILE_OUTPUT:          return nv50_ir::FILE_SHADER_OUTPUT;
304    case TGSI_FILE_TEMPORARY:       return nv50_ir::FILE_GPR;
305    case TGSI_FILE_ADDRESS:         return nv50_ir::FILE_ADDRESS;
306    case TGSI_FILE_PREDICATE:       return nv50_ir::FILE_PREDICATE;
307    case TGSI_FILE_IMMEDIATE:       return nv50_ir::FILE_IMMEDIATE;
308    case TGSI_FILE_SYSTEM_VALUE:    return nv50_ir::FILE_SYSTEM_VALUE;
309    case TGSI_FILE_IMMEDIATE_ARRAY: return nv50_ir::FILE_IMMEDIATE;
310    case TGSI_FILE_TEMPORARY_ARRAY: return nv50_ir::FILE_MEMORY_LOCAL;
311    case TGSI_FILE_RESOURCE:        return nv50_ir::FILE_MEMORY_GLOBAL;
312    case TGSI_FILE_SAMPLER:
313    case TGSI_FILE_NULL:
314    default:
315       return nv50_ir::FILE_NULL;
316    }
317 }
318 
translateSysVal(uint sysval)319 static nv50_ir::SVSemantic translateSysVal(uint sysval)
320 {
321    switch (sysval) {
322    case TGSI_SEMANTIC_FACE:       return nv50_ir::SV_FACE;
323    case TGSI_SEMANTIC_PSIZE:      return nv50_ir::SV_POINT_SIZE;
324    case TGSI_SEMANTIC_PRIMID:     return nv50_ir::SV_PRIMITIVE_ID;
325    case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
326    case TGSI_SEMANTIC_VERTEXID:   return nv50_ir::SV_VERTEX_ID;
327    default:
328       assert(0);
329       return nv50_ir::SV_CLOCK;
330    }
331 }
332 
333 #define NV50_IR_TEX_TARG_CASE(a, b) \
334    case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
335 
translateTexture(uint tex)336 static nv50_ir::TexTarget translateTexture(uint tex)
337 {
338    switch (tex) {
339    NV50_IR_TEX_TARG_CASE(1D, 1D);
340    NV50_IR_TEX_TARG_CASE(2D, 2D);
341    NV50_IR_TEX_TARG_CASE(3D, 3D);
342    NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
343    NV50_IR_TEX_TARG_CASE(RECT, RECT);
344    NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
345    NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
346    NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
347    NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
348    NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
349    NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
350    NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
351    NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
352    NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
353 
354    case TGSI_TEXTURE_UNKNOWN:
355    default:
356       assert(!"invalid texture target");
357       return nv50_ir::TEX_TARGET_2D;
358    }
359 }
360 
inferSrcType() const361 nv50_ir::DataType Instruction::inferSrcType() const
362 {
363    switch (getOpcode()) {
364    case TGSI_OPCODE_AND:
365    case TGSI_OPCODE_OR:
366    case TGSI_OPCODE_XOR:
367    case TGSI_OPCODE_NOT:
368    case TGSI_OPCODE_U2F:
369    case TGSI_OPCODE_UADD:
370    case TGSI_OPCODE_UDIV:
371    case TGSI_OPCODE_UMOD:
372    case TGSI_OPCODE_UMAD:
373    case TGSI_OPCODE_UMUL:
374    case TGSI_OPCODE_UMAX:
375    case TGSI_OPCODE_UMIN:
376    case TGSI_OPCODE_USEQ:
377    case TGSI_OPCODE_USGE:
378    case TGSI_OPCODE_USLT:
379    case TGSI_OPCODE_USNE:
380    case TGSI_OPCODE_USHR:
381    case TGSI_OPCODE_UCMP:
382       return nv50_ir::TYPE_U32;
383    case TGSI_OPCODE_I2F:
384    case TGSI_OPCODE_IDIV:
385    case TGSI_OPCODE_IMAX:
386    case TGSI_OPCODE_IMIN:
387    case TGSI_OPCODE_IABS:
388    case TGSI_OPCODE_INEG:
389    case TGSI_OPCODE_ISGE:
390    case TGSI_OPCODE_ISHR:
391    case TGSI_OPCODE_ISLT:
392    case TGSI_OPCODE_ISSG:
393    case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
394    case TGSI_OPCODE_MOD:
395    case TGSI_OPCODE_UARL:
396       return nv50_ir::TYPE_S32;
397    default:
398       return nv50_ir::TYPE_F32;
399    }
400 }
401 
inferDstType() const402 nv50_ir::DataType Instruction::inferDstType() const
403 {
404    switch (getOpcode()) {
405    case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
406    case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
407    case TGSI_OPCODE_I2F:
408    case TGSI_OPCODE_U2F:
409       return nv50_ir::TYPE_F32;
410    default:
411       return inferSrcType();
412    }
413 }
414 
getSetCond() const415 nv50_ir::CondCode Instruction::getSetCond() const
416 {
417    using namespace nv50_ir;
418 
419    switch (getOpcode()) {
420    case TGSI_OPCODE_SLT:
421    case TGSI_OPCODE_ISLT:
422    case TGSI_OPCODE_USLT:
423       return CC_LT;
424    case TGSI_OPCODE_SLE:
425       return CC_LE;
426    case TGSI_OPCODE_SGE:
427    case TGSI_OPCODE_ISGE:
428    case TGSI_OPCODE_USGE:
429       return CC_GE;
430    case TGSI_OPCODE_SGT:
431       return CC_GT;
432    case TGSI_OPCODE_SEQ:
433    case TGSI_OPCODE_USEQ:
434       return CC_EQ;
435    case TGSI_OPCODE_SNE:
436       return CC_NEU;
437    case TGSI_OPCODE_USNE:
438       return CC_NE;
439    case TGSI_OPCODE_SFL:
440       return CC_NEVER;
441    case TGSI_OPCODE_STR:
442    default:
443       return CC_ALWAYS;
444    }
445 }
446 
447 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
448 
translateOpcode(uint opcode)449 static nv50_ir::operation translateOpcode(uint opcode)
450 {
451    switch (opcode) {
452    NV50_IR_OPCODE_CASE(ARL, SHL);
453    NV50_IR_OPCODE_CASE(MOV, MOV);
454 
455    NV50_IR_OPCODE_CASE(RCP, RCP);
456    NV50_IR_OPCODE_CASE(RSQ, RSQ);
457 
458    NV50_IR_OPCODE_CASE(MUL, MUL);
459    NV50_IR_OPCODE_CASE(ADD, ADD);
460 
461    NV50_IR_OPCODE_CASE(MIN, MIN);
462    NV50_IR_OPCODE_CASE(MAX, MAX);
463    NV50_IR_OPCODE_CASE(SLT, SET);
464    NV50_IR_OPCODE_CASE(SGE, SET);
465    NV50_IR_OPCODE_CASE(MAD, MAD);
466    NV50_IR_OPCODE_CASE(SUB, SUB);
467 
468    NV50_IR_OPCODE_CASE(FLR, FLOOR);
469    NV50_IR_OPCODE_CASE(ROUND, CVT);
470    NV50_IR_OPCODE_CASE(EX2, EX2);
471    NV50_IR_OPCODE_CASE(LG2, LG2);
472    NV50_IR_OPCODE_CASE(POW, POW);
473 
474    NV50_IR_OPCODE_CASE(ABS, ABS);
475 
476    NV50_IR_OPCODE_CASE(COS, COS);
477    NV50_IR_OPCODE_CASE(DDX, DFDX);
478    NV50_IR_OPCODE_CASE(DDY, DFDY);
479    NV50_IR_OPCODE_CASE(KILP, DISCARD);
480 
481    NV50_IR_OPCODE_CASE(SEQ, SET);
482    NV50_IR_OPCODE_CASE(SFL, SET);
483    NV50_IR_OPCODE_CASE(SGT, SET);
484    NV50_IR_OPCODE_CASE(SIN, SIN);
485    NV50_IR_OPCODE_CASE(SLE, SET);
486    NV50_IR_OPCODE_CASE(SNE, SET);
487    NV50_IR_OPCODE_CASE(STR, SET);
488    NV50_IR_OPCODE_CASE(TEX, TEX);
489    NV50_IR_OPCODE_CASE(TXD, TXD);
490    NV50_IR_OPCODE_CASE(TXP, TEX);
491 
492    NV50_IR_OPCODE_CASE(BRA, BRA);
493    NV50_IR_OPCODE_CASE(CAL, CALL);
494    NV50_IR_OPCODE_CASE(RET, RET);
495    NV50_IR_OPCODE_CASE(CMP, SLCT);
496 
497    NV50_IR_OPCODE_CASE(TXB, TXB);
498 
499    NV50_IR_OPCODE_CASE(DIV, DIV);
500 
501    NV50_IR_OPCODE_CASE(TXL, TXL);
502 
503    NV50_IR_OPCODE_CASE(CEIL, CEIL);
504    NV50_IR_OPCODE_CASE(I2F, CVT);
505    NV50_IR_OPCODE_CASE(NOT, NOT);
506    NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
507    NV50_IR_OPCODE_CASE(SHL, SHL);
508 
509    NV50_IR_OPCODE_CASE(AND, AND);
510    NV50_IR_OPCODE_CASE(OR, OR);
511    NV50_IR_OPCODE_CASE(MOD, MOD);
512    NV50_IR_OPCODE_CASE(XOR, XOR);
513    NV50_IR_OPCODE_CASE(SAD, SAD);
514    NV50_IR_OPCODE_CASE(TXF, TXF);
515    NV50_IR_OPCODE_CASE(TXQ, TXQ);
516 
517    NV50_IR_OPCODE_CASE(EMIT, EMIT);
518    NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
519 
520    NV50_IR_OPCODE_CASE(KIL, DISCARD);
521 
522    NV50_IR_OPCODE_CASE(F2I, CVT);
523    NV50_IR_OPCODE_CASE(IDIV, DIV);
524    NV50_IR_OPCODE_CASE(IMAX, MAX);
525    NV50_IR_OPCODE_CASE(IMIN, MIN);
526    NV50_IR_OPCODE_CASE(IABS, ABS);
527    NV50_IR_OPCODE_CASE(INEG, NEG);
528    NV50_IR_OPCODE_CASE(ISGE, SET);
529    NV50_IR_OPCODE_CASE(ISHR, SHR);
530    NV50_IR_OPCODE_CASE(ISLT, SET);
531    NV50_IR_OPCODE_CASE(F2U, CVT);
532    NV50_IR_OPCODE_CASE(U2F, CVT);
533    NV50_IR_OPCODE_CASE(UADD, ADD);
534    NV50_IR_OPCODE_CASE(UDIV, DIV);
535    NV50_IR_OPCODE_CASE(UMAD, MAD);
536    NV50_IR_OPCODE_CASE(UMAX, MAX);
537    NV50_IR_OPCODE_CASE(UMIN, MIN);
538    NV50_IR_OPCODE_CASE(UMOD, MOD);
539    NV50_IR_OPCODE_CASE(UMUL, MUL);
540    NV50_IR_OPCODE_CASE(USEQ, SET);
541    NV50_IR_OPCODE_CASE(USGE, SET);
542    NV50_IR_OPCODE_CASE(USHR, SHR);
543    NV50_IR_OPCODE_CASE(USLT, SET);
544    NV50_IR_OPCODE_CASE(USNE, SET);
545 
546    NV50_IR_OPCODE_CASE(LOAD, TXF);
547    NV50_IR_OPCODE_CASE(SAMPLE, TEX);
548    NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
549    NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
550    NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
551    NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
552    NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
553    NV50_IR_OPCODE_CASE(GATHER4, TXG);
554    NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
555 
556    NV50_IR_OPCODE_CASE(END, EXIT);
557 
558    default:
559       return nv50_ir::OP_NOP;
560    }
561 }
562 
checkDstSrcAliasing() const563 bool Instruction::checkDstSrcAliasing() const
564 {
565    if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
566       return false;
567 
568    for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
569       if (insn->Src[s].Register.File == TGSI_FILE_NULL)
570          break;
571       if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
572           insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
573          return true;
574    }
575    return false;
576 }
577 
578 class Source
579 {
580 public:
581    Source(struct nv50_ir_prog_info *);
582    ~Source();
583 
584 public:
585    bool scanSource();
fileSize(unsigned file) const586    unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
587 
588 public:
589    struct tgsi_shader_info scan;
590    struct tgsi_full_instruction *insns;
591    const struct tgsi_token *tokens;
592    struct nv50_ir_prog_info *info;
593 
594    nv50_ir::DynArray tempArrays;
595    nv50_ir::DynArray immdArrays;
596    int tempArrayCount;
597    int immdArrayCount;
598 
599    bool mainTempsInLMem;
600 
601    int clipVertexOutput;
602 
603    uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
604    unsigned samplerViewCount;
605 
606 private:
607    int inferSysValDirection(unsigned sn) const;
608    bool scanDeclaration(const struct tgsi_full_declaration *);
609    bool scanInstruction(const struct tgsi_full_instruction *);
610    void scanProperty(const struct tgsi_full_property *);
611    void scanImmediate(const struct tgsi_full_immediate *);
612 
613    inline bool isEdgeFlagPassthrough(const Instruction&) const;
614 };
615 
Source(struct nv50_ir_prog_info * prog)616 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
617 {
618    tokens = (const struct tgsi_token *)info->bin.source;
619 
620    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
621       tgsi_dump(tokens, 0);
622 
623    samplerViewTargets = NULL;
624 
625    mainTempsInLMem = FALSE;
626 }
627 
~Source()628 Source::~Source()
629 {
630    if (insns)
631       FREE(insns);
632 
633    if (info->immd.data)
634       FREE(info->immd.data);
635    if (info->immd.type)
636       FREE(info->immd.type);
637 
638    if (samplerViewTargets)
639       delete[] samplerViewTargets;
640 }
641 
scanSource()642 bool Source::scanSource()
643 {
644    unsigned insnCount = 0;
645    struct tgsi_parse_context parse;
646 
647    tgsi_scan_shader(tokens, &scan);
648 
649    insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
650                                                   sizeof(insns[0]));
651    if (!insns)
652       return false;
653 
654    clipVertexOutput = -1;
655 
656    samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
657    samplerViewTargets = new uint8_t[samplerViewCount];
658 
659    info->immd.bufSize = 0;
660    tempArrayCount = 0;
661    immdArrayCount = 0;
662 
663    info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
664    info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
665    info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
666 
667    if (info->type == PIPE_SHADER_FRAGMENT) {
668       info->prop.fp.writesDepth = scan.writes_z;
669       info->prop.fp.usesDiscard = scan.uses_kill;
670    } else
671    if (info->type == PIPE_SHADER_GEOMETRY) {
672       info->prop.gp.instanceCount = 1; // default value
673    }
674 
675    info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
676    info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
677 
678    tgsi_parse_init(&parse, tokens);
679    while (!tgsi_parse_end_of_tokens(&parse)) {
680       tgsi_parse_token(&parse);
681 
682       switch (parse.FullToken.Token.Type) {
683       case TGSI_TOKEN_TYPE_IMMEDIATE:
684          scanImmediate(&parse.FullToken.FullImmediate);
685          break;
686       case TGSI_TOKEN_TYPE_DECLARATION:
687          scanDeclaration(&parse.FullToken.FullDeclaration);
688          break;
689       case TGSI_TOKEN_TYPE_INSTRUCTION:
690          insns[insnCount++] = parse.FullToken.FullInstruction;
691          scanInstruction(&parse.FullToken.FullInstruction);
692          break;
693       case TGSI_TOKEN_TYPE_PROPERTY:
694          scanProperty(&parse.FullToken.FullProperty);
695          break;
696       default:
697          INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
698          break;
699       }
700    }
701    tgsi_parse_free(&parse);
702 
703    if (mainTempsInLMem)
704       info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
705 
706    if (info->io.genUserClip > 0) {
707       info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
708 
709       for (unsigned int n = 0; n < ((info->io.genUserClip + 3) / 4); ++n) {
710          unsigned int i = info->numOutputs++;
711          info->out[i].id = i;
712          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
713          info->out[i].si = n;
714          info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
715       }
716    }
717 
718    return info->assignSlots(info) == 0;
719 }
720 
scanProperty(const struct tgsi_full_property * prop)721 void Source::scanProperty(const struct tgsi_full_property *prop)
722 {
723    switch (prop->Property.PropertyName) {
724    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
725       info->prop.gp.outputPrim = prop->u[0].Data;
726       break;
727    case TGSI_PROPERTY_GS_INPUT_PRIM:
728       info->prop.gp.inputPrim = prop->u[0].Data;
729       break;
730    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
731       info->prop.gp.maxVertices = prop->u[0].Data;
732       break;
733 #if 0
734    case TGSI_PROPERTY_GS_INSTANCE_COUNT:
735       info->prop.gp.instanceCount = prop->u[0].Data;
736       break;
737 #endif
738    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
739       info->prop.fp.separateFragData = TRUE;
740       break;
741    case TGSI_PROPERTY_FS_COORD_ORIGIN:
742    case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
743       // we don't care
744       break;
745    case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
746       info->io.genUserClip = -1;
747       break;
748    default:
749       INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
750       break;
751    }
752 }
753 
scanImmediate(const struct tgsi_full_immediate * imm)754 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
755 {
756    const unsigned n = info->immd.count++;
757 
758    assert(n < scan.immediate_count);
759 
760    for (int c = 0; c < 4; ++c)
761       info->immd.data[n * 4 + c] = imm->u[c].Uint;
762 
763    info->immd.type[n] = imm->Immediate.DataType;
764 }
765 
inferSysValDirection(unsigned sn) const766 int Source::inferSysValDirection(unsigned sn) const
767 {
768    switch (sn) {
769    case TGSI_SEMANTIC_INSTANCEID:
770    case TGSI_SEMANTIC_VERTEXID:
771       return 1;
772 #if 0
773    case TGSI_SEMANTIC_LAYER:
774    case TGSI_SEMANTIC_VIEWPORTINDEX:
775       return 0;
776 #endif
777    case TGSI_SEMANTIC_PRIMID:
778       return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
779    default:
780       return 0;
781    }
782 }
783 
scanDeclaration(const struct tgsi_full_declaration * decl)784 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
785 {
786    unsigned i;
787    unsigned sn = TGSI_SEMANTIC_GENERIC;
788    unsigned si = 0;
789    const unsigned first = decl->Range.First, last = decl->Range.Last;
790 
791    if (decl->Declaration.Semantic) {
792       sn = decl->Semantic.Name;
793       si = decl->Semantic.Index;
794    }
795 
796    switch (decl->Declaration.File) {
797    case TGSI_FILE_INPUT:
798       if (info->type == PIPE_SHADER_VERTEX) {
799          // all vertex attributes are equal
800          for (i = first; i <= last; ++i) {
801             info->in[i].sn = TGSI_SEMANTIC_GENERIC;
802             info->in[i].si = i;
803          }
804       } else {
805          for (i = first; i <= last; ++i, ++si) {
806             info->in[i].id = i;
807             info->in[i].sn = sn;
808             info->in[i].si = si;
809             if (info->type == PIPE_SHADER_FRAGMENT) {
810                // translate interpolation mode
811                switch (decl->Interp.Interpolate) {
812                case TGSI_INTERPOLATE_CONSTANT:
813                   info->in[i].flat = 1;
814                   break;
815                case TGSI_INTERPOLATE_COLOR:
816                   info->in[i].sc = 1;
817                   break;
818                case TGSI_INTERPOLATE_LINEAR:
819                   info->in[i].linear = 1;
820                   break;
821                default:
822                   break;
823                }
824                if (decl->Interp.Centroid)
825                   info->in[i].centroid = 1;
826             }
827          }
828       }
829       break;
830    case TGSI_FILE_OUTPUT:
831       for (i = first; i <= last; ++i, ++si) {
832          switch (sn) {
833          case TGSI_SEMANTIC_POSITION:
834             if (info->type == PIPE_SHADER_FRAGMENT)
835                info->io.fragDepth = i;
836             else
837             if (clipVertexOutput < 0)
838                clipVertexOutput = i;
839             break;
840          case TGSI_SEMANTIC_COLOR:
841             if (info->type == PIPE_SHADER_FRAGMENT)
842                info->prop.fp.numColourResults++;
843             break;
844          case TGSI_SEMANTIC_EDGEFLAG:
845             info->io.edgeFlagOut = i;
846             break;
847          case TGSI_SEMANTIC_CLIPVERTEX:
848             clipVertexOutput = i;
849             break;
850          case TGSI_SEMANTIC_CLIPDIST:
851             info->io.clipDistanceMask |=
852                decl->Declaration.UsageMask << (si * 4);
853             info->io.genUserClip = -1;
854             break;
855          default:
856             break;
857          }
858          info->out[i].id = i;
859          info->out[i].sn = sn;
860          info->out[i].si = si;
861       }
862       break;
863    case TGSI_FILE_SYSTEM_VALUE:
864       switch (sn) {
865       case TGSI_SEMANTIC_INSTANCEID:
866          info->io.instanceId = first;
867          break;
868       case TGSI_SEMANTIC_VERTEXID:
869          info->io.vertexId = first;
870          break;
871       default:
872          break;
873       }
874       for (i = first; i <= last; ++i, ++si) {
875          info->sv[i].sn = sn;
876          info->sv[i].si = si;
877          info->sv[i].input = inferSysValDirection(sn);
878       }
879       break;
880    case TGSI_FILE_SAMPLER_VIEW:
881       for (i = first; i <= last; ++i)
882          samplerViewTargets[i] = decl->SamplerView.Resource;
883       break;
884    case TGSI_FILE_IMMEDIATE_ARRAY:
885    {
886       if (decl->Dim.Index2D >= immdArrayCount)
887          immdArrayCount = decl->Dim.Index2D + 1;
888       immdArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
889       int c;
890       uint32_t base, count;
891       switch (decl->Declaration.UsageMask) {
892       case 0x1: c = 1; break;
893       case 0x3: c = 2; break;
894       default:
895          c = 4;
896          break;
897       }
898       immdArrays[decl->Dim.Index2D].u32 |= c;
899       count = (last + 1) * c;
900       base = info->immd.bufSize / 4;
901       info->immd.bufSize = (info->immd.bufSize + count * 4 + 0xf) & ~0xf;
902       info->immd.buf = (uint32_t *)REALLOC(info->immd.buf, base * 4,
903                                            info->immd.bufSize);
904       // NOTE: this assumes array declarations are ordered by Dim.Index2D
905       for (i = 0; i < count; ++i)
906          info->immd.buf[base + i] = decl->ImmediateData.u[i].Uint;
907    }
908       break;
909    case TGSI_FILE_TEMPORARY_ARRAY:
910    {
911       if (decl->Dim.Index2D >= tempArrayCount)
912          tempArrayCount = decl->Dim.Index2D + 1;
913       tempArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
914       int c;
915       uint32_t count;
916       switch (decl->Declaration.UsageMask) {
917       case 0x1: c = 1; break;
918       case 0x3: c = 2; break;
919       default:
920          c = 4;
921          break;
922       }
923       tempArrays[decl->Dim.Index2D].u32 |= c;
924       count = (last + 1) * c;
925       info->bin.tlsSpace += (info->bin.tlsSpace + count * 4 + 0xf) & ~0xf;
926    }
927       break;
928    case TGSI_FILE_NULL:
929    case TGSI_FILE_TEMPORARY:
930    case TGSI_FILE_ADDRESS:
931    case TGSI_FILE_CONSTANT:
932    case TGSI_FILE_IMMEDIATE:
933    case TGSI_FILE_PREDICATE:
934    case TGSI_FILE_SAMPLER:
935       break;
936    default:
937       ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
938       return false;
939    }
940    return true;
941 }
942 
isEdgeFlagPassthrough(const Instruction & insn) const943 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
944 {
945    return insn.getOpcode() == TGSI_OPCODE_MOV &&
946       insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
947       insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
948 }
949 
scanInstruction(const struct tgsi_full_instruction * inst)950 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
951 {
952    Instruction insn(inst);
953 
954    if (insn.dstCount()) {
955       if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
956          Instruction::DstRegister dst = insn.getDst(0);
957 
958          if (dst.isIndirect(0))
959             for (unsigned i = 0; i < info->numOutputs; ++i)
960                info->out[i].mask = 0xf;
961          else
962             info->out[dst.getIndex(0)].mask |= dst.getMask();
963 
964          if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
965             info->out[dst.getIndex(0)].mask &= 1;
966 
967          if (isEdgeFlagPassthrough(insn))
968             info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
969       } else
970       if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
971          if (insn.getDst(0).isIndirect(0))
972             mainTempsInLMem = TRUE;
973       }
974    }
975 
976    for (unsigned s = 0; s < insn.srcCount(); ++s) {
977       Instruction::SrcRegister src = insn.getSrc(s);
978       if (src.getFile() == TGSI_FILE_TEMPORARY)
979          if (src.isIndirect(0))
980             mainTempsInLMem = TRUE;
981       if (src.getFile() != TGSI_FILE_INPUT)
982          continue;
983       unsigned mask = insn.srcMask(s);
984 
985       if (src.isIndirect(0)) {
986          for (unsigned i = 0; i < info->numInputs; ++i)
987             info->in[i].mask = 0xf;
988       } else {
989          for (unsigned c = 0; c < 4; ++c) {
990             if (!(mask & (1 << c)))
991                continue;
992             int k = src.getSwizzle(c);
993             int i = src.getIndex(0);
994             if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
995                if (k <= TGSI_SWIZZLE_W)
996                   info->in[i].mask |= 1 << k;
997          }
998       }
999    }
1000    return true;
1001 }
1002 
1003 nv50_ir::TexInstruction::Target
getTexture(const tgsi::Source * code,int s) const1004 Instruction::getTexture(const tgsi::Source *code, int s) const
1005 {
1006    switch (getSrc(s).getFile()) {
1007    case TGSI_FILE_SAMPLER_VIEW: {
1008       // XXX: indirect access
1009       unsigned int r = getSrc(s).getIndex(0);
1010       assert(r < code->samplerViewCount);
1011       return translateTexture(code->samplerViewTargets[r]);
1012    }
1013    default:
1014       return translateTexture(insn->Texture.Texture);
1015    }
1016 }
1017 
1018 } // namespace tgsi
1019 
1020 namespace {
1021 
1022 using namespace nv50_ir;
1023 
// Translates a scanned TGSI shader (tgsi::Source) into nv50 IR, using the
// instruction/value construction helpers inherited from BuildUtil.
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // An IR function together with the ValueMap that is handed to the
   // DataArray load/store/acquire calls while code for it is generated.
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   // Register access by operand index of the current instruction (tgsi).
   Value *getVertexBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   Value *fetchSrc(int s, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   // Register access for an explicit register; ptr is the optional
   // indirect address value.
   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   // Applies the ABS/NEG modifiers of source s, component c, to a value.
   Value *applySrcMod(Value *, int s, int c);

   // Symbol construction for TGSI registers.
   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   // Both overloads look up sub.map and insert a new entry on a miss.
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R);
   void handleTXQ(Value *dst0[4], enum TexQuery);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   // Dot product of the first dim components of sources 0 and 1.
   Value *buildDot(int dim);

   // Pass over the generated functions; its visit(BasicBlock *) returns
   // false. The remaining members are defined outside this declaration.
   class BindArgumentsPass : public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const struct tgsi::Source *code;       // scanned TGSI shader being converted
   const struct nv50_ir_prog_info *info;  // in/out/sysval tables, immediates, io flags

   struct {
      std::map<unsigned, Subroutine> map; // keyed by the ip / function label
      Subroutine *cur;                    // its ValueMap is used for register access
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi; // the instruction handleInstruction is working on

   DataType dstTy; // tgsi.inferDstType() of the current instruction
   DataType srcTy; // tgsi.inferSrcType() of the current instruction

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray pData; // TGSI_FILE_PREDICATE
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
   std::vector<DataArray> lData; // TGSI_FILE_TEMPORARY_ARRAY
   std::vector<DataArray> iData; // TGSI_FILE_IMMEDIATE_ARRAY

   Value *zero;
   Value *fragCoord[4]; // [3] is the second source of OP_PINTERP
   Value *clipVtx[4];   // copy of the clip vertex output when user clip is generated

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid; // bit s set once vtxBase[s] has been computed

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop
};
1134 
1135 Symbol *
srcToSym(tgsi::Instruction::SrcRegister src,int c)1136 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1137 {
1138    const int swz = src.getSwizzle(c);
1139 
1140    return makeSym(src.getFile(),
1141                   src.is2D() ? src.getIndex(1) : 0,
1142                   src.isIndirect(0) ? -1 : src.getIndex(0), swz,
1143                   src.getIndex(0) * 16 + swz * 4);
1144 }
1145 
1146 Symbol *
dstToSym(tgsi::Instruction::DstRegister dst,int c)1147 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1148 {
1149    return makeSym(dst.getFile(),
1150                   dst.is2D() ? dst.getIndex(1) : 0,
1151                   dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
1152                   dst.getIndex(0) * 16 + c * 4);
1153 }
1154 
1155 Symbol *
makeSym(uint tgsiFile,int fileIdx,int idx,int c,uint32_t address)1156 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1157 {
1158    Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1159 
1160    sym->reg.fileIndex = fileIdx;
1161 
1162    if (idx >= 0) {
1163       if (sym->reg.file == FILE_SHADER_INPUT)
1164          sym->setOffset(info->in[idx].slot[c] * 4);
1165       else
1166       if (sym->reg.file == FILE_SHADER_OUTPUT)
1167          sym->setOffset(info->out[idx].slot[c] * 4);
1168       else
1169       if (sym->reg.file == FILE_SYSTEM_VALUE)
1170          sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1171       else
1172          sym->setOffset(address);
1173    } else {
1174       sym->setOffset(address);
1175    }
1176    return sym;
1177 }
1178 
1179 static inline uint8_t
translateInterpMode(const struct nv50_ir_varying * var,operation & op)1180 translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1181 {
1182    uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1183 
1184    if (var->flat)
1185       mode = NV50_IR_INTERP_FLAT;
1186    else
1187    if (var->linear)
1188       mode = NV50_IR_INTERP_LINEAR;
1189    else
1190    if (var->sc)
1191       mode = NV50_IR_INTERP_SC;
1192 
1193    op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1194       ? OP_PINTERP : OP_LINTERP;
1195 
1196    if (var->centroid)
1197       mode |= NV50_IR_INTERP_CENTROID;
1198 
1199    return mode;
1200 }
1201 
1202 Value *
interpolate(tgsi::Instruction::SrcRegister src,int c,Value * ptr)1203 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1204 {
1205    operation op;
1206 
1207    // XXX: no way to know interpolation mode if we don't know what's accessed
1208    const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1209                                                       src.getIndex(0)], op);
1210 
1211    Instruction *insn = new_Instruction(func, op, TYPE_F32);
1212 
1213    insn->setDef(0, getScratch());
1214    insn->setSrc(0, srcToSym(src, c));
1215    if (op == OP_PINTERP)
1216       insn->setSrc(1, fragCoord[3]);
1217    if (ptr)
1218       insn->setIndirect(0, 0, ptr);
1219 
1220    insn->setInterpolate(mode);
1221 
1222    bb->insertTail(insn);
1223    return insn->getDef(0);
1224 }
1225 
1226 Value *
applySrcMod(Value * val,int s,int c)1227 Converter::applySrcMod(Value *val, int s, int c)
1228 {
1229    Modifier m = tgsi.getSrc(s).getMod(c);
1230    DataType ty = tgsi.inferSrcType();
1231 
1232    if (m & Modifier(NV50_IR_MOD_ABS))
1233       val = mkOp1v(OP_ABS, ty, getScratch(), val);
1234 
1235    if (m & Modifier(NV50_IR_MOD_NEG))
1236       val = mkOp1v(OP_NEG, ty, getScratch(), val);
1237 
1238    return val;
1239 }
1240 
1241 Value *
getVertexBase(int s)1242 Converter::getVertexBase(int s)
1243 {
1244    assert(s < 5);
1245    if (!(vtxBaseValid & (1 << s))) {
1246       const int index = tgsi.getSrc(s).getIndex(1);
1247       Value *rel = NULL;
1248       if (tgsi.getSrc(s).isIndirect(1))
1249          rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1250       vtxBaseValid |= 1 << s;
1251       vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
1252    }
1253    return vtxBase[s];
1254 }
1255 
1256 Value *
fetchSrc(int s,int c)1257 Converter::fetchSrc(int s, int c)
1258 {
1259    Value *res;
1260    Value *ptr = NULL, *dimRel = NULL;
1261 
1262    tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1263 
1264    if (src.isIndirect(0))
1265       ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1266 
1267    if (src.is2D()) {
1268       switch (src.getFile()) {
1269       case TGSI_FILE_INPUT:
1270          dimRel = getVertexBase(s);
1271          break;
1272       case TGSI_FILE_CONSTANT:
1273          // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1274          if (src.isIndirect(1))
1275             dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1276          break;
1277       default:
1278          break;
1279       }
1280    }
1281 
1282    res = fetchSrc(src, c, ptr);
1283 
1284    if (dimRel)
1285       res->getInsn()->setIndirect(0, 1, dimRel);
1286 
1287    return applySrcMod(res, s, c);
1288 }
1289 
1290 Converter::DataArray *
getArrayForFile(unsigned file,int idx)1291 Converter::getArrayForFile(unsigned file, int idx)
1292 {
1293    switch (file) {
1294    case TGSI_FILE_TEMPORARY:
1295       return &tData;
1296    case TGSI_FILE_PREDICATE:
1297       return &pData;
1298    case TGSI_FILE_ADDRESS:
1299       return &aData;
1300    case TGSI_FILE_TEMPORARY_ARRAY:
1301       assert(idx < code->tempArrayCount);
1302       return &lData[idx];
1303    case TGSI_FILE_IMMEDIATE_ARRAY:
1304       assert(idx < code->immdArrayCount);
1305       return &iData[idx];
1306    case TGSI_FILE_OUTPUT:
1307       assert(prog->getType() == Program::TYPE_FRAGMENT);
1308       return &oData;
1309    default:
1310       assert(!"invalid/unhandled TGSI source file");
1311       return NULL;
1312    }
1313 }
1314 
1315 Value *
fetchSrc(tgsi::Instruction::SrcRegister src,int c,Value * ptr)1316 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1317 {
1318    const int idx2d = src.is2D() ? src.getIndex(1) : 0;
1319    const int idx = src.getIndex(0);
1320    const int swz = src.getSwizzle(c);
1321 
1322    switch (src.getFile()) {
1323    case TGSI_FILE_IMMEDIATE:
1324       assert(!ptr);
1325       return loadImm(NULL, info->immd.data[idx * 4 + swz]);
1326    case TGSI_FILE_CONSTANT:
1327       return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
1328    case TGSI_FILE_INPUT:
1329       if (prog->getType() == Program::TYPE_FRAGMENT) {
1330          // don't load masked inputs, won't be assigned a slot
1331          if (!ptr && !(info->in[idx].mask & (1 << swz)))
1332             return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
1333 	 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
1334             return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
1335          return interpolate(src, c, ptr);
1336       }
1337       return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
1338    case TGSI_FILE_OUTPUT:
1339       assert(!"load from output file");
1340       return NULL;
1341    case TGSI_FILE_SYSTEM_VALUE:
1342       assert(!ptr);
1343       return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
1344    default:
1345       return getArrayForFile(src.getFile(), idx2d)->load(
1346          sub.cur->values, idx, swz, ptr);
1347    }
1348 }
1349 
1350 Value *
acquireDst(int d,int c)1351 Converter::acquireDst(int d, int c)
1352 {
1353    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1354    const unsigned f = dst.getFile();
1355    const int idx = dst.getIndex(0);
1356    const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1357 
1358    if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
1359       return NULL;
1360 
1361    if (dst.isIndirect(0) ||
1362        f == TGSI_FILE_TEMPORARY_ARRAY ||
1363        f == TGSI_FILE_SYSTEM_VALUE ||
1364        (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
1365       return getScratch();
1366 
1367    return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
1368 }
1369 
1370 void
storeDst(int d,int c,Value * val)1371 Converter::storeDst(int d, int c, Value *val)
1372 {
1373    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1374 
1375    switch (tgsi.getSaturate()) {
1376    case TGSI_SAT_NONE:
1377       break;
1378    case TGSI_SAT_ZERO_ONE:
1379       mkOp1(OP_SAT, dstTy, val, val);
1380       break;
1381    case TGSI_SAT_MINUS_PLUS_ONE:
1382       mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
1383       mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
1384       break;
1385    default:
1386       assert(!"invalid saturation mode");
1387       break;
1388    }
1389 
1390    Value *ptr = dst.isIndirect(0) ?
1391       fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
1392 
1393    if (info->io.genUserClip > 0 &&
1394        dst.getFile() == TGSI_FILE_OUTPUT &&
1395        !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
1396       mkMov(clipVtx[c], val);
1397       val = clipVtx[c];
1398    }
1399 
1400    storeDst(dst, c, val, ptr);
1401 }
1402 
1403 void
storeDst(const tgsi::Instruction::DstRegister dst,int c,Value * val,Value * ptr)1404 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
1405                     Value *val, Value *ptr)
1406 {
1407    const unsigned f = dst.getFile();
1408    const int idx = dst.getIndex(0);
1409    const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1410 
1411    if (f == TGSI_FILE_SYSTEM_VALUE) {
1412       assert(!ptr);
1413       mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
1414    } else
1415    if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
1416       if (ptr || (info->out[idx].mask & (1 << c)))
1417          mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
1418    } else
1419    if (f == TGSI_FILE_TEMPORARY ||
1420        f == TGSI_FILE_TEMPORARY_ARRAY ||
1421        f == TGSI_FILE_PREDICATE ||
1422        f == TGSI_FILE_ADDRESS ||
1423        f == TGSI_FILE_OUTPUT) {
1424       getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
1425    } else {
1426       assert(!"invalid dst file");
1427    }
1428 }
1429 
// Loop header: iterates chan over 0..3 and executes the statement that
// follows only for channels that destination d of inst does not mask.
// The chan and inst arguments are parenthesized so that arbitrary
// expressions can be passed, and the filter is spelled "if (masked) { }
// else" so the macro plus its statement form a complete if/else that cannot
// swallow an 'else' written after it.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
   for ((chan) = 0; (chan) < 4; ++(chan))           \
      if ((inst).getDst(d).isMasked(chan)) { } else
1433 
1434 Value *
buildDot(int dim)1435 Converter::buildDot(int dim)
1436 {
1437    assert(dim > 0);
1438 
1439    Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
1440    Value *dotp = getScratch();
1441 
1442    mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1443 
1444    for (int c = 1; c < dim; ++c) {
1445       src0 = fetchSrc(0, c);
1446       src1 = fetchSrc(1, c);
1447       mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
1448    }
1449    return dotp;
1450 }
1451 
1452 void
insertConvergenceOps(BasicBlock * conv,BasicBlock * fork)1453 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1454 {
1455    FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1456    join->fixed = 1;
1457    conv->insertHead(join);
1458 
1459    fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1460    fork->insertBefore(fork->getExit(), fork->joinAt);
1461 }
1462 
1463 void
setTexRS(TexInstruction * tex,unsigned int & s,int R,int S)1464 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
1465 {
1466    unsigned rIdx = 0, sIdx = 0;
1467 
1468    if (R >= 0)
1469       rIdx = tgsi.getSrc(R).getIndex(0);
1470    if (S >= 0)
1471       sIdx = tgsi.getSrc(S).getIndex(0);
1472 
1473    tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
1474 
1475    if (tgsi.getSrc(R).isIndirect(0)) {
1476       tex->tex.rIndirectSrc = s;
1477       tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
1478    }
1479    if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
1480       tex->tex.sIndirectSrc = s;
1481       tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
1482    }
1483 }
1484 
1485 void
handleTXQ(Value * dst0[4],enum TexQuery query)1486 Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
1487 {
1488    TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
1489    tex->tex.query = query;
1490    unsigned int c, d;
1491 
1492    for (d = 0, c = 0; c < 4; ++c) {
1493       if (!dst0[c])
1494          continue;
1495       tex->tex.mask |= 1 << c;
1496       tex->setDef(d++, dst0[c]);
1497    }
1498    tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
1499 
1500    setTexRS(tex, c, 1, -1);
1501 
1502    bb->insertTail(tex);
1503 }
1504 
// Divides the texture coordinates selected by mask by the projection value
// (component 3 of TGSI source 0) and writes the results to dst.
//  dst  - receives the projected value for every component set in mask
//  src  - the unprojected coordinate values
//  mask - bit c set means component c has to be projected
// Components produced by an OP_PINTERP are handled by re-issuing the
// interpolation with the reciprocal of the projection value as its second
// source; all other components are multiplied by that reciprocal.
void
Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
{
   Value *proj = fetchSrc(0, 3);
   Instruction *insn = proj->getUniqueInsn();
   int c;

   // If the projection value itself comes from an OP_PINTERP, emit a clone
   // that interpolates it linearly (no second source) and use that instead.
   if (insn->op == OP_PINTERP) {
      bb->insertTail(insn = cloneForward(func, insn));
      insn->op = OP_LINTERP;
      insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
      insn->setSrc(1, NULL);
      proj = insn->getDef(0);
   }
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);

   for (c = 0; c < 4; ++c) {
      if (!(mask & (1 << c)))
         continue;
      // only coordinates that are produced by an OP_PINTERP qualify here
      if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
         continue;
      // handled by interpolation, drop it from the multiply pass below
      mask &= ~(1 << c);

      // clone the interpolation with 1/proj as its second source
      bb->insertTail(insn = cloneForward(func, insn));
      insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
      insn->setSrc(1, proj);
      dst[c] = insn->getDef(0);
   }
   if (!mask)
      return;

   // Remaining components: multiply by the reciprocal of the projection
   // value, which is fetched again from the TGSI source here.
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));

   for (c = 0; c < 4; ++c)
      if (mask & (1 << c))
         dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
}
1542 
// order of nv50 ir sources: x y z layer lod/bias shadow
// order of TGSI TEX sources: x y z layer shadow lod/bias
//  lowering will finally set the hw specific order (like array first on nvc0)
//
// Emits the texture instruction for the current TGSI instruction.
//  dst        - destination values per component, NULL for unused ones
//  R, S       - TGSI source operands holding the resource and the sampler
//  L, C       - location of the lod/bias and of the shadow reference
//  Dx, Dy     - location of the first component of the derivatives
// L, C, Dx and Dy are decoded as (value >> 4) = TGSI source operand and
// (value & 3) = component; C == 0x0f requests the shadow reference location
// to be guessed.
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
   Value *val;
   Value *arg[4], *src[8];
   Value *lod = NULL, *shd = NULL;
   unsigned int s, c, d;
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());

   TexInstruction::Target tgt = tgsi.getTexture(code, R);

   // coordinates come from TGSI source 0; arg keeps the unmodified values,
   // src the ones that end up as instruction sources
   for (s = 0; s < tgt.getArgCount(); ++s)
      arg[s] = src[s] = fetchSrc(0, s);

   if (texi->op == OP_TXL || texi->op == OP_TXB)
      lod = fetchSrc(L >> 4, L & 3);

   // auto: TGSI source 0, component max(argument count, 2)
   if (C == 0x0f)
      C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src

   if (tgt.isShadow())
      shd = fetchSrc(C >> 4, C & 3);

   // explicit derivatives: one component per texture dimension
   if (texi->op == OP_TXD) {
      for (c = 0; c < tgt.getDim(); ++c) {
         texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
         texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
      }
   }

   // cube textures don't care about projection value, it's divided out
   if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
      unsigned int n = tgt.getDim();
      // the shadow reference is projected along with the coordinates
      if (shd) {
         arg[n] = shd;
         ++n;
         assert(tgt.getDim() == tgt.getArgCount());
      }
      loadProjTexCoords(src, arg, (1 << n) - 1);
      if (shd)
         shd = src[n - 1];
   }

   // cube maps: scale the coordinates by the reciprocal of the largest
   // absolute component
   if (tgt.isCube()) {
      for (c = 0; c < 3; ++c)
         src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
      val = getScratch();
      mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
      mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
      mkOp1(OP_RCP, TYPE_F32, val, val);
      for (c = 0; c < 3; ++c)
         src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
   }

   // used destination components become consecutive defs, tex.mask records
   // which components they stand for
   for (c = 0, d = 0; c < 4; ++c) {
      if (dst[c]) {
         texi->setDef(d++, dst[c]);
         texi->tex.mask |= 1 << c;
      } else {
         // NOTE: maybe hook up def too, for CSE
      }
   }
   // sources: coordinates, then lod/bias, then shadow reference; s keeps
   // counting so that setTexRS() can append indirect addresses after them
   for (s = 0; s < tgt.getArgCount(); ++s)
      texi->setSrc(s, src[s]);
   if (lod)
      texi->setSrc(s++, lod);
   if (shd)
      texi->setSrc(s++, shd);

   setTexRS(texi, s, R, S);

   if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
      texi->tex.levelZero = true;

   bb->insertTail(texi);
}
1622 
1623 // 1st source: xyz = coordinates, w = lod
1624 // 2nd source: offset
1625 void
handleTXF(Value * dst[4],int R)1626 Converter::handleTXF(Value *dst[4], int R)
1627 {
1628    TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1629    unsigned int c, d, s;
1630 
1631    texi->tex.target = tgsi.getTexture(code, R);
1632 
1633    for (c = 0, d = 0; c < 4; ++c) {
1634       if (dst[c]) {
1635          texi->setDef(d++, dst[c]);
1636          texi->tex.mask |= 1 << c;
1637       }
1638    }
1639    for (c = 0; c < texi->tex.target.getArgCount(); ++c)
1640       texi->setSrc(c, fetchSrc(0, c));
1641    texi->setSrc(c++, fetchSrc(0, 3)); // lod
1642 
1643    setTexRS(texi, c, R, -1);
1644 
1645    for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
1646       for (c = 0; c < 3; ++c) {
1647          texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
1648          if (texi->tex.offset[s][c])
1649             texi->tex.useOffsets = s + 1;
1650       }
1651    }
1652 
1653    bb->insertTail(texi);
1654 }
1655 
1656 void
handleLIT(Value * dst0[4])1657 Converter::handleLIT(Value *dst0[4])
1658 {
1659    Value *val0 = NULL;
1660    unsigned int mask = tgsi.getDst(0).getMask();
1661 
1662    if (mask & (1 << 0))
1663       loadImm(dst0[0], 1.0f);
1664 
1665    if (mask & (1 << 3))
1666       loadImm(dst0[3], 1.0f);
1667 
1668    if (mask & (3 << 1)) {
1669       val0 = getScratch();
1670       mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
1671       if (mask & (1 << 1))
1672          mkMov(dst0[1], val0);
1673    }
1674 
1675    if (mask & (1 << 2)) {
1676       Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
1677       Value *val1 = getScratch(), *val3 = getScratch();
1678 
1679       Value *pos128 = loadImm(NULL, +127.999999f);
1680       Value *neg128 = loadImm(NULL, -127.999999f);
1681 
1682       mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
1683       mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
1684       mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
1685       mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
1686 
1687       mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
1688    }
1689 }
1690 
1691 Converter::Subroutine *
getSubroutine(unsigned ip)1692 Converter::getSubroutine(unsigned ip)
1693 {
1694    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
1695 
1696    if (it == sub.map.end())
1697       it = sub.map.insert(std::make_pair(
1698               ip, Subroutine(new Function(prog, "SUB", ip)))).first;
1699 
1700    return &it->second;
1701 }
1702 
1703 Converter::Subroutine *
getSubroutine(Function * f)1704 Converter::getSubroutine(Function *f)
1705 {
1706    unsigned ip = f->getLabel();
1707    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
1708 
1709    if (it == sub.map.end())
1710       it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
1711 
1712    return &it->second;
1713 }
1714 
1715 bool
isEndOfSubroutine(uint ip)1716 Converter::isEndOfSubroutine(uint ip)
1717 {
1718    assert(ip < code->scan.num_instructions);
1719    tgsi::Instruction insn(&code->insns[ip]);
1720    return (insn.getOpcode() == TGSI_OPCODE_END ||
1721            insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
1722            // does END occur at end of main or the very end ?
1723            insn.getOpcode() == TGSI_OPCODE_BGNSUB);
1724 }
1725 
1726 bool
handleInstruction(const struct tgsi_full_instruction * insn)1727 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
1728 {
1729    Value *dst0[4], *rDst0[4];
1730    Value *src0, *src1, *src2;
1731    Value *val0, *val1;
1732    int c;
1733 
1734    tgsi = tgsi::Instruction(insn);
1735 
1736    bool useScratchDst = tgsi.checkDstSrcAliasing();
1737 
1738    operation op = tgsi.getOP();
1739    dstTy = tgsi.inferDstType();
1740    srcTy = tgsi.inferSrcType();
1741 
1742    unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
1743 
1744    if (tgsi.dstCount()) {
1745       for (c = 0; c < 4; ++c) {
1746          rDst0[c] = acquireDst(0, c);
1747          dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
1748       }
1749    }
1750 
1751    switch (tgsi.getOpcode()) {
1752    case TGSI_OPCODE_ADD:
1753    case TGSI_OPCODE_UADD:
1754    case TGSI_OPCODE_AND:
1755    case TGSI_OPCODE_DIV:
1756    case TGSI_OPCODE_IDIV:
1757    case TGSI_OPCODE_UDIV:
1758    case TGSI_OPCODE_MAX:
1759    case TGSI_OPCODE_MIN:
1760    case TGSI_OPCODE_IMAX:
1761    case TGSI_OPCODE_IMIN:
1762    case TGSI_OPCODE_UMAX:
1763    case TGSI_OPCODE_UMIN:
1764    case TGSI_OPCODE_MOD:
1765    case TGSI_OPCODE_UMOD:
1766    case TGSI_OPCODE_MUL:
1767    case TGSI_OPCODE_UMUL:
1768    case TGSI_OPCODE_OR:
1769    case TGSI_OPCODE_POW:
1770    case TGSI_OPCODE_SHL:
1771    case TGSI_OPCODE_ISHR:
1772    case TGSI_OPCODE_USHR:
1773    case TGSI_OPCODE_SUB:
1774    case TGSI_OPCODE_XOR:
1775       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1776          src0 = fetchSrc(0, c);
1777          src1 = fetchSrc(1, c);
1778          mkOp2(op, dstTy, dst0[c], src0, src1);
1779       }
1780       break;
1781    case TGSI_OPCODE_MAD:
1782    case TGSI_OPCODE_UMAD:
1783    case TGSI_OPCODE_SAD:
1784       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1785          src0 = fetchSrc(0, c);
1786          src1 = fetchSrc(1, c);
1787          src2 = fetchSrc(2, c);
1788          mkOp3(op, dstTy, dst0[c], src0, src1, src2);
1789       }
1790       break;
1791    case TGSI_OPCODE_MOV:
1792    case TGSI_OPCODE_ABS:
1793    case TGSI_OPCODE_CEIL:
1794    case TGSI_OPCODE_FLR:
1795    case TGSI_OPCODE_TRUNC:
1796    case TGSI_OPCODE_RCP:
1797    case TGSI_OPCODE_IABS:
1798    case TGSI_OPCODE_INEG:
1799    case TGSI_OPCODE_NOT:
1800    case TGSI_OPCODE_DDX:
1801    case TGSI_OPCODE_DDY:
1802       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1803          mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
1804       break;
1805    case TGSI_OPCODE_RSQ:
1806       src0 = fetchSrc(0, 0);
1807       val0 = getScratch();
1808       mkOp1(OP_ABS, TYPE_F32, val0, src0);
1809       mkOp1(OP_RSQ, TYPE_F32, val0, val0);
1810       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1811          mkMov(dst0[c], val0);
1812       break;
1813    case TGSI_OPCODE_ARL:
1814       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1815          src0 = fetchSrc(0, c);
1816          mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
1817          mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
1818       }
1819       break;
1820    case TGSI_OPCODE_UARL:
1821       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1822          mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
1823       break;
1824    case TGSI_OPCODE_EX2:
1825    case TGSI_OPCODE_LG2:
1826       val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
1827       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1828          mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
1829       break;
1830    case TGSI_OPCODE_COS:
1831    case TGSI_OPCODE_SIN:
1832       val0 = getScratch();
1833       if (mask & 7) {
1834          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
1835          mkOp1(op, TYPE_F32, val0, val0);
1836          for (c = 0; c < 3; ++c)
1837             if (dst0[c])
1838                mkMov(dst0[c], val0);
1839       }
1840       if (dst0[3]) {
1841          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
1842          mkOp1(op, TYPE_F32, dst0[3], val0);
1843       }
1844       break;
1845    case TGSI_OPCODE_SCS:
1846       if (mask & 3) {
1847          val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
1848          if (dst0[0])
1849             mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
1850          if (dst0[1])
1851             mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
1852       }
1853       if (dst0[2])
1854          loadImm(dst0[2], 0.0f);
1855       if (dst0[3])
1856          loadImm(dst0[3], 1.0f);
1857       break;
1858    case TGSI_OPCODE_EXP:
1859       src0 = fetchSrc(0, 0);
1860       val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
1861       if (dst0[1])
1862          mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
1863       if (dst0[0])
1864          mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
1865       if (dst0[2])
1866          mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
1867       if (dst0[3])
1868          loadImm(dst0[3], 1.0f);
1869       break;
1870    case TGSI_OPCODE_LOG:
1871       src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
1872       val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
1873       if (dst0[0] || dst0[1])
1874          val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
1875       if (dst0[1]) {
1876          mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
1877          mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
1878          mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
1879       }
1880       if (dst0[3])
1881          loadImm(dst0[3], 1.0f);
1882       break;
1883    case TGSI_OPCODE_DP2:
1884       val0 = buildDot(2);
1885       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1886          mkMov(dst0[c], val0);
1887       break;
1888    case TGSI_OPCODE_DP3:
1889       val0 = buildDot(3);
1890       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1891          mkMov(dst0[c], val0);
1892       break;
1893    case TGSI_OPCODE_DP4:
1894       val0 = buildDot(4);
1895       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1896          mkMov(dst0[c], val0);
1897       break;
1898    case TGSI_OPCODE_DPH:
1899       val0 = buildDot(3);
1900       src1 = fetchSrc(1, 3);
1901       mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
1902       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1903          mkMov(dst0[c], val0);
1904       break;
1905    case TGSI_OPCODE_DST:
1906       if (dst0[0])
1907          loadImm(dst0[0], 1.0f);
1908       if (dst0[1]) {
1909          src0 = fetchSrc(0, 1);
1910          src1 = fetchSrc(1, 1);
1911          mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
1912       }
1913       if (dst0[2])
1914          mkMov(dst0[2], fetchSrc(0, 2));
1915       if (dst0[3])
1916          mkMov(dst0[3], fetchSrc(1, 3));
1917       break;
1918    case TGSI_OPCODE_LRP:
1919       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1920          src0 = fetchSrc(0, c);
1921          src1 = fetchSrc(1, c);
1922          src2 = fetchSrc(2, c);
1923          mkOp3(OP_MAD, TYPE_F32, dst0[c],
1924                mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
1925       }
1926       break;
1927    case TGSI_OPCODE_LIT:
1928       handleLIT(dst0);
1929       break;
1930    case TGSI_OPCODE_XPD:
1931       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1932          if (c < 3) {
1933             val0 = getSSA();
1934             src0 = fetchSrc(1, (c + 1) % 3);
1935             src1 = fetchSrc(0, (c + 2) % 3);
1936             mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
1937             mkOp1(OP_NEG, TYPE_F32, val0, val0);
1938 
1939             src0 = fetchSrc(0, (c + 1) % 3);
1940             src1 = fetchSrc(1, (c + 2) % 3);
1941             mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
1942          } else {
1943             loadImm(dst0[c], 1.0f);
1944          }
1945       }
1946       break;
1947    case TGSI_OPCODE_ISSG:
1948    case TGSI_OPCODE_SSG:
1949       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1950          src0 = fetchSrc(0, c);
1951          val0 = getScratch();
1952          val1 = getScratch();
1953          mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
1954          mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
1955          if (srcTy == TYPE_F32)
1956             mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
1957          else
1958             mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
1959       }
1960       break;
1961    case TGSI_OPCODE_UCMP:
1962    case TGSI_OPCODE_CMP:
1963       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1964          src0 = fetchSrc(0, c);
1965          src1 = fetchSrc(1, c);
1966          src2 = fetchSrc(2, c);
1967          if (src1 == src2)
1968             mkMov(dst0[c], src1);
1969          else
1970             mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
1971                   srcTy, dst0[c], src1, src2, src0);
1972       }
1973       break;
1974    case TGSI_OPCODE_FRC:
1975       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1976          src0 = fetchSrc(0, c);
1977          val0 = getScratch();
1978          mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
1979          mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
1980       }
1981       break;
1982    case TGSI_OPCODE_ROUND:
1983       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1984          mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
1985          ->rnd = ROUND_NI;
1986       break;
1987    case TGSI_OPCODE_CLAMP:
1988       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
1989          src0 = fetchSrc(0, c);
1990          src1 = fetchSrc(1, c);
1991          src2 = fetchSrc(2, c);
1992          val0 = getScratch();
1993          mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
1994          mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
1995       }
1996       break;
1997    case TGSI_OPCODE_SLT:
1998    case TGSI_OPCODE_SGE:
1999    case TGSI_OPCODE_SEQ:
2000    case TGSI_OPCODE_SFL:
2001    case TGSI_OPCODE_SGT:
2002    case TGSI_OPCODE_SLE:
2003    case TGSI_OPCODE_SNE:
2004    case TGSI_OPCODE_STR:
2005    case TGSI_OPCODE_ISGE:
2006    case TGSI_OPCODE_ISLT:
2007    case TGSI_OPCODE_USEQ:
2008    case TGSI_OPCODE_USGE:
2009    case TGSI_OPCODE_USLT:
2010    case TGSI_OPCODE_USNE:
2011       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2012          src0 = fetchSrc(0, c);
2013          src1 = fetchSrc(1, c);
2014          mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
2015       }
2016       break;
2017    case TGSI_OPCODE_KIL:
2018       val0 = new_LValue(func, FILE_PREDICATE);
2019       for (c = 0; c < 4; ++c) {
2020          mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
2021          mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
2022       }
2023       break;
2024    case TGSI_OPCODE_KILP:
2025       mkOp(OP_DISCARD, TYPE_NONE, NULL);
2026       break;
2027    case TGSI_OPCODE_TEX:
2028    case TGSI_OPCODE_TXB:
2029    case TGSI_OPCODE_TXL:
2030    case TGSI_OPCODE_TXP:
2031       //              R  S     L     C    Dx    Dy
2032       handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
2033       break;
2034    case TGSI_OPCODE_TXD:
2035       handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
2036       break;
2037    case TGSI_OPCODE_SAMPLE:
2038    case TGSI_OPCODE_SAMPLE_B:
2039    case TGSI_OPCODE_SAMPLE_D:
2040    case TGSI_OPCODE_SAMPLE_L:
2041    case TGSI_OPCODE_SAMPLE_C:
2042    case TGSI_OPCODE_SAMPLE_C_LZ:
2043       handleTEX(dst0, 1, 2, 0x30, 0x31, 0x40, 0x50);
2044       break;
2045    case TGSI_OPCODE_TXF:
2046    case TGSI_OPCODE_LOAD:
2047       handleTXF(dst0, 1);
2048       break;
2049    case TGSI_OPCODE_TXQ:
2050    case TGSI_OPCODE_SVIEWINFO:
2051       handleTXQ(dst0, TXQ_DIMS);
2052       break;
2053    case TGSI_OPCODE_F2I:
2054    case TGSI_OPCODE_F2U:
2055       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2056          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
2057       break;
2058    case TGSI_OPCODE_I2F:
2059    case TGSI_OPCODE_U2F:
2060       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2061          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
2062       break;
2063    case TGSI_OPCODE_EMIT:
2064    case TGSI_OPCODE_ENDPRIM:
2065       // get vertex stream if specified (must be immediate)
2066       src0 = tgsi.srcCount() ?
2067          mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
2068       mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
2069       break;
2070    case TGSI_OPCODE_IF:
2071    {
2072       BasicBlock *ifBB = new BasicBlock(func);
2073 
2074       bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
2075       condBBs.push(bb);
2076       joinBBs.push(bb);
2077 
2078       mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
2079 
2080       setPosition(ifBB, true);
2081    }
2082       break;
2083    case TGSI_OPCODE_ELSE:
2084    {
2085       BasicBlock *elseBB = new BasicBlock(func);
2086       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2087 
2088       forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
2089       condBBs.push(bb);
2090 
2091       forkBB->getExit()->asFlow()->target.bb = elseBB;
2092       if (!bb->isTerminated())
2093          mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
2094 
2095       setPosition(elseBB, true);
2096    }
2097       break;
2098    case TGSI_OPCODE_ENDIF:
2099    {
2100       BasicBlock *convBB = new BasicBlock(func);
2101       BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2102       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2103 
2104       if (!bb->isTerminated()) {
2105          // we only want join if none of the clauses ended with CONT/BREAK/RET
2106          if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2107             insertConvergenceOps(convBB, forkBB);
2108          mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
2109          bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2110       }
2111 
2112       if (prevBB->getExit()->op == OP_BRA) {
2113          prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2114          prevBB->getExit()->asFlow()->target.bb = convBB;
2115       }
2116       setPosition(convBB, true);
2117    }
2118       break;
2119    case TGSI_OPCODE_BGNLOOP:
2120    {
2121       BasicBlock *lbgnBB = new BasicBlock(func);
2122       BasicBlock *lbrkBB = new BasicBlock(func);
2123 
2124       loopBBs.push(lbgnBB);
2125       breakBBs.push(lbrkBB);
2126       if (loopBBs.getSize() > func->loopNestingBound)
2127          func->loopNestingBound++;
2128 
2129       mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
2130 
2131       bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
2132       setPosition(lbgnBB, true);
2133       mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
2134    }
2135       break;
2136    case TGSI_OPCODE_ENDLOOP:
2137    {
2138       BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2139 
2140       if (!bb->isTerminated()) {
2141          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2142          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2143       }
2144       setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2145    }
2146       break;
2147    case TGSI_OPCODE_BRK:
2148    {
2149       if (bb->isTerminated())
2150          break;
2151       BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2152       mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
2153       bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
2154    }
2155       break;
2156    case TGSI_OPCODE_CONT:
2157    {
2158       if (bb->isTerminated())
2159          break;
2160       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2161       mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2162       contBB->explicitCont = true;
2163       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2164    }
2165       break;
2166    case TGSI_OPCODE_BGNSUB:
2167    {
2168       Subroutine *s = getSubroutine(ip);
2169       BasicBlock *entry = new BasicBlock(s->f);
2170       BasicBlock *leave = new BasicBlock(s->f);
2171 
2172       // multiple entrypoints possible, keep the graph connected
2173       if (prog->getType() == Program::TYPE_COMPUTE)
2174          prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
2175 
2176       sub.cur = s;
2177       s->f->setEntry(entry);
2178       s->f->setExit(leave);
2179       setPosition(entry, true);
2180       return true;
2181    }
2182    case TGSI_OPCODE_ENDSUB:
2183    {
2184       sub.cur = getSubroutine(prog->main);
2185       setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
2186       return true;
2187    }
2188    case TGSI_OPCODE_CAL:
2189    {
2190       Subroutine *s = getSubroutine(tgsi.getLabel());
2191       mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
2192       func->call.attach(&s->f->call, Graph::Edge::TREE);
2193       return true;
2194    }
2195    case TGSI_OPCODE_RET:
2196    {
2197       if (bb->isTerminated())
2198          return true;
2199       BasicBlock *leave = BasicBlock::get(func->cfgExit);
2200 
2201       if (!isEndOfSubroutine(ip + 1)) {
2202          // insert a PRERET at the entry if this is an early return
2203          // (only needed for sharing code in the epilogue)
2204          BasicBlock *pos = getBB();
2205          setPosition(BasicBlock::get(func->cfg.getRoot()), false);
2206          mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
2207          setPosition(pos, true);
2208       }
2209       mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
2210       bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
2211    }
2212       break;
2213    case TGSI_OPCODE_END:
2214    {
2215       // attach and generate epilogue code
2216       BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2217       bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
2218       setPosition(epilogue, true);
2219       if (prog->getType() == Program::TYPE_FRAGMENT)
2220          exportOutputs();
2221       if (info->io.genUserClip > 0)
2222          handleUserClipPlanes();
2223       mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
2224    }
2225       break;
2226    case TGSI_OPCODE_SWITCH:
2227    case TGSI_OPCODE_CASE:
2228       ERROR("switch/case opcode encountered, should have been lowered\n");
2229       abort();
2230       break;
2231    default:
2232       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
2233       assert(0);
2234       break;
2235    }
2236 
2237    if (tgsi.dstCount()) {
2238       for (c = 0; c < 4; ++c) {
2239          if (!dst0[c])
2240             continue;
2241          if (dst0[c] != rDst0[c])
2242             mkMov(rDst0[c], dst0[c]);
2243          storeDst(0, c, rDst0[c]);
2244       }
2245    }
2246    vtxBaseValid = 0;
2247 
2248    return true;
2249 }
2250 
2251 void
handleUserClipPlanes()2252 Converter::handleUserClipPlanes()
2253 {
2254    Value *res[8];
2255    int n, i, c;
2256 
2257    for (c = 0; c < 4; ++c) {
2258       for (i = 0; i < info->io.genUserClip; ++i) {
2259          Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
2260                                 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
2261          Value *ucp = mkLoad(TYPE_F32, sym, NULL);
2262          if (c == 0)
2263             res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
2264          else
2265             mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
2266       }
2267    }
2268 
2269    const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
2270 
2271    for (i = 0; i < info->io.genUserClip; ++i) {
2272       n = i / 4 + first;
2273       c = i % 4;
2274       Symbol *sym =
2275          mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
2276       mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
2277    }
2278 }
2279 
2280 void
exportOutputs()2281 Converter::exportOutputs()
2282 {
2283    for (unsigned int i = 0; i < info->numOutputs; ++i) {
2284       for (unsigned int c = 0; c < 4; ++c) {
2285          if (!oData.exists(sub.cur->values, i, c))
2286             continue;
2287          Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2288                                 info->out[i].slot[c] * 4);
2289          Value *val = oData.load(sub.cur->values, i, c, NULL);
2290          if (val)
2291             mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2292       }
2293    }
2294 }
2295 
Converter(Program * ir,const tgsi::Source * code)2296 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
2297      code(code),
2298      tgsi(NULL),
2299      tData(this), aData(this), pData(this), oData(this)
2300 {
2301    info = code->info;
2302 
2303    const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
2304 
2305    const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
2306    const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
2307    const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
2308    const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
2309 
2310    tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
2311    pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
2312    aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0);
2313    oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
2314 
2315    for (int vol = 0, i = 0; i < code->tempArrayCount; ++i) {
2316       int len = code->tempArrays[i].u32 >> 2;
2317       int dim = code->tempArrays[i].u32 & 3;
2318 
2319       lData.push_back(DataArray(this));
2320       lData.back().setup(TGSI_FILE_TEMPORARY_ARRAY, i, vol, len, dim, 4,
2321                          FILE_MEMORY_LOCAL, 0);
2322 
2323       vol += (len * dim * 4 + 0xf) & ~0xf;
2324    }
2325 
2326    for (int vol = 0, i = 0; i < code->immdArrayCount; ++i) {
2327       int len = code->immdArrays[i].u32 >> 2;
2328       int dim = code->immdArrays[i].u32 & 3;
2329 
2330       lData.push_back(DataArray(this));
2331       lData.back().setup(TGSI_FILE_IMMEDIATE_ARRAY, i, vol, len, dim, 4,
2332                          FILE_MEMORY_CONST, 14);
2333 
2334       vol += (len * dim * 4 + 0xf) & ~0xf;
2335    }
2336 
2337    zero = mkImm((uint32_t)0);
2338 
2339    vtxBaseValid = 0;
2340 }
2341 
// No explicit cleanup is performed here; the destructor body is empty.
Converter::~Converter()
{
}
2345 
2346 template<typename T> inline void
updateCallArgs(Instruction * i,void (Instruction::* setArg)(int,Value *),T (Function::* proto))2347 Converter::BindArgumentsPass::updateCallArgs(
2348    Instruction *i, void (Instruction::*setArg)(int, Value *),
2349    T (Function::*proto))
2350 {
2351    Function *g = i->asFlow()->target.fn;
2352    Subroutine *subg = conv.getSubroutine(g);
2353 
2354    for (unsigned a = 0; a < (g->*proto).size(); ++a) {
2355       Value *v = (g->*proto)[a].get();
2356       const Converter::Location &l = subg->values.l.find(v)->second;
2357       Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
2358 
2359       (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
2360    }
2361 }
2362 
2363 template<typename T> inline void
updatePrototype(BitSet * set,void (Function::* updateSet)(),T (Function::* proto))2364 Converter::BindArgumentsPass::updatePrototype(
2365    BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
2366 {
2367    (func->*updateSet)();
2368 
2369    for (unsigned i = 0; i < set->getSize(); ++i) {
2370       Value *v = func->getLValue(i);
2371 
2372       // only include values with a matching TGSI register
2373       if (set->test(i) && sub->values.l.find(v) != sub->values.l.end())
2374          (func->*proto).push_back(v);
2375    }
2376 }
2377 
2378 bool
visit(Function * f)2379 Converter::BindArgumentsPass::visit(Function *f)
2380 {
2381    sub = conv.getSubroutine(f);
2382 
2383    for (ArrayList::Iterator bi = f->allBBlocks.iterator();
2384         !bi.end(); bi.next()) {
2385       for (Instruction *i = BasicBlock::get(bi)->getFirst();
2386            i; i = i->next) {
2387          if (i->op == OP_CALL && !i->asFlow()->builtin) {
2388             updateCallArgs(i, &Instruction::setSrc, &Function::ins);
2389             updateCallArgs(i, &Instruction::setDef, &Function::outs);
2390          }
2391       }
2392    }
2393 
2394    if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
2395       return true;
2396    updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
2397                    &Function::buildLiveSets, &Function::ins);
2398    updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
2399                    &Function::buildDefSets, &Function::outs);
2400 
2401    return true;
2402 }
2403 
2404 bool
run()2405 Converter::run()
2406 {
2407    BasicBlock *entry = new BasicBlock(prog->main);
2408    BasicBlock *leave = new BasicBlock(prog->main);
2409 
2410    prog->main->setEntry(entry);
2411    prog->main->setExit(leave);
2412 
2413    setPosition(entry, true);
2414    sub.cur = getSubroutine(prog->main);
2415 
2416    if (info->io.genUserClip > 0) {
2417       for (int c = 0; c < 4; ++c)
2418          clipVtx[c] = getScratch();
2419    }
2420 
2421    if (prog->getType() == Program::TYPE_FRAGMENT) {
2422       Symbol *sv = mkSysVal(SV_POSITION, 3);
2423       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2424       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2425    }
2426 
2427    for (ip = 0; ip < code->scan.num_instructions; ++ip) {
2428       if (!handleInstruction(&code->insns[ip]))
2429          return false;
2430    }
2431 
2432    if (!BindArgumentsPass(*this).run(prog))
2433       return false;
2434 
2435    return true;
2436 }
2437 
2438 } // unnamed namespace
2439 
2440 namespace nv50_ir {
2441 
2442 bool
makeFromTGSI(struct nv50_ir_prog_info * info)2443 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
2444 {
2445    tgsi::Source src(info);
2446    if (!src.scanSource())
2447       return false;
2448    tlsSize = info->bin.tlsSpace;
2449 
2450    Converter builder(this, &src);
2451    return builder.run();
2452 }
2453 
2454 } // namespace nv50_ir
2455