1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 namespace nv50_ir {
26 
27 // Argh, all these assertions ...
28 
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32    CodeEmitterNVC0(const TargetNVC0 *);
33 
34    virtual bool emitInstruction(Instruction *);
35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
36    virtual void prepareEmission(Function *);
37 
setProgramType(Program::Type pType)38    inline void setProgramType(Program::Type pType) { progType = pType; }
39 
40 private:
41    const TargetNVC0 *targNVC0;
42 
43    Program::Type progType;
44 
45    const bool writeIssueDelays;
46 
47 private:
48    void emitForm_A(const Instruction *, uint64_t);
49    void emitForm_B(const Instruction *, uint64_t);
50    void emitForm_S(const Instruction *, uint32_t, bool pred);
51 
52    void emitPredicate(const Instruction *);
53 
54    void setAddress16(const ValueRef&);
55    void setAddress24(const ValueRef&);
56    void setAddressByFile(const ValueRef&);
57    void setImmediate(const Instruction *, const int s); // needs op already set
58    void setImmediateS8(const ValueRef&);
59    void setSUConst16(const Instruction *, const int s);
60    void setSUPred(const Instruction *, const int s);
61    void setPDSTL(const Instruction *, const int d);
62 
63    void emitCondCode(CondCode cc, int pos);
64    void emitInterpMode(const Instruction *);
65    void emitLoadStoreType(DataType ty);
66    void emitSUGType(DataType);
67    void emitSUAddr(const TexInstruction *);
68    void emitSUDim(const TexInstruction *);
69    void emitCachingMode(CacheMode c);
70 
71    void emitShortSrc2(const ValueRef&);
72 
73    inline uint8_t getSRegEncoding(const ValueRef&);
74 
75    void roundMode_A(const Instruction *);
76    void roundMode_C(const Instruction *);
77    void roundMode_CS(const Instruction *);
78 
79    void emitNegAbs12(const Instruction *);
80 
81    void emitNOP(const Instruction *);
82 
83    void emitLOAD(const Instruction *);
84    void emitSTORE(const Instruction *);
85    void emitMOV(const Instruction *);
86    void emitATOM(const Instruction *);
87    void emitMEMBAR(const Instruction *);
88    void emitCCTL(const Instruction *);
89 
90    void emitINTERP(const Instruction *);
91    void emitAFETCH(const Instruction *);
92    void emitPFETCH(const Instruction *);
93    void emitVFETCH(const Instruction *);
94    void emitEXPORT(const Instruction *);
95    void emitOUT(const Instruction *);
96 
97    void emitUADD(const Instruction *);
98    void emitFADD(const Instruction *);
99    void emitDADD(const Instruction *);
100    void emitUMUL(const Instruction *);
101    void emitFMUL(const Instruction *);
102    void emitDMUL(const Instruction *);
103    void emitIMAD(const Instruction *);
104    void emitISAD(const Instruction *);
105    void emitSHLADD(const Instruction *a);
106    void emitFMAD(const Instruction *);
107    void emitDMAD(const Instruction *);
108    void emitMADSP(const Instruction *);
109 
110    void emitNOT(Instruction *);
111    void emitLogicOp(const Instruction *, uint8_t subOp);
112    void emitPOPC(const Instruction *);
113    void emitINSBF(const Instruction *);
114    void emitEXTBF(const Instruction *);
115    void emitBFIND(const Instruction *);
116    void emitPERMT(const Instruction *);
117    void emitShift(const Instruction *);
118 
119    void emitSFnOp(const Instruction *, uint8_t subOp);
120 
121    void emitCVT(Instruction *);
122    void emitMINMAX(const Instruction *);
123    void emitPreOp(const Instruction *);
124 
125    void emitSET(const CmpInstruction *);
126    void emitSLCT(const CmpInstruction *);
127    void emitSELP(const Instruction *);
128 
129    void emitTEXBAR(const Instruction *);
130    void emitTEX(const TexInstruction *);
131    void emitTEXCSAA(const TexInstruction *);
132    void emitTXQ(const TexInstruction *);
133 
134    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
135 
136    void emitFlow(const Instruction *);
137    void emitBAR(const Instruction *);
138 
139    void emitSUCLAMPMode(uint16_t);
140    void emitSUCalc(Instruction *);
141    void emitSULDGB(const TexInstruction *);
142    void emitSUSTGx(const TexInstruction *);
143 
144    void emitSULDB(const TexInstruction *);
145    void emitSUSTx(const TexInstruction *);
146    void emitSULEA(const TexInstruction *);
147 
148    void emitVSHL(const Instruction *);
149    void emitVectorSubOp(const Instruction *);
150 
151    void emitPIXLD(const Instruction *);
152 
153    void emitSHFL(const Instruction *);
154 
155    void emitVOTE(const Instruction *);
156 
157    inline void defId(const ValueDef&, const int pos);
158    inline void defId(const Instruction *, int d, const int pos);
159    inline void srcId(const ValueRef&, const int pos);
160    inline void srcId(const ValueRef *, const int pos);
161    inline void srcId(const Instruction *, int s, const int pos);
162    inline void srcAddr32(const ValueRef&, int pos, int shr);
163 
164    inline bool isLIMM(const ValueRef&, DataType ty);
165 };
166 
// Readability helper: pastes two hex digit groups (high part first) into a
// single unsigned 64-bit literal.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Shorthands for the reg.data member of the value that a source (SDATA) or
// definition (DDATA) reference resolves to through rep().
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(def) ((def).rep()->reg.data)
172 
srcId(const ValueRef & src,const int pos)173 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
174 {
175    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
176 }
177 
srcId(const ValueRef * src,const int pos)178 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
179 {
180    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
181 }
182 
srcId(const Instruction * insn,int s,int pos)183 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
184 {
185    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
186    code[pos / 32] |= r << (pos % 32);
187 }
188 
189 void
srcAddr32(const ValueRef & src,int pos,int shr)190 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
191 {
192    const uint32_t offset = SDATA(src).offset >> shr;
193 
194    code[pos / 32] |= offset << (pos % 32);
195    if (pos && (pos < 32))
196       code[1] |= offset >> (32 - pos);
197 }
198 
defId(const ValueDef & def,const int pos)199 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
200 {
201    code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : 63) << (pos % 32);
202 }
203 
defId(const Instruction * insn,int d,const int pos)204 void CodeEmitterNVC0::defId(const Instruction *insn, int d, const int pos)
205 {
206    if (insn->defExists(d))
207       defId(insn->def(d), pos);
208    else
209       code[pos / 32] |= 63 << (pos % 32);
210 }
211 
isLIMM(const ValueRef & ref,DataType ty)212 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
213 {
214    const ImmediateValue *imm = ref.get()->asImm();
215 
216    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
217 }
218 
219 void
roundMode_A(const Instruction * insn)220 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
221 {
222    switch (insn->rnd) {
223    case ROUND_M: code[1] |= 1 << 23; break;
224    case ROUND_P: code[1] |= 2 << 23; break;
225    case ROUND_Z: code[1] |= 3 << 23; break;
226    default:
227       assert(insn->rnd == ROUND_N);
228       break;
229    }
230 }
231 
232 void
emitNegAbs12(const Instruction * i)233 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
234 {
235    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
236    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
237    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
238    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
239 }
240 
emitCondCode(CondCode cc,int pos)241 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
242 {
243    uint8_t val;
244 
245    switch (cc) {
246    case CC_LT:  val = 0x1; break;
247    case CC_LTU: val = 0x9; break;
248    case CC_EQ:  val = 0x2; break;
249    case CC_EQU: val = 0xa; break;
250    case CC_LE:  val = 0x3; break;
251    case CC_LEU: val = 0xb; break;
252    case CC_GT:  val = 0x4; break;
253    case CC_GTU: val = 0xc; break;
254    case CC_NE:  val = 0x5; break;
255    case CC_NEU: val = 0xd; break;
256    case CC_GE:  val = 0x6; break;
257    case CC_GEU: val = 0xe; break;
258    case CC_TR:  val = 0xf; break;
259    case CC_FL:  val = 0x0; break;
260 
261    case CC_A:  val = 0x14; break;
262    case CC_NA: val = 0x13; break;
263    case CC_S:  val = 0x15; break;
264    case CC_NS: val = 0x12; break;
265    case CC_C:  val = 0x16; break;
266    case CC_NC: val = 0x11; break;
267    case CC_O:  val = 0x17; break;
268    case CC_NO: val = 0x10; break;
269 
270    default:
271       val = 0;
272       assert(!"invalid condition code");
273       break;
274    }
275    code[pos / 32] |= val << (pos % 32);
276 }
277 
278 void
emitPredicate(const Instruction * i)279 CodeEmitterNVC0::emitPredicate(const Instruction *i)
280 {
281    if (i->predSrc >= 0) {
282       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
283       srcId(i->src(i->predSrc), 10);
284       if (i->cc == CC_NOT_P)
285          code[0] |= 0x2000; // negate
286    } else {
287       code[0] |= 0x1c00;
288    }
289 }
290 
291 void
setAddressByFile(const ValueRef & src)292 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
293 {
294    switch (src.getFile()) {
295    case FILE_MEMORY_GLOBAL:
296       srcAddr32(src, 26, 0);
297       break;
298    case FILE_MEMORY_LOCAL:
299    case FILE_MEMORY_SHARED:
300       setAddress24(src);
301       break;
302    default:
303       assert(src.getFile() == FILE_MEMORY_CONST);
304       setAddress16(src);
305       break;
306    }
307 }
308 
309 void
setAddress16(const ValueRef & src)310 CodeEmitterNVC0::setAddress16(const ValueRef& src)
311 {
312    Symbol *sym = src.get()->asSym();
313 
314    assert(sym);
315 
316    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
317    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
318 }
319 
320 void
setAddress24(const ValueRef & src)321 CodeEmitterNVC0::setAddress24(const ValueRef& src)
322 {
323    Symbol *sym = src.get()->asSym();
324 
325    assert(sym);
326 
327    code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
328    code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
329 }
330 
331 void
setImmediate(const Instruction * i,const int s)332 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
333 {
334    const ImmediateValue *imm = i->src(s).get()->asImm();
335    uint32_t u32;
336 
337    assert(imm);
338    u32 = imm->reg.data.u32;
339 
340    if ((code[0] & 0xf) == 0x1) {
341       // double immediate
342       uint64_t u64 = imm->reg.data.u64;
343       assert(!(u64 & 0x00000fffffffffffULL));
344       assert(!(code[1] & 0xc000));
345       code[0] |= ((u64 >> 44) & 0x3f) << 26;
346       code[1] |= 0xc000 | (u64 >> 50);
347    } else
348    if ((code[0] & 0xf) == 0x2) {
349       // LIMM
350       code[0] |= (u32 & 0x3f) << 26;
351       code[1] |= u32 >> 6;
352    } else
353    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
354       // integer immediate
355       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
356       assert(!(code[1] & 0xc000));
357       u32 &= 0xfffff;
358       code[0] |= (u32 & 0x3f) << 26;
359       code[1] |= 0xc000 | (u32 >> 6);
360    } else {
361       // float immediate
362       assert(!(u32 & 0x00000fff));
363       assert(!(code[1] & 0xc000));
364       code[0] |= ((u32 >> 12) & 0x3f) << 26;
365       code[1] |= 0xc000 | (u32 >> 18);
366    }
367 }
368 
setImmediateS8(const ValueRef & ref)369 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
370 {
371    const ImmediateValue *imm = ref.get()->asImm();
372 
373    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
374 
375    assert(s8 == imm->reg.data.s32);
376 
377    code[0] |= (s8 & 0x3f) << 26;
378    code[0] |= (s8 >> 6) << 8;
379 }
380 
setPDSTL(const Instruction * i,const int d)381 void CodeEmitterNVC0::setPDSTL(const Instruction *i, const int d)
382 {
383    assert(d < 0 || (i->defExists(d) && i->def(d).getFile() == FILE_PREDICATE));
384 
385    uint32_t pred = d >= 0 ? DDATA(i->def(d)).id : 7;
386 
387    code[0] |= (pred & 3) << 8;
388    code[1] |= (pred & 4) << (26 - 2);
389 }
390 
391 void
emitForm_A(const Instruction * i,uint64_t opc)392 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
393 {
394    code[0] = opc;
395    code[1] = opc >> 32;
396 
397    emitPredicate(i);
398 
399    defId(i->def(0), 14);
400 
401    int s1 = 26;
402    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
403       s1 = 49;
404 
405    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
406       switch (i->getSrc(s)->reg.file) {
407       case FILE_MEMORY_CONST:
408          assert(!(code[1] & 0xc000));
409          code[1] |= (s == 2) ? 0x8000 : 0x4000;
410          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
411          setAddress16(i->src(s));
412          break;
413       case FILE_IMMEDIATE:
414          assert(s == 1 ||
415                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
416          assert(!(code[1] & 0xc000));
417          setImmediate(i, s);
418          break;
419       case FILE_GPR:
420          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
421             break;
422          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
423          break;
424       default:
425          if (i->op == OP_SELP) {
426             // OP_SELP is used to implement shared+atomics on Fermi.
427             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
428             srcId(i->src(s), 49);
429          }
430          // ignore here, can be predicate or flags, but must not be address
431          break;
432       }
433    }
434 }
435 
436 void
emitForm_B(const Instruction * i,uint64_t opc)437 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
438 {
439    code[0] = opc;
440    code[1] = opc >> 32;
441 
442    emitPredicate(i);
443 
444    defId(i->def(0), 14);
445 
446    switch (i->src(0).getFile()) {
447    case FILE_MEMORY_CONST:
448       assert(!(code[1] & 0xc000));
449       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
450       setAddress16(i->src(0));
451       break;
452    case FILE_IMMEDIATE:
453       assert(!(code[1] & 0xc000));
454       setImmediate(i, 0);
455       break;
456    case FILE_GPR:
457       srcId(i->src(0), 26);
458       break;
459    default:
460       // ignore here, can be predicate or flags, but must not be address
461       break;
462    }
463 }
464 
465 void
emitForm_S(const Instruction * i,uint32_t opc,bool pred)466 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
467 {
468    code[0] = opc;
469 
470    int ss2a = 0;
471    if (opc == 0x0d || opc == 0x0e)
472       ss2a = 2;
473 
474    defId(i->def(0), 14);
475    srcId(i->src(0), 20);
476 
477    assert(pred || (i->predSrc < 0));
478    if (pred)
479       emitPredicate(i);
480 
481    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
482       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
483          assert(!(code[0] & (0x300 >> ss2a)));
484          switch (i->src(s).get()->reg.fileIndex) {
485          case 0:  code[0] |= 0x100 >> ss2a; break;
486          case 1:  code[0] |= 0x200 >> ss2a; break;
487          case 16: code[0] |= 0x300 >> ss2a; break;
488          default:
489             ERROR("invalid c[] space for short form\n");
490             break;
491          }
492          if (s == 1)
493             code[0] |= i->getSrc(s)->reg.data.offset << 24;
494          else
495             code[0] |= i->getSrc(s)->reg.data.offset << 6;
496       } else
497       if (i->src(s).getFile() == FILE_IMMEDIATE) {
498          assert(s == 1);
499          setImmediateS8(i->src(s));
500       } else
501       if (i->src(s).getFile() == FILE_GPR) {
502          srcId(i->src(s), (s == 1) ? 26 : 8);
503       }
504    }
505 }
506 
507 void
emitShortSrc2(const ValueRef & src)508 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
509 {
510    if (src.getFile() == FILE_MEMORY_CONST) {
511       switch (src.get()->reg.fileIndex) {
512       case 0:  code[0] |= 0x100; break;
513       case 1:  code[0] |= 0x200; break;
514       case 16: code[0] |= 0x300; break;
515       default:
516          assert(!"unsupported file index for short op");
517          break;
518       }
519       srcAddr32(src, 20, 2);
520    } else {
521       srcId(src, 20);
522       assert(src.getFile() == FILE_GPR);
523    }
524 }
525 
526 void
emitNOP(const Instruction * i)527 CodeEmitterNVC0::emitNOP(const Instruction *i)
528 {
529    code[0] = 0x000001e4;
530    code[1] = 0x40000000;
531    emitPredicate(i);
532 }
533 
534 void
emitFMAD(const Instruction * i)535 CodeEmitterNVC0::emitFMAD(const Instruction *i)
536 {
537    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
538 
539    if (i->encSize == 8) {
540       if (isLIMM(i->src(1), TYPE_F32)) {
541          emitForm_A(i, HEX64(20000000, 00000002));
542       } else {
543          emitForm_A(i, HEX64(30000000, 00000000));
544 
545          if (i->src(2).mod.neg())
546             code[0] |= 1 << 8;
547       }
548       roundMode_A(i);
549 
550       if (neg1)
551          code[0] |= 1 << 9;
552 
553       if (i->saturate)
554          code[0] |= 1 << 5;
555 
556       if (i->dnz)
557          code[0] |= 1 << 7;
558       else
559       if (i->ftz)
560          code[0] |= 1 << 6;
561    } else {
562       assert(!i->saturate && !i->src(2).mod.neg());
563       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
564                  false);
565       if (neg1)
566          code[0] |= 1 << 4;
567    }
568 }
569 
570 void
emitDMAD(const Instruction * i)571 CodeEmitterNVC0::emitDMAD(const Instruction *i)
572 {
573    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
574 
575    emitForm_A(i, HEX64(20000000, 00000001));
576 
577    if (i->src(2).mod.neg())
578       code[0] |= 1 << 8;
579 
580    roundMode_A(i);
581 
582    if (neg1)
583       code[0] |= 1 << 9;
584 
585    assert(!i->saturate);
586    assert(!i->ftz);
587 }
588 
589 void
emitFMUL(const Instruction * i)590 CodeEmitterNVC0::emitFMUL(const Instruction *i)
591 {
592    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
593 
594    assert(i->postFactor >= -3 && i->postFactor <= 3);
595 
596    if (i->encSize == 8) {
597       if (isLIMM(i->src(1), TYPE_F32)) {
598          assert(i->postFactor == 0); // constant folded, hopefully
599          emitForm_A(i, HEX64(30000000, 00000002));
600       } else {
601          emitForm_A(i, HEX64(58000000, 00000000));
602          roundMode_A(i);
603          code[1] |= ((i->postFactor > 0) ?
604                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
605       }
606       if (neg)
607          code[1] ^= 1 << 25; // aliases with LIMM sign bit
608 
609       if (i->saturate)
610          code[0] |= 1 << 5;
611 
612       if (i->dnz)
613          code[0] |= 1 << 7;
614       else
615       if (i->ftz)
616          code[0] |= 1 << 6;
617    } else {
618       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
619       emitForm_S(i, 0xa8, true);
620    }
621 }
622 
623 void
emitDMUL(const Instruction * i)624 CodeEmitterNVC0::emitDMUL(const Instruction *i)
625 {
626    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
627 
628    emitForm_A(i, HEX64(50000000, 00000001));
629    roundMode_A(i);
630 
631    if (neg)
632       code[0] |= 1 << 9;
633 
634    assert(!i->saturate);
635    assert(!i->ftz);
636    assert(!i->dnz);
637    assert(!i->postFactor);
638 }
639 
640 void
emitUMUL(const Instruction * i)641 CodeEmitterNVC0::emitUMUL(const Instruction *i)
642 {
643    if (i->encSize == 8) {
644       if (i->src(1).getFile() == FILE_IMMEDIATE) {
645          emitForm_A(i, HEX64(10000000, 00000002));
646       } else {
647          emitForm_A(i, HEX64(50000000, 00000003));
648       }
649       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
650          code[0] |= 1 << 6;
651       if (i->sType == TYPE_S32)
652          code[0] |= 1 << 5;
653       if (i->dType == TYPE_S32)
654          code[0] |= 1 << 7;
655    } else {
656       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
657 
658       if (i->sType == TYPE_S32)
659          code[0] |= 1 << 6;
660    }
661 }
662 
663 void
emitFADD(const Instruction * i)664 CodeEmitterNVC0::emitFADD(const Instruction *i)
665 {
666    if (i->encSize == 8) {
667       if (isLIMM(i->src(1), TYPE_F32)) {
668          assert(!i->saturate);
669          emitForm_A(i, HEX64(28000000, 00000002));
670 
671          code[0] |= i->src(0).mod.abs() << 7;
672          code[0] |= i->src(0).mod.neg() << 9;
673 
674          if (i->src(1).mod.abs())
675             code[1] &= 0xfdffffff;
676          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
677             code[1] ^= 0x02000000;
678       } else {
679          emitForm_A(i, HEX64(50000000, 00000000));
680 
681          roundMode_A(i);
682          if (i->saturate)
683             code[1] |= 1 << 17;
684 
685          emitNegAbs12(i);
686          if (i->op == OP_SUB) code[0] ^= 1 << 8;
687       }
688       if (i->ftz)
689          code[0] |= 1 << 5;
690    } else {
691       assert(!i->saturate && i->op != OP_SUB &&
692              !i->src(0).mod.abs() &&
693              !i->src(1).mod.neg() && !i->src(1).mod.abs());
694 
695       emitForm_S(i, 0x49, true);
696 
697       if (i->src(0).mod.neg())
698          code[0] |= 1 << 7;
699    }
700 }
701 
702 void
emitDADD(const Instruction * i)703 CodeEmitterNVC0::emitDADD(const Instruction *i)
704 {
705    assert(i->encSize == 8);
706    emitForm_A(i, HEX64(48000000, 00000001));
707    roundMode_A(i);
708    assert(!i->saturate);
709    assert(!i->ftz);
710    emitNegAbs12(i);
711    if (i->op == OP_SUB)
712       code[0] ^= 1 << 8;
713 }
714 
715 void
emitUADD(const Instruction * i)716 CodeEmitterNVC0::emitUADD(const Instruction *i)
717 {
718    uint32_t addOp = 0;
719 
720    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
721 
722    if (i->src(0).mod.neg())
723       addOp |= 0x200;
724    if (i->src(1).mod.neg())
725       addOp |= 0x100;
726    if (i->op == OP_SUB)
727       addOp ^= 0x100;
728 
729    assert(addOp != 0x300); // would be add-plus-one
730 
731    if (i->encSize == 8) {
732       if (isLIMM(i->src(1), TYPE_U32)) {
733          emitForm_A(i, HEX64(08000000, 00000002));
734          if (i->flagsDef >= 0)
735             code[1] |= 1 << 26; // write carry
736       } else {
737          emitForm_A(i, HEX64(48000000, 00000003));
738          if (i->flagsDef >= 0)
739             code[1] |= 1 << 16; // write carry
740       }
741       code[0] |= addOp;
742 
743       if (i->saturate)
744          code[0] |= 1 << 5;
745       if (i->flagsSrc >= 0) // add carry
746          code[0] |= 1 << 6;
747    } else {
748       assert(!(addOp & 0x100));
749       emitForm_S(i, (addOp >> 3) |
750                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
751    }
752 }
753 
754 void
emitIMAD(const Instruction * i)755 CodeEmitterNVC0::emitIMAD(const Instruction *i)
756 {
757    uint8_t addOp =
758       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
759 
760    assert(i->encSize == 8);
761    emitForm_A(i, HEX64(20000000, 00000003));
762 
763    assert(addOp != 3);
764    code[0] |= addOp << 8;
765 
766    if (isSignedType(i->dType))
767       code[0] |= 1 << 7;
768    if (isSignedType(i->sType))
769       code[0] |= 1 << 5;
770 
771    code[1] |= i->saturate << 24;
772 
773    if (i->flagsDef >= 0) code[1] |= 1 << 16;
774    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
775 
776    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
777       code[0] |= 1 << 6;
778 }
779 
780 void
emitSHLADD(const Instruction * i)781 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
782 {
783    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
784    const ImmediateValue *imm = i->src(1).get()->asImm();
785    assert(imm);
786 
787    code[0] = 0x00000003;
788    code[1] = 0x40000000 | addOp << 23;
789 
790    emitPredicate(i);
791 
792    defId(i->def(0), 14);
793    srcId(i->src(0), 20);
794 
795    if (i->flagsDef >= 0)
796       code[1] |= 1 << 16;
797 
798    assert(!(imm->reg.data.u32 & 0xffffffe0));
799    code[0] |= imm->reg.data.u32 << 5;
800 
801    switch (i->src(2).getFile()) {
802    case FILE_GPR:
803       srcId(i->src(2), 26);
804       break;
805    case FILE_MEMORY_CONST:
806       code[1] |= 0x4000;
807       code[1] |= i->getSrc(2)->reg.fileIndex << 10;
808       setAddress16(i->src(2));
809       break;
810    case FILE_IMMEDIATE:
811       setImmediate(i, 2);
812       break;
813    default:
814       assert(!"bad src2 file");
815       break;
816    }
817 }
818 
819 void
emitMADSP(const Instruction * i)820 CodeEmitterNVC0::emitMADSP(const Instruction *i)
821 {
822    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
823 
824    emitForm_A(i, HEX64(00000000, 00000003));
825 
826    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
827       code[1] |= 0x01800000;
828    } else {
829       code[0] |= (i->subOp & 0x00f) << 7;
830       code[0] |= (i->subOp & 0x0f0) << 1;
831       code[0] |= (i->subOp & 0x100) >> 3;
832       code[0] |= (i->subOp & 0x200) >> 2;
833       code[1] |= (i->subOp & 0xc00) << 13;
834    }
835 
836    if (i->flagsDef >= 0)
837       code[1] |= 1 << 16;
838 }
839 
840 void
emitISAD(const Instruction * i)841 CodeEmitterNVC0::emitISAD(const Instruction *i)
842 {
843    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
844    assert(i->encSize == 8);
845 
846    emitForm_A(i, HEX64(38000000, 00000003));
847 
848    if (i->dType == TYPE_S32)
849       code[0] |= 1 << 5;
850 }
851 
852 void
emitNOT(Instruction * i)853 CodeEmitterNVC0::emitNOT(Instruction *i)
854 {
855    assert(i->encSize == 8);
856    i->setSrc(1, i->src(0));
857    emitForm_A(i, HEX64(68000000, 000001c3));
858 }
859 
860 void
emitLogicOp(const Instruction * i,uint8_t subOp)861 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
862 {
863    if (i->def(0).getFile() == FILE_PREDICATE) {
864       code[0] = 0x00000004 | (subOp << 30);
865       code[1] = 0x0c000000;
866 
867       emitPredicate(i);
868 
869       defId(i->def(0), 17);
870       srcId(i->src(0), 20);
871       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
872       srcId(i->src(1), 26);
873       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
874 
875       if (i->defExists(1)) {
876          defId(i->def(1), 14);
877       } else {
878          code[0] |= 7 << 14;
879       }
880       // (a OP b) OP c
881       if (i->predSrc != 2 && i->srcExists(2)) {
882          code[1] |= subOp << 21;
883          srcId(i->src(2), 49);
884          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
885       } else {
886          code[1] |= 0x000e0000;
887       }
888    } else
889    if (i->encSize == 8) {
890       if (isLIMM(i->src(1), TYPE_U32)) {
891          emitForm_A(i, HEX64(38000000, 00000002));
892 
893          if (i->flagsDef >= 0)
894             code[1] |= 1 << 26;
895       } else {
896          emitForm_A(i, HEX64(68000000, 00000003));
897 
898          if (i->flagsDef >= 0)
899             code[1] |= 1 << 16;
900       }
901       code[0] |= subOp << 6;
902 
903       if (i->flagsSrc >= 0) // carry
904          code[0] |= 1 << 5;
905 
906       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
907       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
908    } else {
909       emitForm_S(i, (subOp << 5) |
910                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
911    }
912 }
913 
914 void
emitPOPC(const Instruction * i)915 CodeEmitterNVC0::emitPOPC(const Instruction *i)
916 {
917    emitForm_A(i, HEX64(54000000, 00000004));
918 
919    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
920    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
921 }
922 
923 void
emitINSBF(const Instruction * i)924 CodeEmitterNVC0::emitINSBF(const Instruction *i)
925 {
926    emitForm_A(i, HEX64(28000000, 00000003));
927 }
928 
929 void
emitEXTBF(const Instruction * i)930 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
931 {
932    emitForm_A(i, HEX64(70000000, 00000003));
933 
934    if (i->dType == TYPE_S32)
935       code[0] |= 1 << 5;
936    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
937       code[0] |= 1 << 8;
938 }
939 
940 void
emitBFIND(const Instruction * i)941 CodeEmitterNVC0::emitBFIND(const Instruction *i)
942 {
943    emitForm_B(i, HEX64(78000000, 00000003));
944 
945    if (i->dType == TYPE_S32)
946       code[0] |= 1 << 5;
947    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
948       code[0] |= 1 << 8;
949    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
950       code[0] |= 1 << 6;
951 }
952 
953 void
emitPERMT(const Instruction * i)954 CodeEmitterNVC0::emitPERMT(const Instruction *i)
955 {
956    emitForm_A(i, HEX64(24000000, 00000004));
957 
958    code[0] |= i->subOp << 5;
959 }
960 
961 void
emitShift(const Instruction * i)962 CodeEmitterNVC0::emitShift(const Instruction *i)
963 {
964    if (i->op == OP_SHR) {
965       emitForm_A(i, HEX64(58000000, 00000003)
966                  | (isSignedType(i->dType) ? 0x20 : 0x00));
967    } else {
968       emitForm_A(i, HEX64(60000000, 00000003));
969    }
970 
971    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
972       code[0] |= 1 << 9;
973 }
974 
975 void
emitPreOp(const Instruction * i)976 CodeEmitterNVC0::emitPreOp(const Instruction *i)
977 {
978    if (i->encSize == 8) {
979       emitForm_B(i, HEX64(60000000, 00000000));
980 
981       if (i->op == OP_PREEX2)
982          code[0] |= 0x20;
983 
984       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
985       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
986    } else {
987       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
988    }
989 }
990 
991 void
emitSFnOp(const Instruction * i,uint8_t subOp)992 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
993 {
994    if (i->encSize == 8) {
995       code[0] = 0x00000000 | (subOp << 26);
996       code[1] = 0xc8000000;
997 
998       emitPredicate(i);
999 
1000       defId(i->def(0), 14);
1001       srcId(i->src(0), 20);
1002 
1003       assert(i->src(0).getFile() == FILE_GPR);
1004 
1005       if (i->saturate) code[0] |= 1 << 5;
1006 
1007       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
1008       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
1009    } else {
1010       emitForm_S(i, 0x80000008 | (subOp << 26), true);
1011 
1012       assert(!i->src(0).mod.neg());
1013       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
1014    }
1015 }
1016 
1017 void
emitMINMAX(const Instruction * i)1018 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1019 {
1020    uint64_t op;
1021 
1022    assert(i->encSize == 8);
1023 
1024    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1025 
1026    if (i->ftz)
1027       op |= 1 << 5;
1028    else
1029    if (!isFloatType(i->dType)) {
1030       op |= isSignedType(i->dType) ? 0x23 : 0x03;
1031       op |= i->subOp << 6;
1032    }
1033    if (i->dType == TYPE_F64)
1034       op |= 0x01;
1035 
1036    emitForm_A(i, op);
1037    emitNegAbs12(i);
1038 
1039    if (i->flagsDef >= 0)
1040       code[1] |= 1 << 16;
1041 }
1042 
1043 void
roundMode_C(const Instruction * i)1044 CodeEmitterNVC0::roundMode_C(const Instruction *i)
1045 {
1046    switch (i->rnd) {
1047    case ROUND_M:  code[1] |= 1 << 17; break;
1048    case ROUND_P:  code[1] |= 2 << 17; break;
1049    case ROUND_Z:  code[1] |= 3 << 17; break;
1050    case ROUND_NI: code[0] |= 1 << 7; break;
1051    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1052    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1053    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1054    case ROUND_N: break;
1055    default:
1056       assert(!"invalid round mode");
1057       break;
1058    }
1059 }
1060 
1061 void
roundMode_CS(const Instruction * i)1062 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1063 {
1064    switch (i->rnd) {
1065    case ROUND_M:
1066    case ROUND_MI: code[0] |= 1 << 16; break;
1067    case ROUND_P:
1068    case ROUND_PI: code[0] |= 2 << 16; break;
1069    case ROUND_Z:
1070    case ROUND_ZI: code[0] |= 3 << 16; break;
1071    default:
1072       break;
1073    }
1074 }
1075 
// Emit a conversion-class instruction.  The body special-cases OP_CEIL,
// OP_FLOOR, OP_TRUNC, OP_SAT, OP_ABS and OP_NEG; anything else is encoded as
// a conversion from i->sType to i->dType.
//
// @i is not const on purpose: for OP_CEIL/OP_FLOOR/OP_TRUNC the rounding
// mode stored in the instruction is overwritten before it is encoded.
void
CodeEmitterNVC0::emitCVT(Instruction *i)
{
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   DataType dType;

   // Map the rounding ops onto a rounding mode; float-to-float uses the
   // "I" variants of the mode, everything else the plain ones.
   switch (i->op) {
   case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
   case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
   case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   default:
      break;
   }

   // The op itself can imply a modifier, in addition to the explicit
   // saturate flag / source modifiers.
   const bool sat = (i->op == OP_SAT) || i->saturate;
   const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
   const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();

   // A negation producing U32 is encoded with a signed destination type.
   if (i->op == OP_NEG && i->dType == TYPE_U32)
      dType = TYPE_S32;
   else
      dType = i->dType;

   if (i->encSize == 8) {
      emitForm_B(i, HEX64(10000000, 00000004));

      roundMode_C(i);

      // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
      // log2 of the destination / source type sizes go at bits 20 and 23.
      code[0] |= util_logbase2(typeSizeof(dType)) << 20;
      code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;

      // for 8/16 source types, the byte/word is in subOp. word 1 is
      // represented as 2.
      if (!isFloatType(i->sType))
         code[1] |= i->subOp << 0x17;
      else
         code[1] |= i->subOp << 0x18;

      if (sat)
         code[0] |= 0x20;
      if (abs)
         code[0] |= 1 << 6;
      // For OP_ABS the neg bit is never emitted, even if the source carries
      // a neg modifier.
      if (neg && i->op != OP_ABS)
         code[0] |= 1 << 8;

      if (i->ftz)
         code[1] |= 1 << 23;

      // Signedness of destination and source.
      if (isSignedIntType(dType))
         code[0] |= 0x080;
      if (isSignedIntType(i->sType))
         code[0] |= 0x200;

      // Select the conversion class from the float/int-ness of both types:
      // float<-float adds nothing, float<-int 0x08000000,
      // int<-float 0x04000000, int<-int 0x0c000000.
      if (isFloatType(dType)) {
         if (!isFloatType(i->sType))
            code[1] |= 0x08000000;
      } else {
         if (isFloatType(i->sType))
            code[1] |= 0x04000000;
         else
            code[1] |= 0x0c000000;
      }
   } else {
      // Short encoding: code[0] is assigned from scratch here; only the
      // opcode, neg/sat/abs bits and the rounding mode are written in this
      // function.
      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
         code[0] = 0x298;
      } else
      if (isFloatType(dType)) {
         if (isFloatType(i->sType))
            code[0] = 0x098;
         else
            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
      } else {
         // int<-int has no short form.
         assert(isFloatType(i->sType));

         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
      }

      if (neg) code[0] |= 1 << 16;
      if (sat) code[0] |= 1 << 18;
      if (abs) code[0] |= 1 << 19;

      roundMode_CS(i);
   }
}
1161 
1162 void
emitSET(const CmpInstruction * i)1163 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1164 {
1165    uint32_t hi;
1166    uint32_t lo = 0;
1167 
1168    if (i->sType == TYPE_F64)
1169       lo = 0x1;
1170    else
1171    if (!isFloatType(i->sType))
1172       lo = 0x3;
1173 
1174    if (isSignedIntType(i->sType))
1175       lo |= 0x20;
1176    if (isFloatType(i->dType)) {
1177       if (isFloatType(i->sType))
1178          lo |= 0x20;
1179       else
1180          lo |= 0x80;
1181    }
1182 
1183    switch (i->op) {
1184    case OP_SET_AND: hi = 0x10000000; break;
1185    case OP_SET_OR:  hi = 0x10200000; break;
1186    case OP_SET_XOR: hi = 0x10400000; break;
1187    default:
1188       hi = 0x100e0000;
1189       break;
1190    }
1191    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1192 
1193    if (i->op != OP_SET)
1194       srcId(i->src(2), 32 + 17);
1195 
1196    if (i->def(0).getFile() == FILE_PREDICATE) {
1197       if (i->sType == TYPE_F32)
1198          code[1] += 0x10000000;
1199       else
1200          code[1] += 0x08000000;
1201 
1202       code[0] &= ~0xfc000;
1203       defId(i->def(0), 17);
1204       if (i->defExists(1))
1205          defId(i->def(1), 14);
1206       else
1207          code[0] |= 0x1c000;
1208    }
1209 
1210    if (i->ftz)
1211       code[1] |= 1 << 27;
1212    if (i->flagsSrc >= 0)
1213       code[0] |= 1 << 6;
1214 
1215    emitCondCode(i->setCond, 32 + 23);
1216    emitNegAbs12(i);
1217 }
1218 
1219 void
emitSLCT(const CmpInstruction * i)1220 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1221 {
1222    uint64_t op;
1223 
1224    switch (i->dType) {
1225    case TYPE_S32:
1226       op = HEX64(30000000, 00000023);
1227       break;
1228    case TYPE_U32:
1229       op = HEX64(30000000, 00000003);
1230       break;
1231    case TYPE_F32:
1232       op = HEX64(38000000, 00000000);
1233       break;
1234    default:
1235       assert(!"invalid type for SLCT");
1236       op = 0;
1237       break;
1238    }
1239    emitForm_A(i, op);
1240 
1241    CondCode cc = i->setCond;
1242 
1243    if (i->src(2).mod.neg())
1244       cc = reverseCondCode(cc);
1245 
1246    emitCondCode(cc, 32 + 23);
1247 
1248    if (i->ftz)
1249       code[0] |= 1 << 5;
1250 }
1251 
1252 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1253 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1254 {
1255    int loc = entry->loc;
1256    if (data.force_persample_interp)
1257       code[loc + 1] |= 1 << 20;
1258    else
1259       code[loc + 1] &= ~(1 << 20);
1260 }
1261 
emitSELP(const Instruction * i)1262 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1263 {
1264    emitForm_A(i, HEX64(20000000, 00000004));
1265 
1266    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1267       code[1] |= 1 << 20;
1268 
1269    if (i->subOp == 1) {
1270       addInterp(0, 0, selpFlip);
1271    }
1272 }
1273 
emitTEXBAR(const Instruction * i)1274 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1275 {
1276    code[0] = 0x00000006 | (i->subOp << 26);
1277    code[1] = 0xf0000000;
1278    emitPredicate(i);
1279    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1280 }
1281 
emitTEXCSAA(const TexInstruction * i)1282 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1283 {
1284    code[0] = 0x00000086;
1285    code[1] = 0xd0000000;
1286 
1287    code[1] |= i->tex.r;
1288    code[1] |= i->tex.s << 8;
1289 
1290    if (i->tex.liveOnly)
1291       code[0] |= 1 << 9;
1292 
1293    defId(i->def(0), 14);
1294    srcId(i->src(0), 20);
1295 }
1296 
1297 static inline bool
isNextIndependentTex(const TexInstruction * i)1298 isNextIndependentTex(const TexInstruction *i)
1299 {
1300    if (!i->next || !isTextureOp(i->next->op))
1301       return false;
1302    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1303       return false;
1304    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1305 }
1306 
// Emit a texture instruction (OP_TEX, OP_TXB, OP_TXL, OP_TXF, OP_TXG,
// OP_TXLQ or OP_TXD; anything else asserts).
//
// code[0] holds the mode bits, predicate, destination (bit 14), first source
// (bit 20), second source (bit 26) and, for OP_TXG, the gather component.
// code[1] holds the opcode plus the texture/sampler fields, component mask,
// target description and offset flags.
void
CodeEmitterNVC0::emitTEX(const TexInstruction *i)
{
   code[0] = 0x00000006;

   // Mode selection depends on whether the following instruction is a
   // texture op that does not touch this one's result.
   if (isNextIndependentTex(i))
      code[0] |= 0x080; // t mode
   else
      code[0] |= 0x100; // p mode

   if (i->tex.liveOnly)
      code[0] |= 1 << 9;

   // Base opcode; this assignment initialises code[1].
   switch (i->op) {
   case OP_TEX: code[1] = 0x80000000; break;
   case OP_TXB: code[1] = 0x84000000; break;
   case OP_TXL: code[1] = 0x86000000; break;
   case OP_TXF: code[1] = 0x90000000; break;
   case OP_TXG: code[1] = 0xa0000000; break;
   case OP_TXLQ: code[1] = 0xb0000000; break;
   case OP_TXD: code[1] = 0xe0000000; break;
   default:
      assert(!"invalid texture op");
      break;
   }
   // Bit 25 has inverted meaning for OP_TXF: it is set when levelZero is
   // NOT requested, whereas for every other op it is set when levelZero is.
   if (i->op == OP_TXF) {
      if (!i->tex.levelZero)
         code[1] |= 0x02000000;
   } else
   if (i->tex.levelZero) {
      code[1] |= 0x02000000;
   }

   if (i->op != OP_TXD && i->tex.derivAll)
      code[1] |= 1 << 13;

   defId(i->def(0), 14);
   srcId(i->src(0), 20);

   emitPredicate(i);

   if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;

   code[1] |= i->tex.mask << 14;

   code[1] |= i->tex.r;
   code[1] |= i->tex.s << 8;
   if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
      code[1] |= 1 << 18; // in 1st source (with array index)

   // texture target:
   code[1] |= (i->tex.target.getDim() - 1) << 20;
   // Note the addition (not OR) for cube targets: it combines arithmetically
   // with the dimension field written just above.
   if (i->tex.target.isCube())
      code[1] += 2 << 20;
   if (i->tex.target.isArray())
      code[1] |= 1 << 19;
   if (i->tex.target.isShadow())
      code[1] |= 1 << 24;

   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)

   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
      // lzero
      // An immediate second source drops the explicit-level form: for TXL
      // bit 26 is cleared (0x86000000 -> 0x82000000, i.e. the OP_TEX opcode
      // with the 0x02000000 bit), for TXF bit 25 is cleared.
      if (i->op == OP_TXL)
         code[1] &= ~(1 << 26);
      else
      if (i->op == OP_TXF)
         code[1] &= ~(1 << 25);
   }
   // Multisample targets and useOffsets == 4 both set bit 23.
   if (i->tex.target == TEX_TARGET_2D_MS ||
       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
      code[1] |= 1 << 23;

   if (i->tex.useOffsets == 1)
      code[1] |= 1 << 22;
   if (i->tex.useOffsets == 4)
      code[1] |= 1 << 23;

   srcId(i, src1, 26);
}
1387 
1388 void
emitTXQ(const TexInstruction * i)1389 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1390 {
1391    code[0] = 0x00000086;
1392    code[1] = 0xc0000000;
1393 
1394    switch (i->tex.query) {
1395    case TXQ_DIMS:            code[1] |= 0 << 22; break;
1396    case TXQ_TYPE:            code[1] |= 1 << 22; break;
1397    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1398    case TXQ_FILTER:          code[1] |= 3 << 22; break;
1399    case TXQ_LOD:             code[1] |= 4 << 22; break;
1400    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1401    default:
1402       assert(!"invalid texture query");
1403       break;
1404    }
1405 
1406    code[1] |= i->tex.mask << 14;
1407 
1408    code[1] |= i->tex.r;
1409    code[1] |= i->tex.s << 8;
1410    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1411       code[1] |= 1 << 18;
1412 
1413    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1414 
1415    defId(i->def(0), 14);
1416    srcId(i->src(0), 20);
1417    srcId(i, src1, 26);
1418 
1419    emitPredicate(i);
1420 }
1421 
1422 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1423 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1424 {
1425    code[0] = 0x00000200 | (laneMask << 6); // dall
1426    code[1] = 0x48000000 | qOp;
1427 
1428    defId(i->def(0), 14);
1429    srcId(i->src(0), 20);
1430    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1431 
1432    emitPredicate(i);
1433 }
1434 
// Emit a control-flow instruction.
//
// The switch selects the opcode in code[1] and a two-bit "mask" telling the
// rest of the function which optional parts apply: bit 0 = the instruction
// takes a predicate, bit 1 = it takes a branch target.  Unknown ops assert
// and return without emitting the remaining fields.
//
// NOTE(review): @f (the FlowInstruction view of @i) is dereferenced in the
// OP_BRA and OP_CALL cases before the `if (!f)` check further down; this
// relies on those ops always being FlowInstructions - confirm.
void
CodeEmitterNVC0::emitFlow(const Instruction *i)
{
   const FlowInstruction *f = i->asFlow();

   unsigned mask; // bit 0: predicate, bit 1: target

   code[0] = 0x00000007;

   switch (i->op) {
   case OP_BRA:
      code[1] = f->absolute ? 0x00000000 : 0x40000000;
      // A branch whose source 0 is in constant memory is flagged with
      // 0x4000; the same flag is tested in the indirect path below.
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
         code[0] |= 0x4000;
      mask = 3;
      break;
   case OP_CALL:
      code[1] = f->absolute ? 0x10000000 : 0x50000000;
      if (f->indirect)
         code[0] |= 0x4000; // indirect calls always use c[] source
      mask = 2;
      break;

   case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
   case OP_RET:     code[1] = 0x90000000; mask = 1; break;
   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
   case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
   case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;

   case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
   case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
   case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;

   case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
   case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
   default:
      assert(!"invalid flow operation");
      return;
   }

   if (mask & 1) {
      emitPredicate(i);
      // Without a flags source, bits 5..8 of code[0] are all set.
      if (i->flagsSrc < 0)
         code[0] |= 0x1e0;
   }

   // Everything below needs the FlowInstruction fields.
   if (!f)
      return;

   if (f->allWarp)
      code[0] |= 1 << 15;
   if (f->limit)
      code[0] |= 1 << 16;

   if (f->indirect) {
      if (code[0] & 0x4000) {
         // Target address comes from constant memory: encode the 16-bit
         // address and the constant buffer index (bit 10 of code[1]).
         assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
         setAddress16(i->src(0));
         code[1] |= i->getSrc(0)->reg.fileIndex << 10;
         if (f->op == OP_BRA)
            srcId(f->src(0).getIndirect(0), 20);
      } else {
         srcId(f, 0, 20);
      }
   }

   if (f->op == OP_CALL) {
      if (f->indirect) {
         // nothing
      } else
      if (f->builtin) {
         // Builtin calls are absolute; the target is patched in through two
         // relocations, one per code word (low 6 bits at bit 26 of word 0,
         // remaining bits in word 1).
         assert(f->absolute);
         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
      } else {
         // Regular calls are relative to codeSize + 8 (presumably the
         // address just past this 8-byte instruction - confirm).
         assert(!f->absolute);
         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
         code[0] |= (pcRel & 0x3f) << 26;
         code[1] |= (pcRel >> 6) & 0x3ffff;
      }
   } else
   if (mask & 2) {
      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
      // When issue delays are written and the target block starts on a
      // 64-byte boundary, the target is moved 8 bytes further (presumably
      // to skip a scheduling word placed there - confirm).
      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
         pcRel += 8;
      // currently we don't want absolute branches
      assert(!f->absolute);
      code[0] |= (pcRel & 0x3f) << 26;
      code[1] |= (pcRel >> 6) & 0x3ffff;
   }
}
1529 
// Emit a barrier instruction.
//
// subOp selects the base value of code[0]; anything that is not one of the
// listed ARRIVE/RED_* sub-ops must be NV50_IR_SUBOP_BAR_SYNC (asserted).
// Source 0 is the barrier id and source 1 the thread count; each may be a
// GPR or an immediate.  An optional source 2 (when it is not the predicate
// source) and up to two definitions (one GPR, one non-GPR) are encoded too.
void
CodeEmitterNVC0::emitBAR(const Instruction *i)
{
   Value *rDef = NULL, *pDef = NULL;

   switch (i->subOp) {
   case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
   case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
   case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
   case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
   default:
      code[0] = 0x04;
      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
      break;
   }
   code[1] = 0x50000000;

   // Default both destination fields to all-ones; they are cleared and
   // replaced at the end of the function if real definitions exist.
   code[0] |= 63 << 14;
   code[1] |= 7 << 21;

   emitPredicate(i);

   // barrier id
   if (i->src(0).getFile() == FILE_GPR) {
      srcId(i->src(0), 20);
   } else {
      // Immediate barrier id: value at bit 20, flagged by 0x8000 in code[1].
      // NOTE(review): unlike the thread count below, the value's range is
      // not asserted here.
      ImmediateValue *imm = i->getSrc(0)->asImm();
      assert(imm);
      code[0] |= imm->reg.data.u32 << 20;
      code[1] |= 0x8000;
   }

   // thread count
   if (i->src(1).getFile() == FILE_GPR) {
      srcId(i->src(1), 26);
   } else {
      // Immediate thread count (at most 12 bits): low 6 bits at bit 26 of
      // code[0], the rest in the low bits of code[1]; flagged by 0x4000.
      ImmediateValue *imm = i->getSrc(1)->asImm();
      assert(imm);
      assert(imm->reg.data.u32 <= 0xfff);
      code[0] |= imm->reg.data.u32 << 26;
      code[1] |= imm->reg.data.u32 >> 6;
      code[1] |= 0x4000;
   }

   // Optional third source at bit 32 + 17, with a NOT modifier in bit 20 of
   // code[1]; when absent the field is filled with 7.
   if (i->srcExists(2) && (i->predSrc != 2)) {
      srcId(i->src(2), 32 + 17);
      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
         code[1] |= 1 << 20;
   } else {
      code[1] |= 7 << 17;
   }

   // Sort the (up to two) definitions into the GPR one and the other one.
   if (i->defExists(0)) {
      if (i->def(0).getFile() == FILE_GPR)
         rDef = i->getDef(0);
      else
         pDef = i->getDef(0);

      if (i->defExists(1)) {
         if (i->def(1).getFile() == FILE_GPR)
            rDef = i->getDef(1);
         else
            pDef = i->getDef(1);
      }
   }
   // Replace the all-ones defaults with the actual destinations.
   if (rDef) {
      code[0] &= ~(63 << 14);
      defId(rDef, 14);
   }
   if (pDef) {
      code[1] &= ~(7 << 21);
      defId(pDef, 32 + 21);
   }
}
1604 
1605 void
emitAFETCH(const Instruction * i)1606 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1607 {
1608    code[0] = 0x00000006;
1609    code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1610 
1611    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1612       code[0] |= 0x200;
1613 
1614    emitPredicate(i);
1615 
1616    defId(i->def(0), 14);
1617    srcId(i->src(0).getIndirect(0), 20);
1618 }
1619 
1620 void
emitPFETCH(const Instruction * i)1621 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1622 {
1623    uint32_t prim = i->src(0).get()->reg.data.u32;
1624 
1625    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1626    code[1] = 0x00000000 | (prim >> 6);
1627 
1628    emitPredicate(i);
1629 
1630    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1631 
1632    defId(i->def(0), 14);
1633    srcId(i, src1, 20);
1634 }
1635 
1636 void
emitVFETCH(const Instruction * i)1637 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1638 {
1639    code[0] = 0x00000006;
1640    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1641 
1642    if (i->perPatch)
1643       code[0] |= 0x100;
1644    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1645       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1646 
1647    emitPredicate(i);
1648 
1649    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1650 
1651    defId(i->def(0), 14);
1652    srcId(i->src(0).getIndirect(0), 20);
1653    srcId(i->src(0).getIndirect(1), 26); // vertex address
1654 }
1655 
1656 void
emitEXPORT(const Instruction * i)1657 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1658 {
1659    unsigned int size = typeSizeof(i->dType);
1660 
1661    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1662    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1663 
1664    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1665 
1666    if (i->perPatch)
1667       code[0] |= 0x100;
1668 
1669    emitPredicate(i);
1670 
1671    assert(i->src(1).getFile() == FILE_GPR);
1672 
1673    srcId(i->src(0).getIndirect(0), 20);
1674    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1675    srcId(i->src(1), 26);
1676 }
1677 
1678 void
emitOUT(const Instruction * i)1679 CodeEmitterNVC0::emitOUT(const Instruction *i)
1680 {
1681    code[0] = 0x00000006;
1682    code[1] = 0x1c000000;
1683 
1684    emitPredicate(i);
1685 
1686    defId(i->def(0), 14); // new secret address
1687    srcId(i->src(0), 20); // old secret address, should be 0 initially
1688 
1689    assert(i->src(0).getFile() == FILE_GPR);
1690 
1691    if (i->op == OP_EMIT)
1692       code[0] |= 1 << 5;
1693    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1694       code[0] |= 1 << 6;
1695 
1696    // vertex stream
1697    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1698       unsigned int stream = SDATA(i->src(1)).u32;
1699       assert(stream < 4);
1700       if (stream) {
1701          code[1] |= 0xc000;
1702          code[0] |= stream << 26;
1703       } else {
1704          srcId(NULL, 26);
1705       }
1706    } else {
1707       srcId(i->src(1), 26);
1708    }
1709 }
1710 
1711 void
emitInterpMode(const Instruction * i)1712 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1713 {
1714    if (i->encSize == 8) {
1715       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1716    } else {
1717       if (i->getInterpMode() == NV50_IR_INTERP_SC)
1718          code[0] |= 0x80;
1719       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1720    }
1721 }
1722 
1723 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)1724 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1725 {
1726    int ipa = entry->ipa;
1727    int reg = entry->reg;
1728    int loc = entry->loc;
1729 
1730    if (data.flatshade &&
1731        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1732       ipa = NV50_IR_INTERP_FLAT;
1733       reg = 0x3f;
1734    } else if (data.force_persample_interp &&
1735               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1736               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1737       ipa |= NV50_IR_INTERP_CENTROID;
1738    }
1739    code[loc + 0] &= ~(0xf << 6);
1740    code[loc + 0] |= ipa << 6;
1741    code[loc + 0] &= ~(0x3f << 26);
1742    code[loc + 0] |= reg << 26;
1743 }
1744 
// Emit an interpolation instruction (OP_PINTERP or the non-perspective
// variant handled by the else branches).
//
// In the 8-byte form a fixup (interpApply) is registered with the
// instruction's ipa value and register id so that the interpolation mode can
// be rewritten after emission.  The short form only supports OP_PINTERP
// (asserted) and registers no fixup.
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
   const uint32_t base = i->getSrc(0)->reg.data.offset;

   if (i->encSize == 8) {
      code[0] = 0x00000000;
      code[1] = 0xc0000000 | (base & 0xffff);

      if (i->saturate)
         code[0] |= 1 << 5;

      if (i->op == OP_PINTERP) {
         srcId(i->src(1), 26);
         addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
      } else {
         // No second source: the register field is filled with 0x3f, and
         // the same value is recorded for the fixup.
         code[0] |= 0x3f << 26;
         addInterp(i->ipa, 0x3f, interpApply);
      }

      srcId(i->src(0).getIndirect(0), 20);
   } else {
      assert(i->op == OP_PINTERP);
      // Short form: bits 2..3 of the offset land at bits 8..9, the offset
      // divided by 16 at bit 26.
      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
      srcId(i->src(1), 20);
   }
   emitInterpMode(i);

   emitPredicate(i);
   defId(i->def(0), 14);

   // With an OFFSET sample mode the offset register is the source following
   // the regular ones; otherwise the field is filled with 0x3f.
   // NOTE(review): this tail also runs for the short (encSize != 8) form,
   // where code[1] was not initialised by this function - confirm that
   // touching code[1] there is harmless.
   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
   else
      code[1] |= 0x3f << 17;
}
1781 
1782 void
emitLoadStoreType(DataType ty)1783 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1784 {
1785    uint8_t val;
1786 
1787    switch (ty) {
1788    case TYPE_U8:
1789       val = 0x00;
1790       break;
1791    case TYPE_S8:
1792       val = 0x20;
1793       break;
1794    case TYPE_F16:
1795    case TYPE_U16:
1796       val = 0x40;
1797       break;
1798    case TYPE_S16:
1799       val = 0x60;
1800       break;
1801    case TYPE_F32:
1802    case TYPE_U32:
1803    case TYPE_S32:
1804       val = 0x80;
1805       break;
1806    case TYPE_F64:
1807    case TYPE_U64:
1808    case TYPE_S64:
1809       val = 0xa0;
1810       break;
1811    case TYPE_B128:
1812       val = 0xc0;
1813       break;
1814    default:
1815       val = 0x80;
1816       assert(!"invalid type");
1817       break;
1818    }
1819    code[0] |= val;
1820 }
1821 
1822 void
emitCachingMode(CacheMode c)1823 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1824 {
1825    uint32_t val;
1826 
1827    switch (c) {
1828    case CACHE_CA:
1829 // case CACHE_WB:
1830       val = 0x000;
1831       break;
1832    case CACHE_CG:
1833       val = 0x100;
1834       break;
1835    case CACHE_CS:
1836       val = 0x200;
1837       break;
1838    case CACHE_CV:
1839 // case CACHE_WT:
1840       val = 0x300;
1841       break;
1842    default:
1843       val = 0;
1844       assert(!"invalid caching mode");
1845       break;
1846    }
1847    code[0] |= val;
1848 }
1849 
1850 static inline bool
uses64bitAddress(const Instruction * ldst)1851 uses64bitAddress(const Instruction *ldst)
1852 {
1853    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1854       ldst->src(0).isIndirect(0) &&
1855       ldst->getIndirect(0, 0)->reg.size == 8;
1856 }
1857 
1858 void
emitSTORE(const Instruction * i)1859 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1860 {
1861    uint32_t opc;
1862 
1863    switch (i->src(0).getFile()) {
1864    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1865    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1866    case FILE_MEMORY_SHARED:
1867       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1868          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1869             opc = 0xb8000000;
1870          else
1871             opc = 0xcc000000;
1872       } else {
1873          opc = 0xc9000000;
1874       }
1875       break;
1876    default:
1877       assert(!"invalid memory file");
1878       opc = 0;
1879       break;
1880    }
1881    code[0] = 0x00000005;
1882    code[1] = opc;
1883 
1884    if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1885       // Unlocked store on shared memory can fail.
1886       if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1887           i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1888          assert(i->defExists(0));
1889          setPDSTL(i, 0);
1890       }
1891    }
1892 
1893    setAddressByFile(i->src(0));
1894    srcId(i->src(1), 14);
1895    srcId(i->src(0).getIndirect(0), 20);
1896    if (uses64bitAddress(i))
1897       code[1] |= 1 << 26;
1898 
1899    emitPredicate(i);
1900 
1901    emitLoadStoreType(i->dType);
1902    emitCachingMode(i->cache);
1903 }
1904 
// Emit a load from global, local, shared or constant memory (any other file
// asserts and uses opcode 0).
//
// Special cases:
//  - a direct (non-indirect) 4-byte load from constant memory is delegated
//    to emitMOV and nothing else is emitted here;
//  - other constant loads replace the default code[0] and put the constant
//    buffer index into the opcode;
//  - locked shared loads (NV50_IR_SUBOP_LOAD_LOCKED) use a chipset-dependent
//    opcode and need a predicate destination.
void
CodeEmitterNVC0::emitLOAD(const Instruction *i)
{
   uint32_t opc;

   code[0] = 0x00000005;

   switch (i->src(0).getFile()) {
   case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
   case FILE_MEMORY_SHARED:
      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
         if (targ->getChipset() >= NVISA_GK104_CHIPSET)
            opc = 0xa8000000;
         else
            opc = 0xc4000000;
      } else {
         opc = 0xc1000000;
      }
      break;
   case FILE_MEMORY_CONST:
      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
         emitMOV(i); // not sure if this is any better
         return;
      }
      opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
      // Constant loads use a different code[0] base, with subOp at bit 8.
      code[0] = 0x00000006 | (i->subOp << 8);
      break;
   default:
      assert(!"invalid memory file");
      opc = 0;
      break;
   }
   code[1] = opc;

   // r / p: indices of the register and predicate definitions, -1 if absent.
   // By default def 0 is the register result and there is no predicate.
   int r = 0, p = -1;
   if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
         if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
            r = -1;
            p = 0;
         } else if (i->defExists(1)) { // r, p
            p = 1;
         } else {
            assert(!"Expected predicate dest for load locked");
         }
      }
   }

   // Without a register destination the field is filled with 63.
   if (r >= 0)
      defId(i->def(r), 14);
   else
      code[0] |= 63 << 14;

   // The predicate destination is encoded differently depending on chipset.
   if (p >= 0) {
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
         setPDSTL(i, p);
      else
         defId(i->def(p), 32 + 18);
   }

   setAddressByFile(i->src(0));
   srcId(i->src(0).getIndirect(0), 20);
   if (uses64bitAddress(i))
      code[1] |= 1 << 26;

   emitPredicate(i);

   emitLoadStoreType(i->dType);
   emitCachingMode(i->cache);
}
1976 
1977 uint8_t
getSRegEncoding(const ValueRef & ref)1978 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1979 {
1980    switch (SDATA(ref).sv.sv) {
1981    case SV_LANEID:        return 0x00;
1982    case SV_PHYSID:        return 0x03;
1983    case SV_VERTEX_COUNT:  return 0x10;
1984    case SV_INVOCATION_ID: return 0x11;
1985    case SV_YDIR:          return 0x12;
1986    case SV_THREAD_KILL:   return 0x13;
1987    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
1988    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
1989    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
1990    case SV_GRIDID:        return 0x2c;
1991    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
1992    case SV_LBASE:         return 0x34;
1993    case SV_SBASE:         return 0x30;
1994    case SV_LANEMASK_EQ:   return 0x38;
1995    case SV_LANEMASK_LT:   return 0x39;
1996    case SV_LANEMASK_LE:   return 0x3a;
1997    case SV_LANEMASK_GT:   return 0x3b;
1998    case SV_LANEMASK_GE:   return 0x3c;
1999    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2000    default:
2001       assert(!"no sreg for system value");
2002       return 0;
2003    }
2004 }
2005 
// Emit a move.  Saturation is not supported (asserted).  Four cases are
// distinguished, in this order:
//  1. destination in the predicate file (source is a GPR, an immediate, or
//     something else encoded as a source at bit 20);
//  2. source is a system value (8-byte or short form);
//  3. any other 8-byte move, emitted through emitForm_B;
//  4. any other short move (immediate or short source form).
void
CodeEmitterNVC0::emitMOV(const Instruction *i)
{
   assert(!i->saturate);
   if (i->def(0).getFile() == FILE_PREDICATE) {
      if (i->src(0).getFile() == FILE_GPR) {
         code[0] = 0xfc01c003;
         code[1] = 0x1a8e0000;
         srcId(i->src(0), 20);
      } else {
         code[0] = 0x0001c004;
         code[1] = 0x0c0e0000;
         if (i->src(0).getFile() == FILE_IMMEDIATE) {
            // Immediate -> predicate: source field is 7, and bit 23 is set
            // when the immediate is zero.
            code[0] |= 7 << 20;
            if (!i->getSrc(0)->reg.data.u32)
               code[0] |= 1 << 23;
         } else {
            srcId(i->src(0), 20);
         }
      }
      defId(i->def(0), 17);
      emitPredicate(i);
   } else
   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
      uint8_t sr = getSRegEncoding(i->src(0));

      // The special-register number sits at bit 26 in the 8-byte form and
      // at bit 20 in the short form.
      if (i->encSize == 8) {
         code[0] = 0x00000004 | (sr << 26);
         code[1] = 0x2c000000;
      } else {
         code[0] = 0x40000008 | (sr << 20);
      }
      defId(i->def(0), 14);

      emitPredicate(i);
   } else
   if (i->encSize == 8) {
      uint64_t opc;

      if (i->src(0).getFile() == FILE_IMMEDIATE)
         opc = HEX64(18000000, 000001e2);
      else
      if (i->src(0).getFile() == FILE_PREDICATE)
         opc = HEX64(080e0000, 1c000004);
      else
         opc = HEX64(28000000, 00000004);

      // The lane mask is only encoded when the source is not a predicate.
      if (i->src(0).getFile() != FILE_PREDICATE)
         opc |= i->lanes << 5;

      emitForm_B(i, opc);

      // Explicitly emit the predicate source as emitForm_B skips it.
      if (i->src(0).getFile() == FILE_PREDICATE)
         srcId(i->src(0), 20);
   } else {
      uint32_t imm;

      if (i->src(0).getFile() == FILE_IMMEDIATE) {
         imm = SDATA(i->src(0)).u32;
         if (imm & 0xfff00000) {
            // Immediate with bits in the top 12: the low 20 bits must be
            // zero, and the value is OR'ed in unshifted.
            assert(!(imm & 0x000fffff));
            code[0] = 0x00000318 | imm;
         } else {
            // NOTE(review): in this branch the top 12 bits of imm are zero,
            // so (int32_t)imm is never negative and this assertion can never
            // fire; values >= 0x1000 would lose bits in the shift below.
            // Negative small immediates take the branch above and fail its
            // assertion.  Confirm the intended range check.
            assert(imm < 0x800 || ((int32_t)imm >= -0x800));
            code[0] = 0x00000118 | (imm << 20);
         }
      } else {
         code[0] = 0x0028;
         emitShortSrc2(i->src(0));
      }
      defId(i->def(0), 14);

      emitPredicate(i);
   }
}
2082 
2083 void
emitATOM(const Instruction * i)2084 CodeEmitterNVC0::emitATOM(const Instruction *i)
2085 {
2086    const bool hasDst = i->defExists(0);
2087    const bool casOrExch =
2088       i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2089       i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2090 
2091    if (i->dType == TYPE_U64) {
2092       switch (i->subOp) {
2093       case NV50_IR_SUBOP_ATOM_ADD:
2094          code[0] = 0x205;
2095          if (hasDst)
2096             code[1] = 0x507e0000;
2097          else
2098             code[1] = 0x10000000;
2099          break;
2100       case NV50_IR_SUBOP_ATOM_EXCH:
2101          code[0] = 0x305;
2102          code[1] = 0x507e0000;
2103          break;
2104       case NV50_IR_SUBOP_ATOM_CAS:
2105          code[0] = 0x325;
2106          code[1] = 0x50000000;
2107          break;
2108       default:
2109          assert(!"invalid u64 red op");
2110          break;
2111       }
2112    } else
2113    if (i->dType == TYPE_U32) {
2114       switch (i->subOp) {
2115       case NV50_IR_SUBOP_ATOM_EXCH:
2116          code[0] = 0x105;
2117          code[1] = 0x507e0000;
2118          break;
2119       case NV50_IR_SUBOP_ATOM_CAS:
2120          code[0] = 0x125;
2121          code[1] = 0x50000000;
2122          break;
2123       default:
2124          code[0] = 0x5 | (i->subOp << 5);
2125          if (hasDst)
2126             code[1] = 0x507e0000;
2127          else
2128             code[1] = 0x10000000;
2129          break;
2130       }
2131    } else
2132    if (i->dType == TYPE_S32) {
2133       assert(i->subOp <= 2);
2134       code[0] = 0x205 | (i->subOp << 5);
2135       if (hasDst)
2136          code[1] = 0x587e0000;
2137       else
2138          code[1] = 0x18000000;
2139    } else
2140    if (i->dType == TYPE_F32) {
2141       assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2142       code[0] = 0x205;
2143       if (hasDst)
2144          code[1] = 0x687e0000;
2145       else
2146          code[1] = 0x28000000;
2147    }
2148 
2149    emitPredicate(i);
2150 
2151    srcId(i->src(1), 14);
2152 
2153    if (hasDst)
2154       defId(i->def(0), 32 + 11);
2155    else
2156    if (casOrExch)
2157       code[1] |= 63 << 11;
2158 
2159    if (hasDst || casOrExch) {
2160       const int32_t offset = SDATA(i->src(0)).offset;
2161       assert(offset < 0x80000 && offset >= -0x80000);
2162       code[0] |= offset << 26;
2163       code[1] |= (offset & 0x1ffc0) >> 6;
2164       code[1] |= (offset & 0xe0000) << 6;
2165    } else {
2166       srcAddr32(i->src(0), 26, 0);
2167    }
2168    if (i->getIndirect(0, 0)) {
2169       srcId(i->getIndirect(0, 0), 20);
2170       if (i->getIndirect(0, 0)->reg.size == 8)
2171          code[1] |= 1 << 26;
2172    } else {
2173       code[0] |= 63 << 20;
2174    }
2175 
2176    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2177       assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2178       code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2179    }
2180 }
2181 
2182 void
emitMEMBAR(const Instruction * i)2183 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2184 {
2185    switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2186    case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2187    case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
2188    default:
2189       code[0] = 0x45;
2190       assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2191       break;
2192    }
2193    code[1] = 0xe0000000;
2194 
2195    emitPredicate(i);
2196 }
2197 
2198 void
emitCCTL(const Instruction * i)2199 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2200 {
2201    code[0] = 0x00000005 | (i->subOp << 5);
2202 
2203    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2204       code[1] = 0x98000000;
2205       srcAddr32(i->src(0), 28, 2);
2206    } else {
2207       code[1] = 0xd0000000;
2208       setAddress24(i->src(0));
2209    }
2210    if (uses64bitAddress(i))
2211       code[1] |= 1 << 26;
2212    srcId(i->src(0).getIndirect(0), 20);
2213 
2214    emitPredicate(i);
2215 
2216    defId(i, 0, 14);
2217 }
2218 
2219 void
emitSUCLAMPMode(uint16_t subOp)2220 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2221 {
2222    uint8_t m;
2223    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2224    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2225    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2226    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2227    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2228    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2229    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2230    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2231    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2232    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2233    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2234    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2235    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2236    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2237    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2238    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2239    default:
2240       return;
2241    }
2242    code[0] |= m << 5;
2243    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2244       code[1] |= 1 << 16;
2245 }
2246 
2247 void
emitSUCalc(Instruction * i)2248 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2249 {
2250    ImmediateValue *imm = NULL;
2251    uint64_t opc;
2252 
2253    if (i->srcExists(2)) {
2254       imm = i->getSrc(2)->asImm();
2255       if (imm)
2256          i->setSrc(2, NULL); // special case, make emitForm_A not assert
2257    }
2258 
2259    switch (i->op) {
2260    case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2261    case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2262    case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2263    default:
2264       assert(0);
2265       return;
2266    }
2267    emitForm_A(i, opc);
2268 
2269    if (i->op == OP_SUCLAMP) {
2270       if (i->dType == TYPE_S32)
2271          code[0] |= 1 << 9;
2272       emitSUCLAMPMode(i->subOp);
2273    }
2274 
2275    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2276          code[1] |= 1 << 16;
2277 
2278    if (i->op != OP_SUEAU) {
2279       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2280          code[0] |= 63 << 14;
2281          code[1] |= i->getDef(0)->reg.data.id << 23;
2282       } else
2283       if (i->defExists(1)) { // r, p
2284          assert(i->def(1).getFile() == FILE_PREDICATE);
2285          code[1] |= i->getDef(1)->reg.data.id << 23;
2286       } else { // r, #
2287          code[1] |= 7 << 23;
2288       }
2289    }
2290    if (imm) {
2291       assert(i->op == OP_SUCLAMP);
2292       i->setSrc(2, imm);
2293       code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2294    }
2295 }
2296 
2297 void
emitSUGType(DataType ty)2298 CodeEmitterNVC0::emitSUGType(DataType ty)
2299 {
2300    switch (ty) {
2301    case TYPE_S32: code[1] |= 1 << 13; break;
2302    case TYPE_U8:  code[1] |= 2 << 13; break;
2303    case TYPE_S8:  code[1] |= 3 << 13; break;
2304    default:
2305       assert(ty == TYPE_U32);
2306       break;
2307    }
2308 }
2309 
2310 void
setSUConst16(const Instruction * i,const int s)2311 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2312 {
2313    const uint32_t offset = i->getSrc(s)->reg.data.offset;
2314 
2315    assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2316    assert(offset == (offset & 0xfffc));
2317 
2318    code[1] |= 1 << 21;
2319    code[0] |= offset << 24;
2320    code[1] |= offset >> 8;
2321    code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2322 }
2323 
2324 void
setSUPred(const Instruction * i,const int s)2325 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2326 {
2327    if (!i->srcExists(s) || (i->predSrc == s)) {
2328       code[1] |= 0x7 << 17;
2329    } else {
2330       if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2331          code[1] |= 1 << 20;
2332       srcId(i->src(s), 32 + 17);
2333    }
2334 }
2335 
2336 void
emitSULDGB(const TexInstruction * i)2337 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2338 {
2339    code[0] = 0x5;
2340    code[1] = 0xd4000000 | (i->subOp << 15);
2341 
2342    emitLoadStoreType(i->dType);
2343    emitSUGType(i->sType);
2344    emitCachingMode(i->cache);
2345 
2346    emitPredicate(i);
2347    defId(i->def(0), 14); // destination
2348    srcId(i->src(0), 20); // address
2349    // format
2350    if (i->src(1).getFile() == FILE_GPR)
2351       srcId(i->src(1), 26);
2352    else
2353       setSUConst16(i, 1);
2354    setSUPred(i, 2);
2355 }
2356 
2357 void
emitSUSTGx(const TexInstruction * i)2358 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2359 {
2360    code[0] = 0x5;
2361    code[1] = 0xdc000000 | (i->subOp << 15);
2362 
2363    if (i->op == OP_SUSTP)
2364       code[1] |= i->tex.mask << 22;
2365    else
2366       emitLoadStoreType(i->dType);
2367    emitSUGType(i->sType);
2368    emitCachingMode(i->cache);
2369 
2370    emitPredicate(i);
2371    srcId(i->src(0), 20); // address
2372    // format
2373    if (i->src(1).getFile() == FILE_GPR)
2374       srcId(i->src(1), 26);
2375    else
2376       setSUConst16(i, 1);
2377    srcId(i->src(3), 14); // values
2378    setSUPred(i, 2);
2379 }
2380 
2381 void
emitSUAddr(const TexInstruction * i)2382 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2383 {
2384    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2385 
2386    if (i->tex.rIndirectSrc < 0) {
2387       code[1] |= 0x00004000;
2388       code[0] |= i->tex.r << 26;
2389    } else {
2390       srcId(i, i->tex.rIndirectSrc, 26);
2391    }
2392 }
2393 
2394 void
emitSUDim(const TexInstruction * i)2395 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2396 {
2397    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2398 
2399    code[1] |= (i->tex.target.getDim() - 1) << 12;
2400    if (i->tex.target.isArray() || i->tex.target.isCube() ||
2401        i->tex.target.getDim() == 3) {
2402       // use e2d mode for 3-dim images, arrays and cubes.
2403       code[1] |= 3 << 12;
2404    }
2405 
2406    srcId(i->src(0), 20);
2407 }
2408 
2409 void
emitSULEA(const TexInstruction * i)2410 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2411 {
2412    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2413 
2414    code[0] = 0x5;
2415    code[1] = 0xf0000000;
2416 
2417    emitPredicate(i);
2418    emitLoadStoreType(i->sType);
2419 
2420    defId(i->def(0), 14);
2421 
2422    if (i->defExists(1)) {
2423       defId(i->def(1), 32 + 22);
2424    } else {
2425       code[1] |= 7 << 22;
2426    }
2427 
2428    emitSUAddr(i);
2429    emitSUDim(i);
2430 }
2431 
2432 void
emitSULDB(const TexInstruction * i)2433 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2434 {
2435    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2436 
2437    code[0] = 0x5;
2438    code[1] = 0xd4000000 | (i->subOp << 15);
2439 
2440    emitPredicate(i);
2441    emitLoadStoreType(i->dType);
2442 
2443    defId(i->def(0), 14);
2444 
2445    emitCachingMode(i->cache);
2446    emitSUAddr(i);
2447    emitSUDim(i);
2448 }
2449 
2450 void
emitSUSTx(const TexInstruction * i)2451 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2452 {
2453    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2454 
2455    code[0] = 0x5;
2456    code[1] = 0xdc000000 | (i->subOp << 15);
2457 
2458    if (i->op == OP_SUSTP)
2459       code[1] |= i->tex.mask << 17;
2460    else
2461       emitLoadStoreType(i->dType);
2462 
2463    emitPredicate(i);
2464 
2465    srcId(i->src(1), 14);
2466 
2467    emitCachingMode(i->cache);
2468    emitSUAddr(i);
2469    emitSUDim(i);
2470 }
2471 
2472 void
emitVectorSubOp(const Instruction * i)2473 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2474 {
2475    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2476    case 0:
2477       code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2478       code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2479       code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2480       code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2481       break;
2482    case 1:
2483       code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2484       code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2485       code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2486       code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2487       code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2488       code[1] |= (i->mask & 0x3) << 2;
2489       break;
2490    case 2:
2491       code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2492       code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2493       code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2494       code[1] |= (i->mask & 0x3) << 2;
2495       code[1] |= (i->mask & 0xc) << 21;
2496       break;
2497    default:
2498       assert(0);
2499       break;
2500    }
2501 }
2502 
2503 void
emitVSHL(const Instruction * i)2504 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2505 {
2506    uint64_t opc = 0x4;
2507 
2508    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2509    case 0: opc |= 0xe8ULL << 56; break;
2510    case 1: opc |= 0xb4ULL << 56; break;
2511    case 2: opc |= 0x94ULL << 56; break;
2512    default:
2513       assert(0);
2514       break;
2515    }
2516    if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2517       if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2518       if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2519    } else {
2520       if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2521       if (isSignedType(i->sType)) opc |= 1 << 6;
2522    }
2523    emitForm_A(i, opc);
2524    emitVectorSubOp(i);
2525 
2526    if (i->saturate)
2527       code[0] |= 1 << 9;
2528    if (i->flagsDef >= 0)
2529       code[1] |= 1 << 16;
2530 }
2531 
2532 void
emitPIXLD(const Instruction * i)2533 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2534 {
2535    assert(i->encSize == 8);
2536    emitForm_A(i, HEX64(10000000, 00000006));
2537    code[0] |= i->subOp << 5;
2538    code[1] |= 0x00e00000;
2539 }
2540 
2541 void
emitSHFL(const Instruction * i)2542 CodeEmitterNVC0::emitSHFL(const Instruction *i)
2543 {
2544    const ImmediateValue *imm;
2545 
2546    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
2547 
2548    code[0] = 0x00000005;
2549    code[1] = 0x88000000 | (i->subOp << 23);
2550 
2551    emitPredicate(i);
2552 
2553    defId(i->def(0), 14);
2554    srcId(i->src(0), 20);
2555 
2556    switch (i->src(1).getFile()) {
2557    case FILE_GPR:
2558       srcId(i->src(1), 26);
2559       break;
2560    case FILE_IMMEDIATE:
2561       imm = i->getSrc(1)->asImm();
2562       assert(imm && imm->reg.data.u32 < 0x20);
2563       code[0] |= imm->reg.data.u32 << 26;
2564       code[0] |= 1 << 5;
2565       break;
2566    default:
2567       assert(!"invalid src1 file");
2568       break;
2569    }
2570 
2571    switch (i->src(2).getFile()) {
2572    case FILE_GPR:
2573       srcId(i->src(2), 49);
2574       break;
2575    case FILE_IMMEDIATE:
2576       imm = i->getSrc(2)->asImm();
2577       assert(imm && imm->reg.data.u32 < 0x2000);
2578       code[1] |= imm->reg.data.u32 << 10;
2579       code[0] |= 1 << 6;
2580       break;
2581    default:
2582       assert(!"invalid src2 file");
2583       break;
2584    }
2585 
2586    setPDSTL(i, i->defExists(1) ? 1 : -1);
2587 }
2588 
2589 void
emitVOTE(const Instruction * i)2590 CodeEmitterNVC0::emitVOTE(const Instruction *i)
2591 {
2592    const ImmediateValue *imm;
2593    uint32_t u32;
2594 
2595    code[0] = 0x00000004 | (i->subOp << 5);
2596    code[1] = 0x48000000;
2597 
2598    emitPredicate(i);
2599 
2600    unsigned rp = 0;
2601    for (int d = 0; i->defExists(d); d++) {
2602       if (i->def(d).getFile() == FILE_PREDICATE) {
2603          assert(!(rp & 2));
2604          rp |= 2;
2605          defId(i->def(d), 32 + 22);
2606       } else if (i->def(d).getFile() == FILE_GPR) {
2607          assert(!(rp & 1));
2608          rp |= 1;
2609          defId(i->def(d), 14);
2610       } else {
2611          assert(!"Unhandled def");
2612       }
2613    }
2614    if (!(rp & 1))
2615       code[0] |= 63 << 14;
2616    if (!(rp & 2))
2617       code[1] |= 7 << 22;
2618 
2619    switch (i->src(0).getFile()) {
2620    case FILE_PREDICATE:
2621       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2622          code[0] |= 1 << 23;
2623       srcId(i->src(0), 20);
2624       break;
2625    case FILE_IMMEDIATE:
2626       imm = i->getSrc(0)->asImm();
2627       assert(imm);
2628       u32 = imm->reg.data.u32;
2629       assert(u32 == 0 || u32 == 1);
2630       code[0] |= (u32 == 1 ? 0x7 : 0xf) << 20;
2631       break;
2632    default:
2633       assert(!"Unhandled src");
2634       break;
2635    }
2636 }
2637 
2638 bool
emitInstruction(Instruction * insn)2639 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2640 {
2641    unsigned int size = insn->encSize;
2642 
2643    if (writeIssueDelays && !(codeSize & 0x3f))
2644       size += 8;
2645 
2646    if (!insn->encSize) {
2647       ERROR("skipping unencodable instruction: "); insn->print();
2648       return false;
2649    } else
2650    if (codeSize + size > codeSizeLimit) {
2651       ERROR("code emitter output buffer too small\n");
2652       return false;
2653    }
2654 
2655    if (writeIssueDelays) {
2656       if (!(codeSize & 0x3f)) {
2657          code[0] = 0x00000007; // cf issue delay "instruction"
2658          code[1] = 0x20000000;
2659          code += 2;
2660          codeSize += 8;
2661       }
2662       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2663       uint32_t *data = code - (id * 2 + 2);
2664       if (id <= 2) {
2665          data[0] |= insn->sched << (id * 8 + 4);
2666       } else
2667       if (id == 3) {
2668          data[0] |= insn->sched << 28;
2669          data[1] |= insn->sched >> 4;
2670       } else {
2671          data[1] |= insn->sched << ((id - 4) * 8 + 4);
2672       }
2673    }
2674 
2675    // assert that instructions with multiple defs don't corrupt registers
2676    for (int d = 0; insn->defExists(d); ++d)
2677       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2678 
2679    switch (insn->op) {
2680    case OP_MOV:
2681    case OP_RDSV:
2682       emitMOV(insn);
2683       break;
2684    case OP_NOP:
2685       break;
2686    case OP_LOAD:
2687       emitLOAD(insn);
2688       break;
2689    case OP_STORE:
2690       emitSTORE(insn);
2691       break;
2692    case OP_LINTERP:
2693    case OP_PINTERP:
2694       emitINTERP(insn);
2695       break;
2696    case OP_VFETCH:
2697       emitVFETCH(insn);
2698       break;
2699    case OP_EXPORT:
2700       emitEXPORT(insn);
2701       break;
2702    case OP_PFETCH:
2703       emitPFETCH(insn);
2704       break;
2705    case OP_AFETCH:
2706       emitAFETCH(insn);
2707       break;
2708    case OP_EMIT:
2709    case OP_RESTART:
2710       emitOUT(insn);
2711       break;
2712    case OP_ADD:
2713    case OP_SUB:
2714       if (insn->dType == TYPE_F64)
2715          emitDADD(insn);
2716       else if (isFloatType(insn->dType))
2717          emitFADD(insn);
2718       else
2719          emitUADD(insn);
2720       break;
2721    case OP_MUL:
2722       if (insn->dType == TYPE_F64)
2723          emitDMUL(insn);
2724       else if (isFloatType(insn->dType))
2725          emitFMUL(insn);
2726       else
2727          emitUMUL(insn);
2728       break;
2729    case OP_MAD:
2730    case OP_FMA:
2731       if (insn->dType == TYPE_F64)
2732          emitDMAD(insn);
2733       else if (isFloatType(insn->dType))
2734          emitFMAD(insn);
2735       else
2736          emitIMAD(insn);
2737       break;
2738    case OP_SAD:
2739       emitISAD(insn);
2740       break;
2741    case OP_SHLADD:
2742       emitSHLADD(insn);
2743       break;
2744    case OP_NOT:
2745       emitNOT(insn);
2746       break;
2747    case OP_AND:
2748       emitLogicOp(insn, 0);
2749       break;
2750    case OP_OR:
2751       emitLogicOp(insn, 1);
2752       break;
2753    case OP_XOR:
2754       emitLogicOp(insn, 2);
2755       break;
2756    case OP_SHL:
2757    case OP_SHR:
2758       emitShift(insn);
2759       break;
2760    case OP_SET:
2761    case OP_SET_AND:
2762    case OP_SET_OR:
2763    case OP_SET_XOR:
2764       emitSET(insn->asCmp());
2765       break;
2766    case OP_SELP:
2767       emitSELP(insn);
2768       break;
2769    case OP_SLCT:
2770       emitSLCT(insn->asCmp());
2771       break;
2772    case OP_MIN:
2773    case OP_MAX:
2774       emitMINMAX(insn);
2775       break;
2776    case OP_ABS:
2777    case OP_NEG:
2778    case OP_CEIL:
2779    case OP_FLOOR:
2780    case OP_TRUNC:
2781    case OP_SAT:
2782       emitCVT(insn);
2783       break;
2784    case OP_CVT:
2785       if (insn->def(0).getFile() == FILE_PREDICATE ||
2786           insn->src(0).getFile() == FILE_PREDICATE)
2787          emitMOV(insn);
2788       else
2789          emitCVT(insn);
2790       break;
2791    case OP_RSQ:
2792       emitSFnOp(insn, 5 + 2 * insn->subOp);
2793       break;
2794    case OP_RCP:
2795       emitSFnOp(insn, 4 + 2 * insn->subOp);
2796       break;
2797    case OP_LG2:
2798       emitSFnOp(insn, 3);
2799       break;
2800    case OP_EX2:
2801       emitSFnOp(insn, 2);
2802       break;
2803    case OP_SIN:
2804       emitSFnOp(insn, 1);
2805       break;
2806    case OP_COS:
2807       emitSFnOp(insn, 0);
2808       break;
2809    case OP_PRESIN:
2810    case OP_PREEX2:
2811       emitPreOp(insn);
2812       break;
2813    case OP_TEX:
2814    case OP_TXB:
2815    case OP_TXL:
2816    case OP_TXD:
2817    case OP_TXF:
2818    case OP_TXG:
2819    case OP_TXLQ:
2820       emitTEX(insn->asTex());
2821       break;
2822    case OP_TXQ:
2823       emitTXQ(insn->asTex());
2824       break;
2825    case OP_TEXBAR:
2826       emitTEXBAR(insn);
2827       break;
2828    case OP_SUBFM:
2829    case OP_SUCLAMP:
2830    case OP_SUEAU:
2831       emitSUCalc(insn);
2832       break;
2833    case OP_MADSP:
2834       emitMADSP(insn);
2835       break;
2836    case OP_SULDB:
2837       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2838          emitSULDGB(insn->asTex());
2839       else
2840          emitSULDB(insn->asTex());
2841       break;
2842    case OP_SUSTB:
2843    case OP_SUSTP:
2844       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2845          emitSUSTGx(insn->asTex());
2846       else
2847          emitSUSTx(insn->asTex());
2848       break;
2849    case OP_SULEA:
2850       emitSULEA(insn->asTex());
2851       break;
2852    case OP_ATOM:
2853       emitATOM(insn);
2854       break;
2855    case OP_BRA:
2856    case OP_CALL:
2857    case OP_PRERET:
2858    case OP_RET:
2859    case OP_DISCARD:
2860    case OP_EXIT:
2861    case OP_PRECONT:
2862    case OP_CONT:
2863    case OP_PREBREAK:
2864    case OP_BREAK:
2865    case OP_JOINAT:
2866    case OP_BRKPT:
2867    case OP_QUADON:
2868    case OP_QUADPOP:
2869       emitFlow(insn);
2870       break;
2871    case OP_QUADOP:
2872       emitQUADOP(insn, insn->subOp, insn->lanes);
2873       break;
2874    case OP_DFDX:
2875       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2876       break;
2877    case OP_DFDY:
2878       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2879       break;
2880    case OP_POPCNT:
2881       emitPOPC(insn);
2882       break;
2883    case OP_INSBF:
2884       emitINSBF(insn);
2885       break;
2886    case OP_EXTBF:
2887       emitEXTBF(insn);
2888       break;
2889    case OP_BFIND:
2890       emitBFIND(insn);
2891       break;
2892    case OP_PERMT:
2893       emitPERMT(insn);
2894       break;
2895    case OP_JOIN:
2896       emitNOP(insn);
2897       insn->join = 1;
2898       break;
2899    case OP_BAR:
2900       emitBAR(insn);
2901       break;
2902    case OP_MEMBAR:
2903       emitMEMBAR(insn);
2904       break;
2905    case OP_CCTL:
2906       emitCCTL(insn);
2907       break;
2908    case OP_VSHL:
2909       emitVSHL(insn);
2910       break;
2911    case OP_PIXLD:
2912       emitPIXLD(insn);
2913       break;
2914    case OP_SHFL:
2915       emitSHFL(insn);
2916       break;
2917    case OP_VOTE:
2918       emitVOTE(insn);
2919       break;
2920    case OP_PHI:
2921    case OP_UNION:
2922    case OP_CONSTRAINT:
2923       ERROR("operation should have been eliminated");
2924       return false;
2925    case OP_EXP:
2926    case OP_LOG:
2927    case OP_SQRT:
2928    case OP_POW:
2929       ERROR("operation should have been lowered\n");
2930       return false;
2931    default:
2932       ERROR("unknown op: %u\n", insn->op);
2933       return false;
2934    }
2935 
2936    if (insn->join) {
2937       code[0] |= 0x10;
2938       assert(insn->encSize == 8);
2939    }
2940 
2941    code += insn->encSize / 4;
2942    codeSize += insn->encSize;
2943    return true;
2944 }
2945 
2946 uint32_t
getMinEncodingSize(const Instruction * i) const2947 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2948 {
2949    const Target::OpInfo &info = targ->getOpInfo(i);
2950 
2951    if (writeIssueDelays || info.minEncSize == 8 || 1)
2952       return 8;
2953 
2954    if (i->ftz || i->saturate || i->join)
2955       return 8;
2956    if (i->rnd != ROUND_N)
2957       return 8;
2958    if (i->predSrc >= 0 && i->op == OP_MAD)
2959       return 8;
2960 
2961    if (i->op == OP_PINTERP) {
2962       if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2963          return 8;
2964    } else
2965    if (i->op == OP_MOV && i->lanes != 0xf) {
2966       return 8;
2967    }
2968 
2969    for (int s = 0; i->srcExists(s); ++s) {
2970       if (i->src(s).isIndirect(0))
2971          return 8;
2972 
2973       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2974          if (SDATA(i->src(s)).offset >= 0x100)
2975             return 8;
2976          if (i->getSrc(s)->reg.fileIndex > 1 &&
2977              i->getSrc(s)->reg.fileIndex != 16)
2978              return 8;
2979       } else
2980       if (i->src(s).getFile() == FILE_IMMEDIATE) {
2981          if (i->dType == TYPE_F32) {
2982             if (SDATA(i->src(s)).u32 >= 0x100)
2983                return 8;
2984          } else {
2985             if (SDATA(i->src(s)).u32 > 0xff)
2986                return 8;
2987          }
2988       }
2989 
2990       if (i->op == OP_CVT)
2991          continue;
2992       if (i->src(s).mod != Modifier(0)) {
2993          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2994             if (i->op != OP_RSQ)
2995                return 8;
2996          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2997             if (i->op != OP_ADD || s != 0)
2998                return 8;
2999       }
3000    }
3001 
3002    return 4;
3003 }
3004 
3005 // Simplified, erring on safe side.
3006 class SchedDataCalculator : public Pass
3007 {
3008 public:
SchedDataCalculator(const Target * targ)3009    SchedDataCalculator(const Target *targ) : targ(targ) { }
3010 
3011 private:
3012    struct RegScores
3013    {
3014       struct Resource {
3015          int st[DATA_FILE_COUNT]; // LD to LD delay 3
3016          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
3017          int tex; // TEX to non-TEX delay 17 (0x11)
3018          int sfu; // SFU to SFU delay 3 (except PRE-ops)
3019          int imul; // integer MUL to MUL delay 3
3020       } res;
3021       struct ScoreData {
3022          int r[256];
3023          int p[8];
3024          int c;
3025       } rd, wr;
3026       int base;
3027       int regs;
3028 
rebasenv50_ir::SchedDataCalculator::RegScores3029       void rebase(const int base)
3030       {
3031          const int delta = this->base - base;
3032          if (!delta)
3033             return;
3034          this->base = 0;
3035 
3036          for (int i = 0; i < regs; ++i) {
3037             rd.r[i] += delta;
3038             wr.r[i] += delta;
3039          }
3040          for (int i = 0; i < 8; ++i) {
3041             rd.p[i] += delta;
3042             wr.p[i] += delta;
3043          }
3044          rd.c += delta;
3045          wr.c += delta;
3046 
3047          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3048             res.ld[f] += delta;
3049             res.st[f] += delta;
3050          }
3051          res.sfu += delta;
3052          res.imul += delta;
3053          res.tex += delta;
3054       }
wipenv50_ir::SchedDataCalculator::RegScores3055       void wipe(int regs)
3056       {
3057          memset(&rd, 0, sizeof(rd));
3058          memset(&wr, 0, sizeof(wr));
3059          memset(&res, 0, sizeof(res));
3060          this->regs = regs;
3061       }
getLatestnv50_ir::SchedDataCalculator::RegScores3062       int getLatest(const ScoreData& d) const
3063       {
3064          int max = 0;
3065          for (int i = 0; i < regs; ++i)
3066             if (d.r[i] > max)
3067                max = d.r[i];
3068          for (int i = 0; i < 8; ++i)
3069             if (d.p[i] > max)
3070                max = d.p[i];
3071          if (d.c > max)
3072             max = d.c;
3073          return max;
3074       }
getLatestRdnv50_ir::SchedDataCalculator::RegScores3075       inline int getLatestRd() const
3076       {
3077          return getLatest(rd);
3078       }
getLatestWrnv50_ir::SchedDataCalculator::RegScores3079       inline int getLatestWr() const
3080       {
3081          return getLatest(wr);
3082       }
getLatestnv50_ir::SchedDataCalculator::RegScores3083       inline int getLatest() const
3084       {
3085          const int a = getLatestRd();
3086          const int b = getLatestWr();
3087 
3088          int max = MAX2(a, b);
3089          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3090             max = MAX2(res.ld[f], max);
3091             max = MAX2(res.st[f], max);
3092          }
3093          max = MAX2(res.sfu, max);
3094          max = MAX2(res.imul, max);
3095          max = MAX2(res.tex, max);
3096          return max;
3097       }
setMaxnv50_ir::SchedDataCalculator::RegScores3098       void setMax(const RegScores *that)
3099       {
3100          for (int i = 0; i < regs; ++i) {
3101             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3102             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3103          }
3104          for (int i = 0; i < 8; ++i) {
3105             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3106             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3107          }
3108          rd.c = MAX2(rd.c, that->rd.c);
3109          wr.c = MAX2(wr.c, that->wr.c);
3110 
3111          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3112             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3113             res.st[f] = MAX2(res.st[f], that->res.st[f]);
3114          }
3115          res.sfu = MAX2(res.sfu, that->res.sfu);
3116          res.imul = MAX2(res.imul, that->res.imul);
3117          res.tex = MAX2(res.tex, that->res.tex);
3118       }
printnv50_ir::SchedDataCalculator::RegScores3119       void print(int cycle)
3120       {
3121          for (int i = 0; i < regs; ++i) {
3122             if (rd.r[i] > cycle)
3123                INFO("rd $r%i @ %i\n", i, rd.r[i]);
3124             if (wr.r[i] > cycle)
3125                INFO("wr $r%i @ %i\n", i, wr.r[i]);
3126          }
3127          for (int i = 0; i < 8; ++i) {
3128             if (rd.p[i] > cycle)
3129                INFO("rd $p%i @ %i\n", i, rd.p[i]);
3130             if (wr.p[i] > cycle)
3131                INFO("wr $p%i @ %i\n", i, wr.p[i]);
3132          }
3133          if (rd.c > cycle)
3134             INFO("rd $c @ %i\n", rd.c);
3135          if (wr.c > cycle)
3136             INFO("wr $c @ %i\n", wr.c);
3137          if (res.sfu > cycle)
3138             INFO("sfu @ %i\n", res.sfu);
3139          if (res.imul > cycle)
3140             INFO("imul @ %i\n", res.imul);
3141          if (res.tex > cycle)
3142             INFO("tex @ %i\n", res.tex);
3143       }
3144    };
3145 
3146    RegScores *score; // for current BB
3147    std::vector<RegScores> scoreBoards;
3148    int prevData;
3149    operation prevOp;
3150 
3151    const Target *targ;
3152 
3153    bool visit(Function *);
3154    bool visit(BasicBlock *);
3155 
3156    void commitInsn(const Instruction *, int cycle);
3157    int calcDelay(const Instruction *, int cycle) const;
3158    void setDelay(Instruction *, int delay, Instruction *next);
3159 
3160    void recordRd(const Value *, const int ready);
3161    void recordWr(const Value *, const int ready);
3162    void checkRd(const Value *, int cycle, int& delay) const;
3163    void checkWr(const Value *, int cycle, int& delay) const;
3164 
3165    int getCycles(const Instruction *, int origDelay) const;
3166 };
3167 
3168 void
setDelay(Instruction * insn,int delay,Instruction * next)3169 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3170 {
3171    if (insn->op == OP_EXIT || insn->op == OP_RET)
3172       delay = MAX2(delay, 14);
3173 
3174    if (insn->op == OP_TEXBAR) {
3175       // TODO: except if results not used before EXIT
3176       insn->sched = 0xc2;
3177    } else
3178    if (insn->op == OP_JOIN || insn->join) {
3179       insn->sched = 0x00;
3180    } else
3181    if (delay >= 0 || prevData == 0x04 ||
3182        !next || !targ->canDualIssue(insn, next)) {
3183       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3184       if (prevOp == OP_EXPORT)
3185          insn->sched |= 0x40;
3186       else
3187          insn->sched |= 0x20;
3188    } else {
3189       insn->sched = 0x04; // dual-issue
3190    }
3191 
3192    if (prevData != 0x04 || prevOp != OP_EXPORT)
3193       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3194          prevOp = insn->op;
3195 
3196    prevData = insn->sched;
3197 }
3198 
3199 int
getCycles(const Instruction * insn,int origDelay) const3200 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3201 {
3202    if (insn->sched & 0x80) {
3203       int c = (insn->sched & 0x0f) * 2 + 1;
3204       if (insn->op == OP_TEXBAR && origDelay > 0)
3205          c += origDelay;
3206       return c;
3207    }
3208    if (insn->sched & 0x60)
3209       return (insn->sched & 0x1f) + 1;
3210    return (insn->sched == 0x04) ? 0 : 32;
3211 }
3212 
3213 bool
visit(Function * func)3214 SchedDataCalculator::visit(Function *func)
3215 {
3216    int regs = targ->getFileSize(FILE_GPR) + 1;
3217    scoreBoards.resize(func->cfg.getSize());
3218    for (size_t i = 0; i < scoreBoards.size(); ++i)
3219       scoreBoards[i].wipe(regs);
3220    return true;
3221 }
3222 
// Compute the scheduling byte (insn->sched, via setDelay) for every
// instruction of @bb, using and updating the scoreboard that belongs to
// this block.  Always returns true.
bool
SchedDataCalculator::visit(BasicBlock *bb)
{
   Instruction *insn;
   Instruction *next = NULL;

   // cycle counter, relative to the start of this block
   int cycle = 0;

   prevData = 0x00;
   prevOp = OP_NOP;
   score = &scoreBoards.at(bb->getId());

   // Seed the state from the incoming blocks: merge their scoreboards and
   // pick up prevData/prevOp from their exit instructions.
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      // back branches will wait until all target dependencies are satisfied
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
         continue;
      BasicBlock *in = BasicBlock::get(ei.getNode());
      if (in->getExit()) {
         // once a dual-issue (0x04) value has been picked up, keep it
         if (prevData != 0x04)
            prevData = in->getExit()->sched;
         prevOp = in->getExit()->op;
      }
      score->setMax(&scoreBoards.at(in->getId()));
   }
   // with more than one incoming edge, prevOp is reset to OP_NOP
   // NOTE(review): presumably because the preceding op is then ambiguous
   if (bb->cfg.incidentCount() > 1)
      prevOp = OP_NOP;

#ifdef NVC0_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   // All instructions that have a successor inside the block: commit the
   // instruction to the scoreboard, compute the delay its successor needs,
   // encode that into the instruction and advance the cycle counter.
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getCycles(insn, delay);

#ifdef NVC0_DEBUG_SCHED_DATA
      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
      insn->print();
      next->print();
#endif
   }
   // getEntry() returned NULL: nothing to schedule in this block
   if (!insn)
      return true;
   // insn is now the last instruction of the block
   commitInsn(insn, cycle);

   // The delay of the last instruction is the maximum required over all
   // outgoing edges; -1 if no edge contributes anything.
   int bbDelay = -1;

   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // only test the first instruction of the outgoing block
         next = out->getEntry();
         if (next)
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
      } else {
         // wait until all dependencies are satisfied
         const int regsFree = score->getLatest();
         next = out->getFirst();
         // walk the target block's instructions until the simulated cycle
         // count c reaches regsFree or the instructions run out
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
            c += getCycles(next, bbDelay);
         }
         next = NULL;
      }
   }
   // setDelay() never selects dual-issue when next is NULL, so clear it
   // unless there is exactly one outgoing edge
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getCycles(insn, bbDelay);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}
3302 
3303 #define NVE4_MAX_ISSUE_DELAY 0x1f
3304 int
calcDelay(const Instruction * insn,int cycle) const3305 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3306 {
3307    int delay = 0, ready = cycle;
3308 
3309    for (int s = 0; insn->srcExists(s); ++s)
3310       checkRd(insn->getSrc(s), cycle, delay);
3311    // WAR & WAW don't seem to matter
3312    // for (int s = 0; insn->srcExists(s); ++s)
3313    //   recordRd(insn->getSrc(s), cycle);
3314 
3315    switch (Target::getOpClass(insn->op)) {
3316    case OPCLASS_SFU:
3317       ready = score->res.sfu;
3318       break;
3319    case OPCLASS_ARITH:
3320       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3321          ready = score->res.imul;
3322       break;
3323    case OPCLASS_TEXTURE:
3324       ready = score->res.tex;
3325       break;
3326    case OPCLASS_LOAD:
3327       ready = score->res.ld[insn->src(0).getFile()];
3328       break;
3329    case OPCLASS_STORE:
3330       ready = score->res.st[insn->src(0).getFile()];
3331       break;
3332    default:
3333       break;
3334    }
3335    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3336       ready = MAX2(ready, score->res.tex);
3337 
3338    delay = MAX2(delay, ready - cycle);
3339 
3340    // if can issue next cycle, delay is 0, not 1
3341    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3342 }
3343 
3344 void
commitInsn(const Instruction * insn,int cycle)3345 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3346 {
3347    const int ready = cycle + targ->getLatency(insn);
3348 
3349    for (int d = 0; insn->defExists(d); ++d)
3350       recordWr(insn->getDef(d), ready);
3351    // WAR & WAW don't seem to matter
3352    // for (int s = 0; insn->srcExists(s); ++s)
3353    //   recordRd(insn->getSrc(s), cycle);
3354 
3355    switch (Target::getOpClass(insn->op)) {
3356    case OPCLASS_SFU:
3357       score->res.sfu = cycle + 4;
3358       break;
3359    case OPCLASS_ARITH:
3360       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3361          score->res.imul = cycle + 4;
3362       break;
3363    case OPCLASS_TEXTURE:
3364       score->res.tex = cycle + 18;
3365       break;
3366    case OPCLASS_LOAD:
3367       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3368          break;
3369       score->res.ld[insn->src(0).getFile()] = cycle + 4;
3370       score->res.st[insn->src(0).getFile()] = ready;
3371       break;
3372    case OPCLASS_STORE:
3373       score->res.st[insn->src(0).getFile()] = cycle + 4;
3374       score->res.ld[insn->src(0).getFile()] = ready;
3375       break;
3376    case OPCLASS_OTHER:
3377       if (insn->op == OP_TEXBAR)
3378          score->res.tex = cycle;
3379       break;
3380    default:
3381       break;
3382    }
3383 
3384 #ifdef NVC0_DEBUG_SCHED_DATA
3385    score->print(cycle);
3386 #endif
3387 }
3388 
3389 void
checkRd(const Value * v,int cycle,int & delay) const3390 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3391 {
3392    int ready = cycle;
3393    int a, b;
3394 
3395    switch (v->reg.file) {
3396    case FILE_GPR:
3397       a = v->reg.data.id;
3398       b = a + v->reg.size / 4;
3399       for (int r = a; r < b; ++r)
3400          ready = MAX2(ready, score->rd.r[r]);
3401       break;
3402    case FILE_PREDICATE:
3403       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3404       break;
3405    case FILE_FLAGS:
3406       ready = MAX2(ready, score->rd.c);
3407       break;
3408    case FILE_SHADER_INPUT:
3409    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3410    case FILE_MEMORY_LOCAL:
3411    case FILE_MEMORY_CONST:
3412    case FILE_MEMORY_SHARED:
3413    case FILE_MEMORY_GLOBAL:
3414    case FILE_SYSTEM_VALUE:
3415       // TODO: any restrictions here ?
3416       break;
3417    case FILE_IMMEDIATE:
3418       break;
3419    default:
3420       assert(0);
3421       break;
3422    }
3423    if (cycle < ready)
3424       delay = MAX2(delay, ready - cycle);
3425 }
3426 
3427 void
checkWr(const Value * v,int cycle,int & delay) const3428 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3429 {
3430    int ready = cycle;
3431    int a, b;
3432 
3433    switch (v->reg.file) {
3434    case FILE_GPR:
3435       a = v->reg.data.id;
3436       b = a + v->reg.size / 4;
3437       for (int r = a; r < b; ++r)
3438          ready = MAX2(ready, score->wr.r[r]);
3439       break;
3440    case FILE_PREDICATE:
3441       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3442       break;
3443    default:
3444       assert(v->reg.file == FILE_FLAGS);
3445       ready = MAX2(ready, score->wr.c);
3446       break;
3447    }
3448    if (cycle < ready)
3449       delay = MAX2(delay, ready - cycle);
3450 }
3451 
3452 void
recordWr(const Value * v,const int ready)3453 SchedDataCalculator::recordWr(const Value *v, const int ready)
3454 {
3455    int a = v->reg.data.id;
3456 
3457    if (v->reg.file == FILE_GPR) {
3458       int b = a + v->reg.size / 4;
3459       for (int r = a; r < b; ++r)
3460          score->rd.r[r] = ready;
3461    } else
3462    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3463    if (v->reg.file == FILE_PREDICATE) {
3464       score->rd.p[a] = ready + 4;
3465    } else {
3466       assert(v->reg.file == FILE_FLAGS);
3467       score->rd.c = ready + 4;
3468    }
3469 }
3470 
3471 void
recordRd(const Value * v,const int ready)3472 SchedDataCalculator::recordRd(const Value *v, const int ready)
3473 {
3474    int a = v->reg.data.id;
3475 
3476    if (v->reg.file == FILE_GPR) {
3477       int b = a + v->reg.size / 4;
3478       for (int r = a; r < b; ++r)
3479          score->wr.r[r] = ready;
3480    } else
3481    if (v->reg.file == FILE_PREDICATE) {
3482       score->wr.p[a] = ready;
3483    } else
3484    if (v->reg.file == FILE_FLAGS) {
3485       score->wr.c = ready;
3486    }
3487 }
3488 
3489 bool
calculateSchedDataNVC0(const Target * targ,Function * func)3490 calculateSchedDataNVC0(const Target *targ, Function *func)
3491 {
3492    SchedDataCalculator sched(targ);
3493    return sched.run(func, true, true);
3494 }
3495 
3496 void
prepareEmission(Function * func)3497 CodeEmitterNVC0::prepareEmission(Function *func)
3498 {
3499    CodeEmitter::prepareEmission(func);
3500 
3501    if (targ->hasSWSched)
3502       calculateSchedDataNVC0(targ, func);
3503 }
3504 
CodeEmitterNVC0(const TargetNVC0 * target)3505 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3506    : CodeEmitter(target),
3507      targNVC0(target),
3508      writeIssueDelays(target->hasSWSched)
3509 {
3510    code = NULL;
3511    codeSize = codeSizeLimit = 0;
3512    relocInfo = NULL;
3513 }
3514 
3515 CodeEmitter *
createCodeEmitterNVC0(Program::Type type)3516 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3517 {
3518    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3519    emit->setProgramType(type);
3520    return emit;
3521 }
3522 
3523 CodeEmitter *
getCodeEmitter(Program::Type type)3524 TargetNVC0::getCodeEmitter(Program::Type type)
3525 {
3526    if (chipset >= NVISA_GK20A_CHIPSET)
3527       return createCodeEmitterGK110(type);
3528    return createCodeEmitterNVC0(type);
3529 }
3530 
3531 } // namespace nv50_ir
3532