1 /*
2  * Copyright 2014 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs <bskeggs@redhat.com>
23  */
24 
25 #include "codegen/nv50_ir_target_gm107.h"
26 
27 //#define GM107_DEBUG_SCHED_DATA
28 
29 namespace nv50_ir {
30 
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34    CodeEmitterGM107(const TargetGM107 *);
35 
36    virtual bool emitInstruction(Instruction *);
37    virtual uint32_t getMinEncodingSize(const Instruction *) const;
38 
39    virtual void prepareEmission(Program *);
40    virtual void prepareEmission(Function *);
41 
setProgramType(Program::Type pType)42    inline void setProgramType(Program::Type pType) { progType = pType; }
43 
44 private:
45    const TargetGM107 *targGM107;
46 
47    Program::Type progType;
48 
49    const Instruction *insn;
50    const bool writeIssueDelays;
51    uint32_t *data;
52 
53 private:
54    inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)55    inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56 
57    inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)58    inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59    inline void emitPred();
60    inline void emitGPR(int, const Value *);
emitGPR(int pos)61    inline void emitGPR(int pos) {
62       emitGPR(pos, (const Value *)NULL);
63    }
emitGPR(int pos,const ValueRef & ref)64    inline void emitGPR(int pos, const ValueRef &ref) {
65       emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66    }
emitGPR(int pos,const ValueRef * ref)67    inline void emitGPR(int pos, const ValueRef *ref) {
68       emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69    }
emitGPR(int pos,const ValueDef & def)70    inline void emitGPR(int pos, const ValueDef &def) {
71       emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72    }
73    inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)74    inline void emitSYS(int pos, const ValueRef &ref) {
75       emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76    }
77    inline void emitPRED(int, const Value *);
emitPRED(int pos)78    inline void emitPRED(int pos) {
79       emitPRED(pos, (const Value *)NULL);
80    }
emitPRED(int pos,const ValueRef & ref)81    inline void emitPRED(int pos, const ValueRef &ref) {
82       emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83    }
emitPRED(int pos,const ValueDef & def)84    inline void emitPRED(int pos, const ValueDef &def) {
85       emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86    }
87    inline void emitADDR(int, int, int, int, const ValueRef &);
88    inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89    inline bool longIMMD(const ValueRef &);
90    inline void emitIMMD(int, int, const ValueRef &);
91 
92    void emitCond3(int, CondCode);
93    void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)94    void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95    inline void emitO(int);
96    inline void emitP(int);
97    inline void emitSAT(int);
98    inline void emitCC(int);
99    inline void emitX(int);
100    inline void emitABS(int, const ValueRef &);
101    inline void emitNEG(int, const ValueRef &);
102    inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103    inline void emitFMZ(int, int);
104    inline void emitRND(int, RoundMode, int);
emitRND(int pos)105    inline void emitRND(int pos) {
106       emitRND(pos, insn->rnd, -1);
107    }
108    inline void emitPDIV(int);
109    inline void emitINV(int, const ValueRef &);
110 
111    void emitEXIT();
112    void emitBRA();
113    void emitCAL();
114    void emitPCNT();
115    void emitCONT();
116    void emitPBK();
117    void emitBRK();
118    void emitPRET();
119    void emitRET();
120    void emitSSY();
121    void emitSYNC();
122    void emitSAM();
123    void emitRAM();
124 
125    void emitMOV();
126    void emitS2R();
127    void emitF2F();
128    void emitF2I();
129    void emitI2F();
130    void emitI2I();
131    void emitSEL();
132    void emitSHFL();
133 
134    void emitDADD();
135    void emitDMUL();
136    void emitDFMA();
137    void emitDMNMX();
138    void emitDSET();
139    void emitDSETP();
140 
141    void emitFADD();
142    void emitFMUL();
143    void emitFFMA();
144    void emitMUFU();
145    void emitFMNMX();
146    void emitRRO();
147    void emitFCMP();
148    void emitFSET();
149    void emitFSETP();
150    void emitFSWZADD();
151 
152    void emitLOP();
153    void emitNOT();
154    void emitIADD();
155    void emitIMUL();
156    void emitIMAD();
157    void emitISCADD();
158    void emitIMNMX();
159    void emitICMP();
160    void emitISET();
161    void emitISETP();
162    void emitSHL();
163    void emitSHR();
164    void emitSHF();
165    void emitPOPC();
166    void emitBFI();
167    void emitBFE();
168    void emitFLO();
169 
170    void emitLDSTs(int, DataType);
171    void emitLDSTc(int);
172    void emitLDC();
173    void emitLDL();
174    void emitLDS();
175    void emitLD();
176    void emitSTL();
177    void emitSTS();
178    void emitST();
179    void emitALD();
180    void emitAST();
181    void emitISBERD();
182    void emitAL2P();
183    void emitIPA();
184    void emitATOM();
185    void emitATOMS();
186    void emitRED();
187    void emitCCTL();
188 
189    void emitPIXLD();
190 
191    void emitTEXs(int);
192    void emitTEX();
193    void emitTLD();
194    void emitTLD4();
195    void emitTXD();
196    void emitTXQ();
197    void emitTMML();
198    void emitDEPBAR();
199 
200    void emitNOP();
201    void emitKIL();
202    void emitOUT();
203 
204    void emitBAR();
205    void emitMEMBAR();
206 
207    void emitVOTE();
208 
209    void emitSUTarget();
210    void emitSUHandle(const int s);
211    void emitSUSTx();
212    void emitSULDx();
213    void emitSUREDx();
214 };
215 
216 /*******************************************************************************
217  * general instruction layout/fields
218  ******************************************************************************/
219 
220 void
emitField(uint32_t * data,int b,int s,uint32_t v)221 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
222 {
223    if (b >= 0) {
224       uint32_t m = ((1ULL << s) - 1);
225       uint64_t d = (uint64_t)(v & m) << b;
226       assert(!(v & ~m) || (v & ~m) == ~m);
227       data[1] |= d >> 32;
228       data[0] |= d;
229    }
230 }
231 
232 void
emitPred()233 CodeEmitterGM107::emitPred()
234 {
235    if (insn->predSrc >= 0) {
236       emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
237       emitField(19, 1, insn->cc == CC_NOT_P);
238    } else {
239       emitField(16, 3, 7);
240    }
241 }
242 
243 void
emitInsn(uint32_t hi,bool pred)244 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
245 {
246    code[0] = 0x00000000;
247    code[1] = hi;
248    if (pred)
249       emitPred();
250 }
251 
252 void
emitGPR(int pos,const Value * val)253 CodeEmitterGM107::emitGPR(int pos, const Value *val)
254 {
255    emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
256              val->reg.data.id : 255);
257 }
258 
259 void
emitSYS(int pos,const Value * val)260 CodeEmitterGM107::emitSYS(int pos, const Value *val)
261 {
262    int id = val ? val->reg.data.id : -1;
263 
264    switch (id) {
265    case SV_LANEID         : id = 0x00; break;
266    case SV_VERTEX_COUNT   : id = 0x10; break;
267    case SV_INVOCATION_ID  : id = 0x11; break;
268    case SV_THREAD_KILL    : id = 0x13; break;
269    case SV_INVOCATION_INFO: id = 0x1d; break;
270    case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
271    case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
272    case SV_LANEMASK_EQ    : id = 0x38; break;
273    case SV_LANEMASK_LT    : id = 0x39; break;
274    case SV_LANEMASK_LE    : id = 0x3a; break;
275    case SV_LANEMASK_GT    : id = 0x3b; break;
276    case SV_LANEMASK_GE    : id = 0x3c; break;
277    case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
278    default:
279       assert(!"invalid system value");
280       id = 0;
281       break;
282    }
283 
284    emitField(pos, 8, id);
285 }
286 
287 void
emitPRED(int pos,const Value * val)288 CodeEmitterGM107::emitPRED(int pos, const Value *val)
289 {
290    emitField(pos, 3, val ? val->reg.data.id : 7);
291 }
292 
293 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)294 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
295                            const ValueRef &ref)
296 {
297    const Value *v = ref.get();
298    assert(!(v->reg.data.offset & ((1 << shr) - 1)));
299    if (gpr >= 0)
300       emitGPR(gpr, ref.getIndirect(0));
301    emitField(off, len, v->reg.data.offset >> shr);
302 }
303 
304 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)305 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
306                            const ValueRef &ref)
307 {
308    const Value *v = ref.get();
309    const Symbol *s = v->asSym();
310 
311    assert(!(s->reg.data.offset & ((1 << shr) - 1)));
312 
313    emitField(buf,  5, v->reg.fileIndex);
314    if (gpr >= 0)
315       emitGPR(gpr, ref.getIndirect(0));
316    emitField(off, 16, s->reg.data.offset >> shr);
317 }
318 
319 bool
longIMMD(const ValueRef & ref)320 CodeEmitterGM107::longIMMD(const ValueRef &ref)
321 {
322    if (ref.getFile() == FILE_IMMEDIATE) {
323       const ImmediateValue *imm = ref.get()->asImm();
324       if (isFloatType(insn->sType)) {
325          if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
326             return true;
327       } else {
328          if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
329              (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
330             return true;
331       }
332    }
333    return false;
334 }
335 
336 void
emitIMMD(int pos,int len,const ValueRef & ref)337 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
338 {
339    const ImmediateValue *imm = ref.get()->asImm();
340    uint32_t val = imm->reg.data.u32;
341 
342    if (len == 19) {
343       if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
344          assert(!(val & 0x00000fff));
345          val >>= 12;
346       } else if (insn->sType == TYPE_F64) {
347          assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
348          val = imm->reg.data.u64 >> 44;
349       }
350       assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
351       emitField( 56,   1, (val & 0x80000) >> 19);
352       emitField(pos, len, (val & 0x7ffff));
353    } else {
354       emitField(pos, len, val);
355    }
356 }
357 
358 /*******************************************************************************
359  * modifiers
360  ******************************************************************************/
361 
362 void
emitCond3(int pos,CondCode code)363 CodeEmitterGM107::emitCond3(int pos, CondCode code)
364 {
365    int data = 0;
366 
367    switch (code) {
368    case CC_FL : data = 0x00; break;
369    case CC_LTU:
370    case CC_LT : data = 0x01; break;
371    case CC_EQU:
372    case CC_EQ : data = 0x02; break;
373    case CC_LEU:
374    case CC_LE : data = 0x03; break;
375    case CC_GTU:
376    case CC_GT : data = 0x04; break;
377    case CC_NEU:
378    case CC_NE : data = 0x05; break;
379    case CC_GEU:
380    case CC_GE : data = 0x06; break;
381    case CC_TR : data = 0x07; break;
382    default:
383       assert(!"invalid cond3");
384       break;
385    }
386 
387    emitField(pos, 3, data);
388 }
389 
390 void
emitCond4(int pos,CondCode code)391 CodeEmitterGM107::emitCond4(int pos, CondCode code)
392 {
393    int data = 0;
394 
395    switch (code) {
396    case CC_FL: data = 0x00; break;
397    case CC_LT: data = 0x01; break;
398    case CC_EQ: data = 0x02; break;
399    case CC_LE: data = 0x03; break;
400    case CC_GT: data = 0x04; break;
401    case CC_NE: data = 0x05; break;
402    case CC_GE: data = 0x06; break;
403 //   case CC_NUM: data = 0x07; break;
404 //   case CC_NAN: data = 0x08; break;
405    case CC_LTU: data = 0x09; break;
406    case CC_EQU: data = 0x0a; break;
407    case CC_LEU: data = 0x0b; break;
408    case CC_GTU: data = 0x0c; break;
409    case CC_NEU: data = 0x0d; break;
410    case CC_GEU: data = 0x0e; break;
411    case CC_TR:  data = 0x0f; break;
412    default:
413       assert(!"invalid cond4");
414       break;
415    }
416 
417    emitField(pos, 4, data);
418 }
419 
420 void
emitO(int pos)421 CodeEmitterGM107::emitO(int pos)
422 {
423    emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
424 }
425 
426 void
emitP(int pos)427 CodeEmitterGM107::emitP(int pos)
428 {
429    emitField(pos, 1, insn->perPatch);
430 }
431 
432 void
emitSAT(int pos)433 CodeEmitterGM107::emitSAT(int pos)
434 {
435    emitField(pos, 1, insn->saturate);
436 }
437 
438 void
emitCC(int pos)439 CodeEmitterGM107::emitCC(int pos)
440 {
441    emitField(pos, 1, insn->flagsDef >= 0);
442 }
443 
444 void
emitX(int pos)445 CodeEmitterGM107::emitX(int pos)
446 {
447    emitField(pos, 1, insn->flagsSrc >= 0);
448 }
449 
450 void
emitABS(int pos,const ValueRef & ref)451 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
452 {
453    emitField(pos, 1, ref.mod.abs());
454 }
455 
456 void
emitNEG(int pos,const ValueRef & ref)457 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
458 {
459    emitField(pos, 1, ref.mod.neg());
460 }
461 
462 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)463 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
464 {
465    emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
466 }
467 
468 void
emitFMZ(int pos,int len)469 CodeEmitterGM107::emitFMZ(int pos, int len)
470 {
471    emitField(pos, len, insn->dnz << 1 | insn->ftz);
472 }
473 
474 void
emitRND(int rmp,RoundMode rnd,int rip)475 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
476 {
477    int rm = 0, ri = 0;
478    switch (rnd) {
479    case ROUND_NI: ri = 1;
480    case ROUND_N : rm = 0; break;
481    case ROUND_MI: ri = 1;
482    case ROUND_M : rm = 1; break;
483    case ROUND_PI: ri = 1;
484    case ROUND_P : rm = 2; break;
485    case ROUND_ZI: ri = 1;
486    case ROUND_Z : rm = 3; break;
487    default:
488       assert(!"invalid round mode");
489       break;
490    }
491    emitField(rip, 1, ri);
492    emitField(rmp, 2, rm);
493 }
494 
495 void
emitPDIV(int pos)496 CodeEmitterGM107::emitPDIV(int pos)
497 {
498    assert(insn->postFactor >= -3 && insn->postFactor <= 3);
499    if (insn->postFactor > 0)
500       emitField(pos, 3, 7 - insn->postFactor);
501    else
502       emitField(pos, 3, 0 - insn->postFactor);
503 }
504 
505 void
emitINV(int pos,const ValueRef & ref)506 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
507 {
508    emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
509 }
510 
511 /*******************************************************************************
512  * control flow
513  ******************************************************************************/
514 
515 void
emitEXIT()516 CodeEmitterGM107::emitEXIT()
517 {
518    emitInsn (0xe3000000);
519    emitCond5(0x00, CC_TR);
520 }
521 
522 void
emitBRA()523 CodeEmitterGM107::emitBRA()
524 {
525    const FlowInstruction *insn = this->insn->asFlow();
526    int gpr = -1;
527 
528    if (insn->indirect) {
529       if (insn->absolute)
530          emitInsn(0xe2000000); // JMX
531       else
532          emitInsn(0xe2500000); // BRX
533       gpr = 0x08;
534    } else {
535       if (insn->absolute)
536          emitInsn(0xe2100000); // JMP
537       else
538          emitInsn(0xe2400000); // BRA
539       emitField(0x07, 1, insn->allWarp);
540    }
541 
542    emitField(0x06, 1, insn->limit);
543    emitCond5(0x00, CC_TR);
544 
545    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
546       int32_t pos = insn->target.bb->binPos;
547       if (writeIssueDelays && !(pos & 0x1f))
548          pos += 8;
549       if (!insn->absolute)
550          emitField(0x14, 24, pos - (codeSize + 8));
551       else
552          emitField(0x14, 32, pos);
553    } else {
554       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
555       emitField(0x05, 1, 1);
556    }
557 }
558 
559 void
emitCAL()560 CodeEmitterGM107::emitCAL()
561 {
562    const FlowInstruction *insn = this->insn->asFlow();
563 
564    if (insn->absolute) {
565       emitInsn(0xe2200000, 0); // JCAL
566    } else {
567       emitInsn(0xe2600000, 0); // CAL
568    }
569 
570    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
571       if (!insn->absolute)
572          emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
573       else {
574          if (insn->builtin) {
575             int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
576             addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
577             addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
578          } else {
579             emitField(0x14, 32, insn->target.bb->binPos);
580          }
581       }
582    } else {
583       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
584       emitField(0x05, 1, 1);
585    }
586 }
587 
588 void
emitPCNT()589 CodeEmitterGM107::emitPCNT()
590 {
591    const FlowInstruction *insn = this->insn->asFlow();
592 
593    emitInsn(0xe2b00000, 0);
594 
595    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
596       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
597    } else {
598       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
599       emitField(0x05, 1, 1);
600    }
601 }
602 
603 void
emitCONT()604 CodeEmitterGM107::emitCONT()
605 {
606    emitInsn (0xe3500000);
607    emitCond5(0x00, CC_TR);
608 }
609 
610 void
emitPBK()611 CodeEmitterGM107::emitPBK()
612 {
613    const FlowInstruction *insn = this->insn->asFlow();
614 
615    emitInsn(0xe2a00000, 0);
616 
617    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
618       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
619    } else {
620       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
621       emitField(0x05, 1, 1);
622    }
623 }
624 
625 void
emitBRK()626 CodeEmitterGM107::emitBRK()
627 {
628    emitInsn (0xe3400000);
629    emitCond5(0x00, CC_TR);
630 }
631 
632 void
emitPRET()633 CodeEmitterGM107::emitPRET()
634 {
635    const FlowInstruction *insn = this->insn->asFlow();
636 
637    emitInsn(0xe2700000, 0);
638 
639    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
640       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
641    } else {
642       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
643       emitField(0x05, 1, 1);
644    }
645 }
646 
647 void
emitRET()648 CodeEmitterGM107::emitRET()
649 {
650    emitInsn (0xe3200000);
651    emitCond5(0x00, CC_TR);
652 }
653 
654 void
emitSSY()655 CodeEmitterGM107::emitSSY()
656 {
657    const FlowInstruction *insn = this->insn->asFlow();
658 
659    emitInsn(0xe2900000, 0);
660 
661    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
662       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
663    } else {
664       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
665       emitField(0x05, 1, 1);
666    }
667 }
668 
669 void
emitSYNC()670 CodeEmitterGM107::emitSYNC()
671 {
672    emitInsn (0xf0f80000);
673    emitCond5(0x00, CC_TR);
674 }
675 
676 void
emitSAM()677 CodeEmitterGM107::emitSAM()
678 {
679    emitInsn(0xe3700000, 0);
680 }
681 
682 void
emitRAM()683 CodeEmitterGM107::emitRAM()
684 {
685    emitInsn(0xe3800000, 0);
686 }
687 
688 /*******************************************************************************
689  * predicate/cc
690  ******************************************************************************/
691 
692 /*******************************************************************************
693  * movement / conversion
694  ******************************************************************************/
695 
696 void
emitMOV()697 CodeEmitterGM107::emitMOV()
698 {
699    if (insn->src(0).getFile() != FILE_IMMEDIATE) {
700       switch (insn->src(0).getFile()) {
701       case FILE_GPR:
702          if (insn->def(0).getFile() == FILE_PREDICATE) {
703             emitInsn(0x5b6a0000);
704             emitGPR (0x08);
705          } else {
706             emitInsn(0x5c980000);
707          }
708          emitGPR (0x14, insn->src(0));
709          break;
710       case FILE_MEMORY_CONST:
711          emitInsn(0x4c980000);
712          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
713          break;
714       case FILE_IMMEDIATE:
715          emitInsn(0x38980000);
716          emitIMMD(0x14, 19, insn->src(0));
717          break;
718       case FILE_PREDICATE:
719          emitInsn(0x50880000);
720          emitPRED(0x0c, insn->src(0));
721          emitPRED(0x1d);
722          emitPRED(0x27);
723          break;
724       default:
725          assert(!"bad src file");
726          break;
727       }
728       if (insn->def(0).getFile() != FILE_PREDICATE &&
729           insn->src(0).getFile() != FILE_PREDICATE)
730          emitField(0x27, 4, insn->lanes);
731    } else {
732       emitInsn (0x01000000);
733       emitIMMD (0x14, 32, insn->src(0));
734       emitField(0x0c, 4, insn->lanes);
735    }
736 
737    if (insn->def(0).getFile() == FILE_PREDICATE) {
738       emitPRED(0x27);
739       emitPRED(0x03, insn->def(0));
740       emitPRED(0x00);
741    } else {
742       emitGPR(0x00, insn->def(0));
743    }
744 }
745 
746 void
emitS2R()747 CodeEmitterGM107::emitS2R()
748 {
749    emitInsn(0xf0c80000);
750    emitSYS (0x14, insn->src(0));
751    emitGPR (0x00, insn->def(0));
752 }
753 
754 void
emitF2F()755 CodeEmitterGM107::emitF2F()
756 {
757    RoundMode rnd = insn->rnd;
758 
759    switch (insn->op) {
760    case OP_FLOOR: rnd = ROUND_MI; break;
761    case OP_CEIL : rnd = ROUND_PI; break;
762    case OP_TRUNC: rnd = ROUND_ZI; break;
763    default:
764       break;
765    }
766 
767    switch (insn->src(0).getFile()) {
768    case FILE_GPR:
769       emitInsn(0x5ca80000);
770       emitGPR (0x14, insn->src(0));
771       break;
772    case FILE_MEMORY_CONST:
773       emitInsn(0x4ca80000);
774       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
775       break;
776    case FILE_IMMEDIATE:
777       emitInsn(0x38a80000);
778       emitIMMD(0x14, 19, insn->src(0));
779       break;
780    default:
781       assert(!"bad src0 file");
782       break;
783    }
784 
785    emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
786    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
787    emitCC   (0x2f);
788    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
789    emitFMZ  (0x2c, 1);
790    emitField(0x29, 1, insn->subOp);
791    emitRND  (0x27, rnd, 0x2a);
792    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
793    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
794    emitGPR  (0x00, insn->def(0));
795 }
796 
797 void
emitF2I()798 CodeEmitterGM107::emitF2I()
799 {
800    RoundMode rnd = insn->rnd;
801 
802    switch (insn->op) {
803    case OP_FLOOR: rnd = ROUND_M; break;
804    case OP_CEIL : rnd = ROUND_P; break;
805    case OP_TRUNC: rnd = ROUND_Z; break;
806    default:
807       break;
808    }
809 
810    switch (insn->src(0).getFile()) {
811    case FILE_GPR:
812       emitInsn(0x5cb00000);
813       emitGPR (0x14, insn->src(0));
814       break;
815    case FILE_MEMORY_CONST:
816       emitInsn(0x4cb00000);
817       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
818       break;
819    case FILE_IMMEDIATE:
820       emitInsn(0x38b00000);
821       emitIMMD(0x14, 19, insn->src(0));
822       break;
823    default:
824       assert(!"bad src0 file");
825       break;
826    }
827 
828    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
829    emitCC   (0x2f);
830    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
831    emitFMZ  (0x2c, 1);
832    emitRND  (0x27, rnd, 0x2a);
833    emitField(0x0c, 1, isSignedType(insn->dType));
834    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
835    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
836    emitGPR  (0x00, insn->def(0));
837 }
838 
839 void
emitI2F()840 CodeEmitterGM107::emitI2F()
841 {
842    RoundMode rnd = insn->rnd;
843 
844    switch (insn->op) {
845    case OP_FLOOR: rnd = ROUND_M; break;
846    case OP_CEIL : rnd = ROUND_P; break;
847    case OP_TRUNC: rnd = ROUND_Z; break;
848    default:
849       break;
850    }
851 
852    switch (insn->src(0).getFile()) {
853    case FILE_GPR:
854       emitInsn(0x5cb80000);
855       emitGPR (0x14, insn->src(0));
856       break;
857    case FILE_MEMORY_CONST:
858       emitInsn(0x4cb80000);
859       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
860       break;
861    case FILE_IMMEDIATE:
862       emitInsn(0x38b80000);
863       emitIMMD(0x14, 19, insn->src(0));
864       break;
865    default:
866       assert(!"bad src0 file");
867       break;
868    }
869 
870    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
871    emitCC   (0x2f);
872    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
873    emitField(0x29, 2, insn->subOp);
874    emitRND  (0x27, rnd, -1);
875    emitField(0x0d, 1, isSignedType(insn->sType));
876    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
877    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
878    emitGPR  (0x00, insn->def(0));
879 }
880 
881 void
emitI2I()882 CodeEmitterGM107::emitI2I()
883 {
884    switch (insn->src(0).getFile()) {
885    case FILE_GPR:
886       emitInsn(0x5ce00000);
887       emitGPR (0x14, insn->src(0));
888       break;
889    case FILE_MEMORY_CONST:
890       emitInsn(0x4ce00000);
891       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
892       break;
893    case FILE_IMMEDIATE:
894       emitInsn(0x38e00000);
895       emitIMMD(0x14, 19, insn->src(0));
896       break;
897    default:
898       assert(!"bad src0 file");
899       break;
900    }
901 
902    emitSAT  (0x32);
903    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
904    emitCC   (0x2f);
905    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
906    emitField(0x29, 2, insn->subOp);
907    emitField(0x0d, 1, isSignedType(insn->sType));
908    emitField(0x0c, 1, isSignedType(insn->dType));
909    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
910    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
911    emitGPR  (0x00, insn->def(0));
912 }
913 
914 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)915 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
916 {
917    int loc = entry->loc;
918    if (data.force_persample_interp)
919       code[loc + 1] |= 1 << 10;
920    else
921       code[loc + 1] &= ~(1 << 10);
922 }
923 
924 void
emitSEL()925 CodeEmitterGM107::emitSEL()
926 {
927    switch (insn->src(1).getFile()) {
928    case FILE_GPR:
929       emitInsn(0x5ca00000);
930       emitGPR (0x14, insn->src(1));
931       break;
932    case FILE_MEMORY_CONST:
933       emitInsn(0x4ca00000);
934       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
935       break;
936    case FILE_IMMEDIATE:
937       emitInsn(0x38a00000);
938       emitIMMD(0x14, 19, insn->src(1));
939       break;
940    default:
941       assert(!"bad src1 file");
942       break;
943    }
944 
945    emitINV (0x2a, insn->src(2));
946    emitPRED(0x27, insn->src(2));
947    emitGPR (0x08, insn->src(0));
948    emitGPR (0x00, insn->def(0));
949 
950    if (insn->subOp == 1) {
951       addInterp(0, 0, selpFlip);
952    }
953 }
954 
955 void
emitSHFL()956 CodeEmitterGM107::emitSHFL()
957 {
958    int type = 0;
959 
960    emitInsn (0xef100000);
961 
962    switch (insn->src(1).getFile()) {
963    case FILE_GPR:
964       emitGPR(0x14, insn->src(1));
965       break;
966    case FILE_IMMEDIATE:
967       emitIMMD(0x14, 5, insn->src(1));
968       type |= 1;
969       break;
970    default:
971       assert(!"invalid src1 file");
972       break;
973    }
974 
975    switch (insn->src(2).getFile()) {
976    case FILE_GPR:
977       emitGPR(0x27, insn->src(2));
978       break;
979    case FILE_IMMEDIATE:
980       emitIMMD(0x22, 13, insn->src(2));
981       type |= 2;
982       break;
983    default:
984       assert(!"invalid src2 file");
985       break;
986    }
987 
988    if (!insn->defExists(1))
989       emitPRED(0x30);
990    else {
991       assert(insn->def(1).getFile() == FILE_PREDICATE);
992       emitPRED(0x30, insn->def(1));
993    }
994 
995    emitField(0x1e, 2, insn->subOp);
996    emitField(0x1c, 2, type);
997    emitGPR  (0x08, insn->src(0));
998    emitGPR  (0x00, insn->def(0));
999 }
1000 
1001 /*******************************************************************************
1002  * double
1003  ******************************************************************************/
1004 
1005 void
emitDADD()1006 CodeEmitterGM107::emitDADD()
1007 {
1008    switch (insn->src(1).getFile()) {
1009    case FILE_GPR:
1010       emitInsn(0x5c700000);
1011       emitGPR (0x14, insn->src(1));
1012       break;
1013    case FILE_MEMORY_CONST:
1014       emitInsn(0x4c700000);
1015       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1016       break;
1017    case FILE_IMMEDIATE:
1018       emitInsn(0x38700000);
1019       emitIMMD(0x14, 19, insn->src(1));
1020       break;
1021    default:
1022       assert(!"bad src1 file");
1023       break;
1024    }
1025    emitABS(0x31, insn->src(1));
1026    emitNEG(0x30, insn->src(0));
1027    emitCC (0x2f);
1028    emitABS(0x2e, insn->src(0));
1029    emitNEG(0x2d, insn->src(1));
1030 
1031    if (insn->op == OP_SUB)
1032       code[1] ^= 0x00002000;
1033 
1034    emitGPR(0x08, insn->src(0));
1035    emitGPR(0x00, insn->def(0));
1036 }
1037 
1038 void
emitDMUL()1039 CodeEmitterGM107::emitDMUL()
1040 {
1041    switch (insn->src(1).getFile()) {
1042    case FILE_GPR:
1043       emitInsn(0x5c800000);
1044       emitGPR (0x14, insn->src(1));
1045       break;
1046    case FILE_MEMORY_CONST:
1047       emitInsn(0x4c800000);
1048       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1049       break;
1050    case FILE_IMMEDIATE:
1051       emitInsn(0x38800000);
1052       emitIMMD(0x14, 19, insn->src(1));
1053       break;
1054    default:
1055       assert(!"bad src1 file");
1056       break;
1057    }
1058 
1059    emitNEG2(0x30, insn->src(0), insn->src(1));
1060    emitCC  (0x2f);
1061    emitRND (0x27);
1062    emitGPR (0x08, insn->src(0));
1063    emitGPR (0x00, insn->def(0));
1064 }
1065 
1066 void
emitDFMA()1067 CodeEmitterGM107::emitDFMA()
1068 {
1069    switch(insn->src(2).getFile()) {
1070    case FILE_GPR:
1071       switch (insn->src(1).getFile()) {
1072       case FILE_GPR:
1073          emitInsn(0x5b700000);
1074          emitGPR (0x14, insn->src(1));
1075          break;
1076       case FILE_MEMORY_CONST:
1077          emitInsn(0x4b700000);
1078          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1079          break;
1080       case FILE_IMMEDIATE:
1081          emitInsn(0x36700000);
1082          emitIMMD(0x14, 19, insn->src(1));
1083          break;
1084       default:
1085          assert(!"bad src1 file");
1086          break;
1087       }
1088       emitGPR (0x27, insn->src(2));
1089       break;
1090    case FILE_MEMORY_CONST:
1091       emitInsn(0x53700000);
1092       emitGPR (0x27, insn->src(1));
1093       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1094       break;
1095    default:
1096       assert(!"bad src2 file");
1097       break;
1098    }
1099 
1100    emitRND (0x32);
1101    emitNEG (0x31, insn->src(2));
1102    emitNEG2(0x30, insn->src(0), insn->src(1));
1103    emitCC  (0x2f);
1104    emitGPR (0x08, insn->src(0));
1105    emitGPR (0x00, insn->def(0));
1106 }
1107 
1108 void
emitDMNMX()1109 CodeEmitterGM107::emitDMNMX()
1110 {
1111    switch (insn->src(1).getFile()) {
1112    case FILE_GPR:
1113       emitInsn(0x5c500000);
1114       emitGPR (0x14, insn->src(1));
1115       break;
1116    case FILE_MEMORY_CONST:
1117       emitInsn(0x4c500000);
1118       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1119       break;
1120    case FILE_IMMEDIATE:
1121       emitInsn(0x38500000);
1122       emitIMMD(0x14, 19, insn->src(1));
1123       break;
1124    default:
1125       assert(!"bad src1 file");
1126       break;
1127    }
1128 
1129    emitABS  (0x31, insn->src(1));
1130    emitNEG  (0x30, insn->src(0));
1131    emitCC   (0x2f);
1132    emitABS  (0x2e, insn->src(0));
1133    emitNEG  (0x2d, insn->src(1));
1134    emitField(0x2a, 1, insn->op == OP_MAX);
1135    emitPRED (0x27);
1136    emitGPR  (0x08, insn->src(0));
1137    emitGPR  (0x00, insn->def(0));
1138 }
1139 
1140 void
emitDSET()1141 CodeEmitterGM107::emitDSET()
1142 {
1143    const CmpInstruction *insn = this->insn->asCmp();
1144 
1145    switch (insn->src(1).getFile()) {
1146    case FILE_GPR:
1147       emitInsn(0x59000000);
1148       emitGPR (0x14, insn->src(1));
1149       break;
1150    case FILE_MEMORY_CONST:
1151       emitInsn(0x49000000);
1152       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1153       break;
1154    case FILE_IMMEDIATE:
1155       emitInsn(0x32000000);
1156       emitIMMD(0x14, 19, insn->src(1));
1157       break;
1158    default:
1159       assert(!"bad src1 file");
1160       break;
1161    }
1162 
1163    if (insn->op != OP_SET) {
1164       switch (insn->op) {
1165       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1166       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1167       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1168       default:
1169          assert(!"invalid set op");
1170          break;
1171       }
1172       emitPRED(0x27, insn->src(2));
1173    } else {
1174       emitPRED(0x27);
1175    }
1176 
1177    emitABS  (0x36, insn->src(0));
1178    emitNEG  (0x35, insn->src(1));
1179    emitField(0x34, 1, insn->dType == TYPE_F32);
1180    emitCond4(0x30, insn->setCond);
1181    emitCC   (0x2f);
1182    emitABS  (0x2c, insn->src(1));
1183    emitNEG  (0x2b, insn->src(0));
1184    emitGPR  (0x08, insn->src(0));
1185    emitGPR  (0x00, insn->def(0));
1186 }
1187 
1188 void
emitDSETP()1189 CodeEmitterGM107::emitDSETP()
1190 {
1191    const CmpInstruction *insn = this->insn->asCmp();
1192 
1193    switch (insn->src(1).getFile()) {
1194    case FILE_GPR:
1195       emitInsn(0x5b800000);
1196       emitGPR (0x14, insn->src(1));
1197       break;
1198    case FILE_MEMORY_CONST:
1199       emitInsn(0x4b800000);
1200       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1201       break;
1202    case FILE_IMMEDIATE:
1203       emitInsn(0x36800000);
1204       emitIMMD(0x14, 19, insn->src(1));
1205       break;
1206    default:
1207       assert(!"bad src1 file");
1208       break;
1209    }
1210 
1211    if (insn->op != OP_SET) {
1212       switch (insn->op) {
1213       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1214       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1215       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1216       default:
1217          assert(!"invalid set op");
1218          break;
1219       }
1220       emitPRED(0x27, insn->src(2));
1221    } else {
1222       emitPRED(0x27);
1223    }
1224 
1225    emitCond4(0x30, insn->setCond);
1226    emitABS  (0x2c, insn->src(1));
1227    emitNEG  (0x2b, insn->src(0));
1228    emitGPR  (0x08, insn->src(0));
1229    emitABS  (0x07, insn->src(0));
1230    emitNEG  (0x06, insn->src(1));
1231    emitPRED (0x03, insn->def(0));
1232    if (insn->defExists(1))
1233       emitPRED(0x00, insn->def(1));
1234    else
1235       emitPRED(0x00);
1236 }
1237 
1238 /*******************************************************************************
1239  * float
1240  ******************************************************************************/
1241 
1242 void
emitFADD()1243 CodeEmitterGM107::emitFADD()
1244 {
1245    if (!longIMMD(insn->src(1))) {
1246       switch (insn->src(1).getFile()) {
1247       case FILE_GPR:
1248          emitInsn(0x5c580000);
1249          emitGPR (0x14, insn->src(1));
1250          break;
1251       case FILE_MEMORY_CONST:
1252          emitInsn(0x4c580000);
1253          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1254          break;
1255       case FILE_IMMEDIATE:
1256          emitInsn(0x38580000);
1257          emitIMMD(0x14, 19, insn->src(1));
1258          break;
1259       default:
1260          assert(!"bad src1 file");
1261          break;
1262       }
1263       emitSAT(0x32);
1264       emitABS(0x31, insn->src(1));
1265       emitNEG(0x30, insn->src(0));
1266       emitCC (0x2f);
1267       emitABS(0x2e, insn->src(0));
1268       emitNEG(0x2d, insn->src(1));
1269       emitFMZ(0x2c, 1);
1270 
1271       if (insn->op == OP_SUB)
1272          code[1] ^= 0x00002000;
1273    } else {
1274       emitInsn(0x08000000);
1275       emitABS(0x39, insn->src(1));
1276       emitNEG(0x38, insn->src(0));
1277       emitFMZ(0x37, 1);
1278       emitABS(0x36, insn->src(0));
1279       emitNEG(0x35, insn->src(1));
1280       emitCC  (0x34);
1281       emitIMMD(0x14, 32, insn->src(1));
1282 
1283       if (insn->op == OP_SUB)
1284          code[1] ^= 0x00080000;
1285    }
1286 
1287    emitGPR(0x08, insn->src(0));
1288    emitGPR(0x00, insn->def(0));
1289 }
1290 
1291 void
emitFMUL()1292 CodeEmitterGM107::emitFMUL()
1293 {
1294    if (!longIMMD(insn->src(1))) {
1295       switch (insn->src(1).getFile()) {
1296       case FILE_GPR:
1297          emitInsn(0x5c680000);
1298          emitGPR (0x14, insn->src(1));
1299          break;
1300       case FILE_MEMORY_CONST:
1301          emitInsn(0x4c680000);
1302          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1303          break;
1304       case FILE_IMMEDIATE:
1305          emitInsn(0x38680000);
1306          emitIMMD(0x14, 19, insn->src(1));
1307          break;
1308       default:
1309          assert(!"bad src1 file");
1310          break;
1311       }
1312       emitSAT (0x32);
1313       emitNEG2(0x30, insn->src(0), insn->src(1));
1314       emitCC  (0x2f);
1315       emitFMZ (0x2c, 2);
1316       emitPDIV(0x29);
1317       emitRND (0x27);
1318    } else {
1319       emitInsn(0x1e000000);
1320       emitSAT (0x37);
1321       emitFMZ (0x35, 2);
1322       emitCC  (0x34);
1323       emitIMMD(0x14, 32, insn->src(1));
1324       if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1325          code[1] ^= 0x00080000; /* flip immd sign bit */
1326    }
1327 
1328    emitGPR(0x08, insn->src(0));
1329    emitGPR(0x00, insn->def(0));
1330 }
1331 
1332 void
emitFFMA()1333 CodeEmitterGM107::emitFFMA()
1334 {
1335    bool isLongIMMD = false;
1336    switch(insn->src(2).getFile()) {
1337    case FILE_GPR:
1338       switch (insn->src(1).getFile()) {
1339       case FILE_GPR:
1340          emitInsn(0x59800000);
1341          emitGPR (0x14, insn->src(1));
1342          break;
1343       case FILE_MEMORY_CONST:
1344          emitInsn(0x49800000);
1345          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1346          break;
1347       case FILE_IMMEDIATE:
1348          if (longIMMD(insn->getSrc(1))) {
1349             assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1350             isLongIMMD = true;
1351             emitInsn(0x0c000000);
1352             emitIMMD(0x14, 32, insn->src(1));
1353          } else {
1354             emitInsn(0x32800000);
1355             emitIMMD(0x14, 19, insn->src(1));
1356          }
1357          break;
1358       default:
1359          assert(!"bad src1 file");
1360          break;
1361       }
1362       if (!isLongIMMD)
1363          emitGPR (0x27, insn->src(2));
1364       break;
1365    case FILE_MEMORY_CONST:
1366       emitInsn(0x51800000);
1367       emitGPR (0x27, insn->src(1));
1368       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1369       break;
1370    default:
1371       assert(!"bad src2 file");
1372       break;
1373    }
1374 
1375    if (isLongIMMD) {
1376       emitNEG (0x39, insn->src(2));
1377       emitNEG2(0x38, insn->src(0), insn->src(1));
1378       emitSAT (0x37);
1379       emitCC  (0x34);
1380    } else {
1381       emitRND (0x33);
1382       emitSAT (0x32);
1383       emitNEG (0x31, insn->src(2));
1384       emitNEG2(0x30, insn->src(0), insn->src(1));
1385       emitCC  (0x2f);
1386    }
1387 
1388    emitFMZ(0x35, 2);
1389    emitGPR(0x08, insn->src(0));
1390    emitGPR(0x00, insn->def(0));
1391 }
1392 
1393 void
emitMUFU()1394 CodeEmitterGM107::emitMUFU()
1395 {
1396    int mufu = 0;
1397 
1398    switch (insn->op) {
1399    case OP_COS: mufu = 0; break;
1400    case OP_SIN: mufu = 1; break;
1401    case OP_EX2: mufu = 2; break;
1402    case OP_LG2: mufu = 3; break;
1403    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1404    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1405    default:
1406       assert(!"invalid mufu");
1407       break;
1408    }
1409 
1410    emitInsn (0x50800000);
1411    emitSAT  (0x32);
1412    emitNEG  (0x30, insn->src(0));
1413    emitABS  (0x2e, insn->src(0));
1414    emitField(0x14, 3, mufu);
1415    emitGPR  (0x08, insn->src(0));
1416    emitGPR  (0x00, insn->def(0));
1417 }
1418 
1419 void
emitFMNMX()1420 CodeEmitterGM107::emitFMNMX()
1421 {
1422    switch (insn->src(1).getFile()) {
1423    case FILE_GPR:
1424       emitInsn(0x5c600000);
1425       emitGPR (0x14, insn->src(1));
1426       break;
1427    case FILE_MEMORY_CONST:
1428       emitInsn(0x4c600000);
1429       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1430       break;
1431    case FILE_IMMEDIATE:
1432       emitInsn(0x38600000);
1433       emitIMMD(0x14, 19, insn->src(1));
1434       break;
1435    default:
1436       assert(!"bad src1 file");
1437       break;
1438    }
1439 
1440    emitField(0x2a, 1, insn->op == OP_MAX);
1441    emitPRED (0x27);
1442 
1443    emitABS(0x31, insn->src(1));
1444    emitNEG(0x30, insn->src(0));
1445    emitCC (0x2f);
1446    emitABS(0x2e, insn->src(0));
1447    emitNEG(0x2d, insn->src(1));
1448    emitFMZ(0x2c, 1);
1449    emitGPR(0x08, insn->src(0));
1450    emitGPR(0x00, insn->def(0));
1451 }
1452 
1453 void
emitRRO()1454 CodeEmitterGM107::emitRRO()
1455 {
1456    switch (insn->src(0).getFile()) {
1457    case FILE_GPR:
1458       emitInsn(0x5c900000);
1459       emitGPR (0x14, insn->src(0));
1460       break;
1461    case FILE_MEMORY_CONST:
1462       emitInsn(0x4c900000);
1463       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1464       break;
1465    case FILE_IMMEDIATE:
1466       emitInsn(0x38900000);
1467       emitIMMD(0x14, 19, insn->src(0));
1468       break;
1469    default:
1470       assert(!"bad src file");
1471       break;
1472    }
1473 
1474    emitABS  (0x31, insn->src(0));
1475    emitNEG  (0x2d, insn->src(0));
1476    emitField(0x27, 1, insn->op == OP_PREEX2);
1477    emitGPR  (0x00, insn->def(0));
1478 }
1479 
1480 void
emitFCMP()1481 CodeEmitterGM107::emitFCMP()
1482 {
1483    const CmpInstruction *insn = this->insn->asCmp();
1484    CondCode cc = insn->setCond;
1485 
1486    if (insn->src(2).mod.neg())
1487       cc = reverseCondCode(cc);
1488 
1489    switch(insn->src(2).getFile()) {
1490    case FILE_GPR:
1491       switch (insn->src(1).getFile()) {
1492       case FILE_GPR:
1493          emitInsn(0x5ba00000);
1494          emitGPR (0x14, insn->src(1));
1495          break;
1496       case FILE_MEMORY_CONST:
1497          emitInsn(0x4ba00000);
1498          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1499          break;
1500       case FILE_IMMEDIATE:
1501          emitInsn(0x36a00000);
1502          emitIMMD(0x14, 19, insn->src(1));
1503          break;
1504       default:
1505          assert(!"bad src1 file");
1506          break;
1507       }
1508       emitGPR (0x27, insn->src(2));
1509       break;
1510    case FILE_MEMORY_CONST:
1511       emitInsn(0x53a00000);
1512       emitGPR (0x27, insn->src(1));
1513       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1514       break;
1515    default:
1516       assert(!"bad src2 file");
1517       break;
1518    }
1519 
1520    emitCond4(0x30, cc);
1521    emitFMZ  (0x2f, 1);
1522    emitGPR  (0x08, insn->src(0));
1523    emitGPR  (0x00, insn->def(0));
1524 }
1525 
1526 void
emitFSET()1527 CodeEmitterGM107::emitFSET()
1528 {
1529    const CmpInstruction *insn = this->insn->asCmp();
1530 
1531    switch (insn->src(1).getFile()) {
1532    case FILE_GPR:
1533       emitInsn(0x58000000);
1534       emitGPR (0x14, insn->src(1));
1535       break;
1536    case FILE_MEMORY_CONST:
1537       emitInsn(0x48000000);
1538       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1539       break;
1540    case FILE_IMMEDIATE:
1541       emitInsn(0x30000000);
1542       emitIMMD(0x14, 19, insn->src(1));
1543       break;
1544    default:
1545       assert(!"bad src1 file");
1546       break;
1547    }
1548 
1549    if (insn->op != OP_SET) {
1550       switch (insn->op) {
1551       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1552       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1553       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1554       default:
1555          assert(!"invalid set op");
1556          break;
1557       }
1558       emitPRED(0x27, insn->src(2));
1559    } else {
1560       emitPRED(0x27);
1561    }
1562 
1563    emitFMZ  (0x37, 1);
1564    emitABS  (0x36, insn->src(0));
1565    emitNEG  (0x35, insn->src(1));
1566    emitField(0x34, 1, insn->dType == TYPE_F32);
1567    emitCond4(0x30, insn->setCond);
1568    emitCC   (0x2f);
1569    emitABS  (0x2c, insn->src(1));
1570    emitNEG  (0x2b, insn->src(0));
1571    emitGPR  (0x08, insn->src(0));
1572    emitGPR  (0x00, insn->def(0));
1573 }
1574 
1575 void
emitFSETP()1576 CodeEmitterGM107::emitFSETP()
1577 {
1578    const CmpInstruction *insn = this->insn->asCmp();
1579 
1580    switch (insn->src(1).getFile()) {
1581    case FILE_GPR:
1582       emitInsn(0x5bb00000);
1583       emitGPR (0x14, insn->src(1));
1584       break;
1585    case FILE_MEMORY_CONST:
1586       emitInsn(0x4bb00000);
1587       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1588       break;
1589    case FILE_IMMEDIATE:
1590       emitInsn(0x36b00000);
1591       emitIMMD(0x14, 19, insn->src(1));
1592       break;
1593    default:
1594       assert(!"bad src1 file");
1595       break;
1596    }
1597 
1598    if (insn->op != OP_SET) {
1599       switch (insn->op) {
1600       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1601       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1602       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1603       default:
1604          assert(!"invalid set op");
1605          break;
1606       }
1607       emitPRED(0x27, insn->src(2));
1608    } else {
1609       emitPRED(0x27);
1610    }
1611 
1612    emitCond4(0x30, insn->setCond);
1613    emitFMZ  (0x2f, 1);
1614    emitABS  (0x2c, insn->src(1));
1615    emitNEG  (0x2b, insn->src(0));
1616    emitGPR  (0x08, insn->src(0));
1617    emitABS  (0x07, insn->src(0));
1618    emitNEG  (0x06, insn->src(1));
1619    emitPRED (0x03, insn->def(0));
1620    if (insn->defExists(1))
1621       emitPRED(0x00, insn->def(1));
1622    else
1623       emitPRED(0x00);
1624 }
1625 
1626 void
emitFSWZADD()1627 CodeEmitterGM107::emitFSWZADD()
1628 {
1629    emitInsn (0x50f80000);
1630    emitCC   (0x2f);
1631    emitFMZ  (0x2c, 1);
1632    emitRND  (0x27);
1633    emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1634    emitField(0x1c, 8, insn->subOp);
1635    if (insn->predSrc != 1)
1636       emitGPR  (0x14, insn->src(1));
1637    else
1638       emitGPR  (0x14);
1639    emitGPR  (0x08, insn->src(0));
1640    emitGPR  (0x00, insn->def(0));
1641 }
1642 
1643 /*******************************************************************************
1644  * integer
1645  ******************************************************************************/
1646 
1647 void
emitLOP()1648 CodeEmitterGM107::emitLOP()
1649 {
1650    int lop = 0;
1651 
1652    switch (insn->op) {
1653    case OP_AND: lop = 0; break;
1654    case OP_OR : lop = 1; break;
1655    case OP_XOR: lop = 2; break;
1656    default:
1657       assert(!"invalid lop");
1658       break;
1659    }
1660 
1661    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1662       switch (insn->src(1).getFile()) {
1663       case FILE_GPR:
1664          emitInsn(0x5c400000);
1665          emitGPR (0x14, insn->src(1));
1666          break;
1667       case FILE_MEMORY_CONST:
1668          emitInsn(0x4c400000);
1669          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1670          break;
1671       case FILE_IMMEDIATE:
1672          emitInsn(0x38400000);
1673          emitIMMD(0x14, 19, insn->src(1));
1674          break;
1675       default:
1676          assert(!"bad src1 file");
1677          break;
1678       }
1679       emitPRED (0x30);
1680       emitCC   (0x2f);
1681       emitX    (0x2b);
1682       emitField(0x29, 2, lop);
1683       emitINV  (0x28, insn->src(1));
1684       emitINV  (0x27, insn->src(0));
1685    } else {
1686       emitInsn (0x04000000);
1687       emitX    (0x39);
1688       emitINV  (0x38, insn->src(1));
1689       emitINV  (0x37, insn->src(0));
1690       emitField(0x35, 2, lop);
1691       emitCC   (0x34);
1692       emitIMMD (0x14, 32, insn->src(1));
1693    }
1694 
1695    emitGPR  (0x08, insn->src(0));
1696    emitGPR  (0x00, insn->def(0));
1697 }
1698 
1699 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1700 void
emitNOT()1701 CodeEmitterGM107::emitNOT()
1702 {
1703    if (!longIMMD(insn->src(0))) {
1704       switch (insn->src(0).getFile()) {
1705       case FILE_GPR:
1706          emitInsn(0x5c400700);
1707          emitGPR (0x14, insn->src(0));
1708          break;
1709       case FILE_MEMORY_CONST:
1710          emitInsn(0x4c400700);
1711          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1712          break;
1713       case FILE_IMMEDIATE:
1714          emitInsn(0x38400700);
1715          emitIMMD(0x14, 19, insn->src(0));
1716          break;
1717       default:
1718          assert(!"bad src1 file");
1719          break;
1720       }
1721       emitPRED (0x30);
1722    } else {
1723       emitInsn (0x05600000);
1724       emitIMMD (0x14, 32, insn->src(1));
1725    }
1726 
1727    emitGPR(0x08);
1728    emitGPR(0x00, insn->def(0));
1729 }
1730 
1731 void
emitIADD()1732 CodeEmitterGM107::emitIADD()
1733 {
1734    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1735       switch (insn->src(1).getFile()) {
1736       case FILE_GPR:
1737          emitInsn(0x5c100000);
1738          emitGPR (0x14, insn->src(1));
1739          break;
1740       case FILE_MEMORY_CONST:
1741          emitInsn(0x4c100000);
1742          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1743          break;
1744       case FILE_IMMEDIATE:
1745          emitInsn(0x38100000);
1746          emitIMMD(0x14, 19, insn->src(1));
1747          break;
1748       default:
1749          assert(!"bad src1 file");
1750          break;
1751       }
1752       emitSAT(0x32);
1753       emitNEG(0x31, insn->src(0));
1754       emitNEG(0x30, insn->src(1));
1755       emitCC (0x2f);
1756       emitX  (0x2b);
1757    } else {
1758       emitInsn(0x1c000000);
1759       emitNEG (0x38, insn->src(0));
1760       emitSAT (0x36);
1761       emitX   (0x35);
1762       emitCC  (0x34);
1763       emitIMMD(0x14, 32, insn->src(1));
1764    }
1765 
1766    if (insn->op == OP_SUB)
1767       code[1] ^= 0x00010000;
1768 
1769    emitGPR(0x08, insn->src(0));
1770    emitGPR(0x00, insn->def(0));
1771 }
1772 
1773 void
emitIMUL()1774 CodeEmitterGM107::emitIMUL()
1775 {
1776    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1777       switch (insn->src(1).getFile()) {
1778       case FILE_GPR:
1779          emitInsn(0x5c380000);
1780          emitGPR (0x14, insn->src(1));
1781          break;
1782       case FILE_MEMORY_CONST:
1783          emitInsn(0x4c380000);
1784          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1785          break;
1786       case FILE_IMMEDIATE:
1787          emitInsn(0x38380000);
1788          emitIMMD(0x14, 19, insn->src(1));
1789          break;
1790       default:
1791          assert(!"bad src1 file");
1792          break;
1793       }
1794       emitCC   (0x2f);
1795       emitField(0x29, 1, isSignedType(insn->sType));
1796       emitField(0x28, 1, isSignedType(insn->dType));
1797       emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1798    } else {
1799       emitInsn (0x1f000000);
1800       emitField(0x37, 1, isSignedType(insn->sType));
1801       emitField(0x36, 1, isSignedType(insn->dType));
1802       emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1803       emitCC   (0x34);
1804       emitIMMD (0x14, 32, insn->src(1));
1805    }
1806 
1807    emitGPR(0x08, insn->src(0));
1808    emitGPR(0x00, insn->def(0));
1809 }
1810 
1811 void
emitIMAD()1812 CodeEmitterGM107::emitIMAD()
1813 {
1814    /*XXX: imad32i exists, but not using it as third src overlaps dst */
1815    switch(insn->src(2).getFile()) {
1816    case FILE_GPR:
1817       switch (insn->src(1).getFile()) {
1818       case FILE_GPR:
1819          emitInsn(0x5a000000);
1820          emitGPR (0x14, insn->src(1));
1821          break;
1822       case FILE_MEMORY_CONST:
1823          emitInsn(0x4a000000);
1824          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1825          break;
1826       case FILE_IMMEDIATE:
1827          emitInsn(0x34000000);
1828          emitIMMD(0x14, 19, insn->src(1));
1829          break;
1830       default:
1831          assert(!"bad src1 file");
1832          break;
1833       }
1834       emitGPR (0x27, insn->src(2));
1835       break;
1836    case FILE_MEMORY_CONST:
1837       emitInsn(0x52000000);
1838       emitGPR (0x27, insn->src(1));
1839       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1840       break;
1841    default:
1842       assert(!"bad src2 file");
1843       break;
1844    }
1845 
1846    emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1847    emitField(0x35, 1, isSignedType(insn->sType));
1848    emitNEG  (0x34, insn->src(2));
1849    emitNEG2 (0x33, insn->src(0), insn->src(1));
1850    emitSAT  (0x32);
1851    emitX    (0x31);
1852    emitField(0x30, 1, isSignedType(insn->dType));
1853    emitCC   (0x2f);
1854    emitGPR  (0x08, insn->src(0));
1855    emitGPR  (0x00, insn->def(0));
1856 }
1857 
1858 void
emitISCADD()1859 CodeEmitterGM107::emitISCADD()
1860 {
1861    assert(insn->src(1).get()->asImm());
1862 
1863    switch (insn->src(2).getFile()) {
1864    case FILE_GPR:
1865       emitInsn(0x5c180000);
1866       emitGPR (0x14, insn->src(2));
1867       break;
1868    case FILE_MEMORY_CONST:
1869       emitInsn(0x4c180000);
1870       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1871       break;
1872    case FILE_IMMEDIATE:
1873       emitInsn(0x38180000);
1874       emitIMMD(0x14, 19, insn->src(2));
1875       break;
1876    default:
1877       assert(!"bad src1 file");
1878       break;
1879    }
1880    emitNEG (0x31, insn->src(0));
1881    emitNEG (0x30, insn->src(2));
1882    emitCC  (0x2f);
1883    emitIMMD(0x27, 5, insn->src(1));
1884    emitGPR (0x08, insn->src(0));
1885    emitGPR (0x00, insn->def(0));
1886 }
1887 
1888 void
emitIMNMX()1889 CodeEmitterGM107::emitIMNMX()
1890 {
1891    switch (insn->src(1).getFile()) {
1892    case FILE_GPR:
1893       emitInsn(0x5c200000);
1894       emitGPR (0x14, insn->src(1));
1895       break;
1896    case FILE_MEMORY_CONST:
1897       emitInsn(0x4c200000);
1898       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1899       break;
1900    case FILE_IMMEDIATE:
1901       emitInsn(0x38200000);
1902       emitIMMD(0x14, 19, insn->src(1));
1903       break;
1904    default:
1905       assert(!"bad src1 file");
1906       break;
1907    }
1908 
1909    emitField(0x30, 1, isSignedType(insn->dType));
1910    emitCC   (0x2f);
1911    emitField(0x2b, 2, insn->subOp);
1912    emitField(0x2a, 1, insn->op == OP_MAX);
1913    emitPRED (0x27);
1914    emitGPR  (0x08, insn->src(0));
1915    emitGPR  (0x00, insn->def(0));
1916 }
1917 
1918 void
emitICMP()1919 CodeEmitterGM107::emitICMP()
1920 {
1921    const CmpInstruction *insn = this->insn->asCmp();
1922    CondCode cc = insn->setCond;
1923 
1924    if (insn->src(2).mod.neg())
1925       cc = reverseCondCode(cc);
1926 
1927    switch(insn->src(2).getFile()) {
1928    case FILE_GPR:
1929       switch (insn->src(1).getFile()) {
1930       case FILE_GPR:
1931          emitInsn(0x5b400000);
1932          emitGPR (0x14, insn->src(1));
1933          break;
1934       case FILE_MEMORY_CONST:
1935          emitInsn(0x4b400000);
1936          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1937          break;
1938       case FILE_IMMEDIATE:
1939          emitInsn(0x36400000);
1940          emitIMMD(0x14, 19, insn->src(1));
1941          break;
1942       default:
1943          assert(!"bad src1 file");
1944          break;
1945       }
1946       emitGPR (0x27, insn->src(2));
1947       break;
1948    case FILE_MEMORY_CONST:
1949       emitInsn(0x53400000);
1950       emitGPR (0x27, insn->src(1));
1951       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1952       break;
1953    default:
1954       assert(!"bad src2 file");
1955       break;
1956    }
1957 
1958    emitCond3(0x31, cc);
1959    emitField(0x30, 1, isSignedType(insn->sType));
1960    emitGPR  (0x08, insn->src(0));
1961    emitGPR  (0x00, insn->def(0));
1962 }
1963 
1964 void
emitISET()1965 CodeEmitterGM107::emitISET()
1966 {
1967    const CmpInstruction *insn = this->insn->asCmp();
1968 
1969    switch (insn->src(1).getFile()) {
1970    case FILE_GPR:
1971       emitInsn(0x5b500000);
1972       emitGPR (0x14, insn->src(1));
1973       break;
1974    case FILE_MEMORY_CONST:
1975       emitInsn(0x4b500000);
1976       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1977       break;
1978    case FILE_IMMEDIATE:
1979       emitInsn(0x36500000);
1980       emitIMMD(0x14, 19, insn->src(1));
1981       break;
1982    default:
1983       assert(!"bad src1 file");
1984       break;
1985    }
1986 
1987    if (insn->op != OP_SET) {
1988       switch (insn->op) {
1989       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1990       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1991       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1992       default:
1993          assert(!"invalid set op");
1994          break;
1995       }
1996       emitPRED(0x27, insn->src(2));
1997    } else {
1998       emitPRED(0x27);
1999    }
2000 
2001    emitCond3(0x31, insn->setCond);
2002    emitField(0x30, 1, isSignedType(insn->sType));
2003    emitCC   (0x2f);
2004    emitField(0x2c, 1, insn->dType == TYPE_F32);
2005    emitX    (0x2b);
2006    emitGPR  (0x08, insn->src(0));
2007    emitGPR  (0x00, insn->def(0));
2008 }
2009 
2010 void
emitISETP()2011 CodeEmitterGM107::emitISETP()
2012 {
2013    const CmpInstruction *insn = this->insn->asCmp();
2014 
2015    switch (insn->src(1).getFile()) {
2016    case FILE_GPR:
2017       emitInsn(0x5b600000);
2018       emitGPR (0x14, insn->src(1));
2019       break;
2020    case FILE_MEMORY_CONST:
2021       emitInsn(0x4b600000);
2022       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2023       break;
2024    case FILE_IMMEDIATE:
2025       emitInsn(0x36600000);
2026       emitIMMD(0x14, 19, insn->src(1));
2027       break;
2028    default:
2029       assert(!"bad src1 file");
2030       break;
2031    }
2032 
2033    if (insn->op != OP_SET) {
2034       switch (insn->op) {
2035       case OP_SET_AND: emitField(0x2d, 2, 0); break;
2036       case OP_SET_OR : emitField(0x2d, 2, 1); break;
2037       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2038       default:
2039          assert(!"invalid set op");
2040          break;
2041       }
2042       emitPRED(0x27, insn->src(2));
2043    } else {
2044       emitPRED(0x27);
2045    }
2046 
2047    emitCond3(0x31, insn->setCond);
2048    emitField(0x30, 1, isSignedType(insn->sType));
2049    emitX    (0x2b);
2050    emitGPR  (0x08, insn->src(0));
2051    emitPRED (0x03, insn->def(0));
2052    if (insn->defExists(1))
2053       emitPRED(0x00, insn->def(1));
2054    else
2055       emitPRED(0x00);
2056 }
2057 
2058 void
emitSHL()2059 CodeEmitterGM107::emitSHL()
2060 {
2061    switch (insn->src(1).getFile()) {
2062    case FILE_GPR:
2063       emitInsn(0x5c480000);
2064       emitGPR (0x14, insn->src(1));
2065       break;
2066    case FILE_MEMORY_CONST:
2067       emitInsn(0x4c480000);
2068       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2069       break;
2070    case FILE_IMMEDIATE:
2071       emitInsn(0x38480000);
2072       emitIMMD(0x14, 19, insn->src(1));
2073       break;
2074    default:
2075       assert(!"bad src1 file");
2076       break;
2077    }
2078 
2079    emitCC   (0x2f);
2080    emitX    (0x2b);
2081    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2082    emitGPR  (0x08, insn->src(0));
2083    emitGPR  (0x00, insn->def(0));
2084 }
2085 
2086 void
emitSHR()2087 CodeEmitterGM107::emitSHR()
2088 {
2089    switch (insn->src(1).getFile()) {
2090    case FILE_GPR:
2091       emitInsn(0x5c280000);
2092       emitGPR (0x14, insn->src(1));
2093       break;
2094    case FILE_MEMORY_CONST:
2095       emitInsn(0x4c280000);
2096       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2097       break;
2098    case FILE_IMMEDIATE:
2099       emitInsn(0x38280000);
2100       emitIMMD(0x14, 19, insn->src(1));
2101       break;
2102    default:
2103       assert(!"bad src1 file");
2104       break;
2105    }
2106 
2107    emitField(0x30, 1, isSignedType(insn->dType));
2108    emitCC   (0x2f);
2109    emitX    (0x2c);
2110    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2111    emitGPR  (0x08, insn->src(0));
2112    emitGPR  (0x00, insn->def(0));
2113 }
2114 
2115 void
emitSHF()2116 CodeEmitterGM107::emitSHF()
2117 {
2118    unsigned type;
2119 
2120    switch (insn->src(1).getFile()) {
2121    case FILE_GPR:
2122       emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2123       emitGPR(0x14, insn->src(1));
2124       break;
2125    case FILE_IMMEDIATE:
2126       emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2127       emitIMMD(0x14, 19, insn->src(1));
2128       break;
2129    default:
2130       assert(!"bad src1 file");
2131       break;
2132    }
2133 
2134    switch (insn->sType) {
2135    case TYPE_U64:
2136       type = 2;
2137       break;
2138    case TYPE_S64:
2139       type = 3;
2140       break;
2141    default:
2142       type = 0;
2143       break;
2144    }
2145 
2146    emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2147    emitX    (0x31);
2148    emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2149    emitCC   (0x2f);
2150    emitGPR  (0x27, insn->src(2));
2151    emitField(0x25, 2, type);
2152    emitGPR  (0x08, insn->src(0));
2153    emitGPR  (0x00, insn->def(0));
2154 }
2155 
2156 void
emitPOPC()2157 CodeEmitterGM107::emitPOPC()
2158 {
2159    switch (insn->src(0).getFile()) {
2160    case FILE_GPR:
2161       emitInsn(0x5c080000);
2162       emitGPR (0x14, insn->src(0));
2163       break;
2164    case FILE_MEMORY_CONST:
2165       emitInsn(0x4c080000);
2166       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2167       break;
2168    case FILE_IMMEDIATE:
2169       emitInsn(0x38080000);
2170       emitIMMD(0x14, 19, insn->src(0));
2171       break;
2172    default:
2173       assert(!"bad src1 file");
2174       break;
2175    }
2176 
2177    emitINV(0x28, insn->src(0));
2178    emitGPR(0x00, insn->def(0));
2179 }
2180 
2181 void
emitBFI()2182 CodeEmitterGM107::emitBFI()
2183 {
2184    switch(insn->src(2).getFile()) {
2185    case FILE_GPR:
2186       switch (insn->src(1).getFile()) {
2187       case FILE_GPR:
2188          emitInsn(0x5bf00000);
2189          emitGPR (0x14, insn->src(1));
2190          break;
2191       case FILE_MEMORY_CONST:
2192          emitInsn(0x4bf00000);
2193          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2194          break;
2195       case FILE_IMMEDIATE:
2196          emitInsn(0x36f00000);
2197          emitIMMD(0x14, 19, insn->src(1));
2198          break;
2199       default:
2200          assert(!"bad src1 file");
2201          break;
2202       }
2203       emitGPR (0x27, insn->src(2));
2204       break;
2205    case FILE_MEMORY_CONST:
2206       emitInsn(0x53f00000);
2207       emitGPR (0x27, insn->src(1));
2208       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2209       break;
2210    default:
2211       assert(!"bad src2 file");
2212       break;
2213    }
2214 
2215    emitCC   (0x2f);
2216    emitGPR  (0x08, insn->src(0));
2217    emitGPR  (0x00, insn->def(0));
2218 }
2219 
2220 void
emitBFE()2221 CodeEmitterGM107::emitBFE()
2222 {
2223    switch (insn->src(1).getFile()) {
2224    case FILE_GPR:
2225       emitInsn(0x5c000000);
2226       emitGPR (0x14, insn->src(1));
2227       break;
2228    case FILE_MEMORY_CONST:
2229       emitInsn(0x4c000000);
2230       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2231       break;
2232    case FILE_IMMEDIATE:
2233       emitInsn(0x38000000);
2234       emitIMMD(0x14, 19, insn->src(1));
2235       break;
2236    default:
2237       assert(!"bad src1 file");
2238       break;
2239    }
2240 
2241    emitField(0x30, 1, isSignedType(insn->dType));
2242    emitCC   (0x2f);
2243    emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2244    emitGPR  (0x08, insn->src(0));
2245    emitGPR  (0x00, insn->def(0));
2246 }
2247 
2248 void
emitFLO()2249 CodeEmitterGM107::emitFLO()
2250 {
2251    switch (insn->src(0).getFile()) {
2252    case FILE_GPR:
2253       emitInsn(0x5c300000);
2254       emitGPR (0x14, insn->src(0));
2255       break;
2256    case FILE_MEMORY_CONST:
2257       emitInsn(0x4c300000);
2258       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2259       break;
2260    case FILE_IMMEDIATE:
2261       emitInsn(0x38300000);
2262       emitIMMD(0x14, 19, insn->src(0));
2263       break;
2264    default:
2265       assert(!"bad src1 file");
2266       break;
2267    }
2268 
2269    emitField(0x30, 1, isSignedType(insn->dType));
2270    emitCC   (0x2f);
2271    emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2272    emitINV  (0x28, insn->src(0));
2273    emitGPR  (0x00, insn->def(0));
2274 }
2275 
2276 /*******************************************************************************
2277  * memory
2278  ******************************************************************************/
2279 
2280 void
emitLDSTs(int pos,DataType type)2281 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2282 {
2283    int data = 0;
2284 
2285    switch (typeSizeof(type)) {
2286    case  1: data = isSignedType(type) ? 1 : 0; break;
2287    case  2: data = isSignedType(type) ? 3 : 2; break;
2288    case  4: data = 4; break;
2289    case  8: data = 5; break;
2290    case 16: data = 6; break;
2291    default:
2292       assert(!"bad type");
2293       break;
2294    }
2295 
2296    emitField(pos, 3, data);
2297 }
2298 
2299 void
emitLDSTc(int pos)2300 CodeEmitterGM107::emitLDSTc(int pos)
2301 {
2302    int mode = 0;
2303 
2304    switch (insn->cache) {
2305    case CACHE_CA: mode = 0; break;
2306    case CACHE_CG: mode = 1; break;
2307    case CACHE_CS: mode = 2; break;
2308    case CACHE_CV: mode = 3; break;
2309    default:
2310       assert(!"invalid caching mode");
2311       break;
2312    }
2313 
2314    emitField(pos, 2, mode);
2315 }
2316 
2317 void
emitLDC()2318 CodeEmitterGM107::emitLDC()
2319 {
2320    emitInsn (0xef900000);
2321    emitLDSTs(0x30, insn->dType);
2322    emitField(0x2c, 2, insn->subOp);
2323    emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2324    emitGPR  (0x00, insn->def(0));
2325 }
2326 
2327 void
emitLDL()2328 CodeEmitterGM107::emitLDL()
2329 {
2330    emitInsn (0xef400000);
2331    emitLDSTs(0x30, insn->dType);
2332    emitLDSTc(0x2c);
2333    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2334    emitGPR  (0x00, insn->def(0));
2335 }
2336 
2337 void
emitLDS()2338 CodeEmitterGM107::emitLDS()
2339 {
2340    emitInsn (0xef480000);
2341    emitLDSTs(0x30, insn->dType);
2342    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2343    emitGPR  (0x00, insn->def(0));
2344 }
2345 
2346 void
emitLD()2347 CodeEmitterGM107::emitLD()
2348 {
2349    emitInsn (0x80000000);
2350    emitPRED (0x3a);
2351    emitLDSTc(0x38);
2352    emitLDSTs(0x35, insn->dType);
2353    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2354    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2355    emitGPR  (0x00, insn->def(0));
2356 }
2357 
2358 void
emitSTL()2359 CodeEmitterGM107::emitSTL()
2360 {
2361    emitInsn (0xef500000);
2362    emitLDSTs(0x30, insn->dType);
2363    emitLDSTc(0x2c);
2364    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2365    emitGPR  (0x00, insn->src(1));
2366 }
2367 
2368 void
emitSTS()2369 CodeEmitterGM107::emitSTS()
2370 {
2371    emitInsn (0xef580000);
2372    emitLDSTs(0x30, insn->dType);
2373    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2374    emitGPR  (0x00, insn->src(1));
2375 }
2376 
2377 void
emitST()2378 CodeEmitterGM107::emitST()
2379 {
2380    emitInsn (0xa0000000);
2381    emitPRED (0x3a);
2382    emitLDSTc(0x38);
2383    emitLDSTs(0x35, insn->dType);
2384    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2385    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2386    emitGPR  (0x00, insn->src(1));
2387 }
2388 
2389 void
emitALD()2390 CodeEmitterGM107::emitALD()
2391 {
2392    emitInsn (0xefd80000);
2393    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2394    emitGPR  (0x27, insn->src(0).getIndirect(1));
2395    emitO    (0x20);
2396    emitP    (0x1f);
2397    emitADDR (0x08, 20, 10, 0, insn->src(0));
2398    emitGPR  (0x00, insn->def(0));
2399 }
2400 
2401 void
emitAST()2402 CodeEmitterGM107::emitAST()
2403 {
2404    emitInsn (0xeff00000);
2405    emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2406    emitGPR  (0x27, insn->src(0).getIndirect(1));
2407    emitP    (0x1f);
2408    emitADDR (0x08, 20, 10, 0, insn->src(0));
2409    emitGPR  (0x00, insn->src(1));
2410 }
2411 
2412 void
emitISBERD()2413 CodeEmitterGM107::emitISBERD()
2414 {
2415    emitInsn(0xefd00000);
2416    emitGPR (0x08, insn->src(0));
2417    emitGPR (0x00, insn->def(0));
2418 }
2419 
2420 void
emitAL2P()2421 CodeEmitterGM107::emitAL2P()
2422 {
2423    emitInsn (0xefa00000);
2424    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2425    emitPRED (0x2c);
2426    emitO    (0x20);
2427    emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2428    emitGPR  (0x08, insn->src(0).getIndirect(0));
2429    emitGPR  (0x00, insn->def(0));
2430 }
2431 
2432 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2433 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2434 {
2435    int ipa = entry->ipa;
2436    int reg = entry->reg;
2437    int loc = entry->loc;
2438 
2439    if (data.flatshade &&
2440        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2441       ipa = NV50_IR_INTERP_FLAT;
2442       reg = 0xff;
2443    } else if (data.force_persample_interp &&
2444               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2445               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2446       ipa |= NV50_IR_INTERP_CENTROID;
2447    }
2448    code[loc + 1] &= ~(0xf << 0x14);
2449    code[loc + 1] |= (ipa & 0x3) << 0x16;
2450    code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2451    code[loc + 0] &= ~(0xff << 0x14);
2452    code[loc + 0] |= reg << 0x14;
2453 }
2454 
2455 void
emitIPA()2456 CodeEmitterGM107::emitIPA()
2457 {
2458    int ipam = 0, ipas = 0;
2459 
2460    switch (insn->getInterpMode()) {
2461    case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
2462    case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2463    case NV50_IR_INTERP_FLAT       : ipam = 2; break;
2464    case NV50_IR_INTERP_SC         : ipam = 3; break;
2465    default:
2466       assert(!"invalid ipa mode");
2467       break;
2468    }
2469 
2470    switch (insn->getSampleMode()) {
2471    case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2472    case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2473    case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
2474    default:
2475       assert(!"invalid ipa sample mode");
2476       break;
2477    }
2478 
2479    emitInsn (0xe0000000);
2480    emitField(0x36, 2, ipam);
2481    emitField(0x34, 2, ipas);
2482    emitSAT  (0x33);
2483    emitField(0x2f, 3, 7);
2484    emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2485    if ((code[0] & 0x0000ff00) != 0x0000ff00)
2486       code[1] |= 0x00000040; /* .idx */
2487    emitGPR(0x00, insn->def(0));
2488 
2489    if (insn->op == OP_PINTERP) {
2490       emitGPR(0x14, insn->src(1));
2491       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2492          emitGPR(0x27, insn->src(2));
2493       addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2494    } else {
2495       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2496          emitGPR(0x27, insn->src(1));
2497       emitGPR(0x14);
2498       addInterp(insn->ipa, 0xff, interpApply);
2499    }
2500 
2501    if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2502       emitGPR(0x27);
2503 }
2504 
2505 void
emitATOM()2506 CodeEmitterGM107::emitATOM()
2507 {
2508    unsigned dType, subOp;
2509 
2510    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2511       switch (insn->dType) {
2512       case TYPE_U32: dType = 0; break;
2513       case TYPE_U64: dType = 1; break;
2514       default: assert(!"unexpected dType"); dType = 0; break;
2515       }
2516       subOp = 15;
2517 
2518       emitInsn (0xee000000);
2519    } else {
2520       switch (insn->dType) {
2521       case TYPE_U32: dType = 0; break;
2522       case TYPE_S32: dType = 1; break;
2523       case TYPE_U64: dType = 2; break;
2524       case TYPE_F32: dType = 3; break;
2525       case TYPE_B128: dType = 4; break;
2526       case TYPE_S64: dType = 5; break;
2527       default: assert(!"unexpected dType"); dType = 0; break;
2528       }
2529       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2530          subOp = 8;
2531       else
2532          subOp = insn->subOp;
2533 
2534       emitInsn (0xed000000);
2535    }
2536 
2537    emitField(0x34, 4, subOp);
2538    emitField(0x31, 3, dType);
2539    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2540    emitGPR  (0x14, insn->src(1));
2541    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2542    emitGPR  (0x00, insn->def(0));
2543 }
2544 
2545 void
emitATOMS()2546 CodeEmitterGM107::emitATOMS()
2547 {
2548    unsigned dType, subOp;
2549 
2550    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2551       switch (insn->dType) {
2552       case TYPE_U32: dType = 0; break;
2553       case TYPE_U64: dType = 1; break;
2554       default: assert(!"unexpected dType"); dType = 0; break;
2555       }
2556       subOp = 4;
2557 
2558       emitInsn (0xee000000);
2559       emitField(0x34, 1, dType);
2560    } else {
2561       switch (insn->dType) {
2562       case TYPE_U32: dType = 0; break;
2563       case TYPE_S32: dType = 1; break;
2564       case TYPE_U64: dType = 2; break;
2565       case TYPE_S64: dType = 3; break;
2566       default: assert(!"unexpected dType"); dType = 0; break;
2567       }
2568 
2569       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2570          subOp = 8;
2571       else
2572          subOp = insn->subOp;
2573 
2574       emitInsn (0xec000000);
2575       emitField(0x1c, 3, dType);
2576    }
2577 
2578    emitField(0x34, 4, subOp);
2579    emitGPR  (0x14, insn->src(1));
2580    emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2581    emitGPR  (0x00, insn->def(0));
2582 }
2583 
2584 void
emitRED()2585 CodeEmitterGM107::emitRED()
2586 {
2587    unsigned dType;
2588 
2589    switch (insn->dType) {
2590    case TYPE_U32: dType = 0; break;
2591    case TYPE_S32: dType = 1; break;
2592    case TYPE_U64: dType = 2; break;
2593    case TYPE_F32: dType = 3; break;
2594    case TYPE_B128: dType = 4; break;
2595    case TYPE_S64: dType = 5; break;
2596    default: assert(!"unexpected dType"); dType = 0; break;
2597    }
2598 
2599    emitInsn (0xebf80000);
2600    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2601    emitField(0x17, 3, insn->subOp);
2602    emitField(0x14, 3, dType);
2603    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2604    emitGPR  (0x00, insn->src(1));
2605 }
2606 
2607 void
emitCCTL()2608 CodeEmitterGM107::emitCCTL()
2609 {
2610    unsigned width;
2611    if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2612       emitInsn(0xef600000);
2613       width = 30;
2614    } else {
2615       emitInsn(0xef800000);
2616       width = 22;
2617    }
2618    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2619    emitADDR (0x08, 0x16, width, 2, insn->src(0));
2620    emitField(0x00, 4, insn->subOp);
2621 }
2622 
2623 /*******************************************************************************
2624  * surface
2625  ******************************************************************************/
2626 
2627 void
emitPIXLD()2628 CodeEmitterGM107::emitPIXLD()
2629 {
2630    emitInsn (0xefe80000);
2631    emitPRED (0x2d);
2632    emitField(0x1f, 3, insn->subOp);
2633    emitGPR  (0x08, insn->src(0));
2634    emitGPR  (0x00, insn->def(0));
2635 }
2636 
2637 /*******************************************************************************
2638  * texture
2639  ******************************************************************************/
2640 
2641 void
emitTEXs(int pos)2642 CodeEmitterGM107::emitTEXs(int pos)
2643 {
2644    int src1 = insn->predSrc == 1 ? 2 : 1;
2645    if (insn->srcExists(src1))
2646       emitGPR(pos, insn->src(src1));
2647    else
2648       emitGPR(pos);
2649 }
2650 
2651 void
emitTEX()2652 CodeEmitterGM107::emitTEX()
2653 {
2654    const TexInstruction *insn = this->insn->asTex();
2655    int lodm = 0;
2656 
2657    if (!insn->tex.levelZero) {
2658       switch (insn->op) {
2659       case OP_TEX: lodm = 0; break;
2660       case OP_TXB: lodm = 2; break;
2661       case OP_TXL: lodm = 3; break;
2662       default:
2663          assert(!"invalid tex op");
2664          break;
2665       }
2666    } else {
2667       lodm = 1;
2668    }
2669 
2670    if (insn->tex.rIndirectSrc >= 0) {
2671       emitInsn (0xdeb80000);
2672       emitField(0x25, 2, lodm);
2673       emitField(0x24, 1, insn->tex.useOffsets == 1);
2674    } else {
2675       emitInsn (0xc0380000);
2676       emitField(0x37, 2, lodm);
2677       emitField(0x36, 1, insn->tex.useOffsets == 1);
2678       emitField(0x24, 13, insn->tex.r);
2679    }
2680 
2681    emitField(0x32, 1, insn->tex.target.isShadow());
2682    emitField(0x31, 1, insn->tex.liveOnly);
2683    emitField(0x23, 1, insn->tex.derivAll);
2684    emitField(0x1f, 4, insn->tex.mask);
2685    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2686                       insn->tex.target.getDim() - 1);
2687    emitField(0x1c, 1, insn->tex.target.isArray());
2688    emitTEXs (0x14);
2689    emitGPR  (0x08, insn->src(0));
2690    emitGPR  (0x00, insn->def(0));
2691 }
2692 
2693 void
emitTLD()2694 CodeEmitterGM107::emitTLD()
2695 {
2696    const TexInstruction *insn = this->insn->asTex();
2697 
2698    if (insn->tex.rIndirectSrc >= 0) {
2699       emitInsn (0xdd380000);
2700    } else {
2701       emitInsn (0xdc380000);
2702       emitField(0x24, 13, insn->tex.r);
2703    }
2704 
2705    emitField(0x37, 1, insn->tex.levelZero == 0);
2706    emitField(0x32, 1, insn->tex.target.isMS());
2707    emitField(0x31, 1, insn->tex.liveOnly);
2708    emitField(0x23, 1, insn->tex.useOffsets == 1);
2709    emitField(0x1f, 4, insn->tex.mask);
2710    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2711                       insn->tex.target.getDim() - 1);
2712    emitField(0x1c, 1, insn->tex.target.isArray());
2713    emitTEXs (0x14);
2714    emitGPR  (0x08, insn->src(0));
2715    emitGPR  (0x00, insn->def(0));
2716 }
2717 
2718 void
emitTLD4()2719 CodeEmitterGM107::emitTLD4()
2720 {
2721    const TexInstruction *insn = this->insn->asTex();
2722 
2723    if (insn->tex.rIndirectSrc >= 0) {
2724       emitInsn (0xdef80000);
2725       emitField(0x26, 2, insn->tex.gatherComp);
2726       emitField(0x25, 2, insn->tex.useOffsets == 4);
2727       emitField(0x24, 2, insn->tex.useOffsets == 1);
2728    } else {
2729       emitInsn (0xc8380000);
2730       emitField(0x38, 2, insn->tex.gatherComp);
2731       emitField(0x37, 2, insn->tex.useOffsets == 4);
2732       emitField(0x36, 2, insn->tex.useOffsets == 1);
2733       emitField(0x24, 13, insn->tex.r);
2734    }
2735 
2736    emitField(0x32, 1, insn->tex.target.isShadow());
2737    emitField(0x31, 1, insn->tex.liveOnly);
2738    emitField(0x23, 1, insn->tex.derivAll);
2739    emitField(0x1f, 4, insn->tex.mask);
2740    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2741                       insn->tex.target.getDim() - 1);
2742    emitField(0x1c, 1, insn->tex.target.isArray());
2743    emitTEXs (0x14);
2744    emitGPR  (0x08, insn->src(0));
2745    emitGPR  (0x00, insn->def(0));
2746 }
2747 
2748 void
emitTXD()2749 CodeEmitterGM107::emitTXD()
2750 {
2751    const TexInstruction *insn = this->insn->asTex();
2752 
2753    if (insn->tex.rIndirectSrc >= 0) {
2754       emitInsn (0xde780000);
2755    } else {
2756       emitInsn (0xde380000);
2757       emitField(0x24, 13, insn->tex.r);
2758    }
2759 
2760    emitField(0x31, 1, insn->tex.liveOnly);
2761    emitField(0x23, 1, insn->tex.useOffsets == 1);
2762    emitField(0x1f, 4, insn->tex.mask);
2763    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2764                       insn->tex.target.getDim() - 1);
2765    emitField(0x1c, 1, insn->tex.target.isArray());
2766    emitTEXs (0x14);
2767    emitGPR  (0x08, insn->src(0));
2768    emitGPR  (0x00, insn->def(0));
2769 }
2770 
2771 void
emitTMML()2772 CodeEmitterGM107::emitTMML()
2773 {
2774    const TexInstruction *insn = this->insn->asTex();
2775 
2776    if (insn->tex.rIndirectSrc >= 0) {
2777       emitInsn (0xdf600000);
2778    } else {
2779       emitInsn (0xdf580000);
2780       emitField(0x24, 13, insn->tex.r);
2781    }
2782 
2783    emitField(0x31, 1, insn->tex.liveOnly);
2784    emitField(0x23, 1, insn->tex.derivAll);
2785    emitField(0x1f, 4, insn->tex.mask);
2786    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2787                       insn->tex.target.getDim() - 1);
2788    emitField(0x1c, 1, insn->tex.target.isArray());
2789    emitTEXs (0x14);
2790    emitGPR  (0x08, insn->src(0));
2791    emitGPR  (0x00, insn->def(0));
2792 }
2793 
2794 void
emitTXQ()2795 CodeEmitterGM107::emitTXQ()
2796 {
2797    const TexInstruction *insn = this->insn->asTex();
2798    int type = 0;
2799 
2800    switch (insn->tex.query) {
2801    case TXQ_DIMS           : type = 0x01; break;
2802    case TXQ_TYPE           : type = 0x02; break;
2803    case TXQ_SAMPLE_POSITION: type = 0x05; break;
2804    case TXQ_FILTER         : type = 0x10; break;
2805    case TXQ_LOD            : type = 0x12; break;
2806    case TXQ_WRAP           : type = 0x14; break;
2807    case TXQ_BORDER_COLOUR  : type = 0x16; break;
2808    default:
2809       assert(!"invalid txq query");
2810       break;
2811    }
2812 
2813    if (insn->tex.rIndirectSrc >= 0) {
2814       emitInsn (0xdf500000);
2815    } else {
2816       emitInsn (0xdf480000);
2817       emitField(0x24, 13, insn->tex.r);
2818    }
2819 
2820    emitField(0x31, 1, insn->tex.liveOnly);
2821    emitField(0x1f, 4, insn->tex.mask);
2822    emitField(0x16, 6, type);
2823    emitGPR  (0x08, insn->src(0));
2824    emitGPR  (0x00, insn->def(0));
2825 }
2826 
2827 void
emitDEPBAR()2828 CodeEmitterGM107::emitDEPBAR()
2829 {
2830    emitInsn (0xf0f00000);
2831    emitField(0x1d, 1, 1); /* le */
2832    emitField(0x1a, 3, 5);
2833    emitField(0x14, 6, insn->subOp);
2834    emitField(0x00, 6, insn->subOp);
2835 }
2836 
2837 /*******************************************************************************
2838  * misc
2839  ******************************************************************************/
2840 
2841 void
emitNOP()2842 CodeEmitterGM107::emitNOP()
2843 {
2844    emitInsn(0x50b00000);
2845 }
2846 
2847 void
emitKIL()2848 CodeEmitterGM107::emitKIL()
2849 {
2850    emitInsn (0xe3300000);
2851    emitCond5(0x00, CC_TR);
2852 }
2853 
2854 void
emitOUT()2855 CodeEmitterGM107::emitOUT()
2856 {
2857    const int cut  = insn->op == OP_RESTART || insn->subOp;
2858    const int emit = insn->op == OP_EMIT;
2859 
2860    switch (insn->src(1).getFile()) {
2861    case FILE_GPR:
2862       emitInsn(0xfbe00000);
2863       emitGPR (0x14, insn->src(1));
2864       break;
2865    case FILE_IMMEDIATE:
2866       emitInsn(0xf6e00000);
2867       emitIMMD(0x14, 19, insn->src(1));
2868       break;
2869    case FILE_MEMORY_CONST:
2870       emitInsn(0xebe00000);
2871       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2872       break;
2873    default:
2874       assert(!"bad src1 file");
2875       break;
2876    }
2877 
2878    emitField(0x27, 2, (cut << 1) | emit);
2879    emitGPR  (0x08, insn->src(0));
2880    emitGPR  (0x00, insn->def(0));
2881 }
2882 
2883 void
emitBAR()2884 CodeEmitterGM107::emitBAR()
2885 {
2886    uint8_t subop;
2887 
2888    emitInsn (0xf0a80000);
2889 
2890    switch (insn->subOp) {
2891    case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2892    case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
2893    case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
2894    case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
2895    default:
2896       subop = 0x80;
2897       assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2898       break;
2899    }
2900 
2901    emitField(0x20, 8, subop);
2902 
2903    // barrier id
2904    if (insn->src(0).getFile() == FILE_GPR) {
2905       emitGPR(0x08, insn->src(0));
2906    } else {
2907       ImmediateValue *imm = insn->getSrc(0)->asImm();
2908       assert(imm);
2909       emitField(0x08, 8, imm->reg.data.u32);
2910       emitField(0x2b, 1, 1);
2911    }
2912 
2913    // thread count
2914    if (insn->src(1).getFile() == FILE_GPR) {
2915       emitGPR(0x14, insn->src(1));
2916    } else {
2917       ImmediateValue *imm = insn->getSrc(0)->asImm();
2918       assert(imm);
2919       emitField(0x14, 12, imm->reg.data.u32);
2920       emitField(0x2c, 1, 1);
2921    }
2922 
2923    if (insn->srcExists(2) && (insn->predSrc != 2)) {
2924       emitPRED (0x27, insn->src(2));
2925       emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2926    } else {
2927       emitField(0x27, 3, 7);
2928    }
2929 }
2930 
2931 void
emitMEMBAR()2932 CodeEmitterGM107::emitMEMBAR()
2933 {
2934    emitInsn (0xef980000);
2935    emitField(0x08, 2, insn->subOp >> 2);
2936 }
2937 
2938 void
emitVOTE()2939 CodeEmitterGM107::emitVOTE()
2940 {
2941    const ImmediateValue *imm;
2942    uint32_t u32;
2943 
2944    int r = -1, p = -1;
2945    for (int i = 0; insn->defExists(i); i++) {
2946       if (insn->def(i).getFile() == FILE_GPR)
2947          r = i;
2948       else if (insn->def(i).getFile() == FILE_PREDICATE)
2949          p = i;
2950    }
2951 
2952    emitInsn (0x50d80000);
2953    emitField(0x30, 2, insn->subOp);
2954    if (r >= 0)
2955       emitGPR  (0x00, insn->def(r));
2956    else
2957       emitGPR  (0x00);
2958    if (p >= 0)
2959       emitPRED (0x2d, insn->def(p));
2960    else
2961       emitPRED (0x2d);
2962 
2963    switch (insn->src(0).getFile()) {
2964    case FILE_PREDICATE:
2965       emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2966       emitPRED (0x27, insn->src(0));
2967       break;
2968    case FILE_IMMEDIATE:
2969       imm = insn->getSrc(0)->asImm();
2970       assert(imm);
2971       u32 = imm->reg.data.u32;
2972       assert(u32 == 0 || u32 == 1);
2973       emitPRED(0x27);
2974       emitField(0x2a, 1, u32 == 0);
2975       break;
2976    default:
2977       assert(!"Unhandled src");
2978       break;
2979    }
2980 }
2981 
2982 void
emitSUTarget()2983 CodeEmitterGM107::emitSUTarget()
2984 {
2985    const TexInstruction *insn = this->insn->asTex();
2986    int target = 0;
2987 
2988    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2989 
2990    if (insn->tex.target == TEX_TARGET_BUFFER) {
2991       target = 2;
2992    } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2993       target = 4;
2994    } else if (insn->tex.target == TEX_TARGET_2D ||
2995               insn->tex.target == TEX_TARGET_RECT) {
2996       target = 6;
2997    } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2998               insn->tex.target == TEX_TARGET_CUBE ||
2999               insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3000       target = 8;
3001    } else if (insn->tex.target == TEX_TARGET_3D) {
3002       target = 10;
3003    } else {
3004       assert(insn->tex.target == TEX_TARGET_1D);
3005    }
3006    emitField(0x20, 4, target);
3007 }
3008 
3009 void
emitSUHandle(const int s)3010 CodeEmitterGM107::emitSUHandle(const int s)
3011 {
3012    const TexInstruction *insn = this->insn->asTex();
3013 
3014    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3015 
3016    if (insn->src(s).getFile() == FILE_GPR) {
3017       emitGPR(0x27, insn->src(s));
3018    } else {
3019       ImmediateValue *imm = insn->getSrc(s)->asImm();
3020       assert(imm);
3021       emitField(0x33, 1, 1);
3022       emitField(0x24, 13, imm->reg.data.u32);
3023    }
3024 }
3025 
3026 void
emitSUSTx()3027 CodeEmitterGM107::emitSUSTx()
3028 {
3029    const TexInstruction *insn = this->insn->asTex();
3030 
3031    emitInsn(0xeb200000);
3032    if (insn->op == OP_SUSTB)
3033       emitField(0x34, 1, 1);
3034    emitSUTarget();
3035 
3036    emitLDSTc(0x18);
3037    emitField(0x14, 4, 0xf); // rgba
3038    emitGPR  (0x08, insn->src(0));
3039    emitGPR  (0x00, insn->src(1));
3040 
3041    emitSUHandle(2);
3042 }
3043 
3044 void
emitSULDx()3045 CodeEmitterGM107::emitSULDx()
3046 {
3047    const TexInstruction *insn = this->insn->asTex();
3048    int type = 0;
3049 
3050    emitInsn(0xeb000000);
3051    if (insn->op == OP_SULDB)
3052       emitField(0x34, 1, 1);
3053    emitSUTarget();
3054 
3055    switch (insn->dType) {
3056    case TYPE_S8:   type = 1; break;
3057    case TYPE_U16:  type = 2; break;
3058    case TYPE_S16:  type = 3; break;
3059    case TYPE_U32:  type = 4; break;
3060    case TYPE_U64:  type = 5; break;
3061    case TYPE_B128: type = 6; break;
3062    default:
3063       assert(insn->dType == TYPE_U8);
3064       break;
3065    }
3066    emitLDSTc(0x18);
3067    emitField(0x14, 3, type);
3068    emitGPR  (0x00, insn->def(0));
3069    emitGPR  (0x08, insn->src(0));
3070 
3071    emitSUHandle(1);
3072 }
3073 
3074 void
emitSUREDx()3075 CodeEmitterGM107::emitSUREDx()
3076 {
3077    const TexInstruction *insn = this->insn->asTex();
3078    uint8_t type = 0, subOp;
3079 
3080    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3081       emitInsn(0xeac00000);
3082    else
3083       emitInsn(0xea600000);
3084 
3085    if (insn->op == OP_SUREDB)
3086       emitField(0x34, 1, 1);
3087    emitSUTarget();
3088 
3089    // destination type
3090    switch (insn->dType) {
3091    case TYPE_S32: type = 1; break;
3092    case TYPE_U64: type = 2; break;
3093    case TYPE_F32: type = 3; break;
3094    case TYPE_S64: type = 5; break;
3095    default:
3096       assert(insn->dType == TYPE_U32);
3097       break;
3098    }
3099 
3100    // atomic operation
3101    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3102       subOp = 0;
3103    } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3104       subOp = 8;
3105    } else {
3106       subOp = insn->subOp;
3107    }
3108 
3109    emitField(0x24, 3, type);
3110    emitField(0x1d, 4, subOp);
3111    emitGPR  (0x14, insn->src(1));
3112    emitGPR  (0x08, insn->src(0));
3113    emitGPR  (0x00, insn->def(0));
3114 
3115    emitSUHandle(2);
3116 }
3117 
3118 /*******************************************************************************
3119  * assembler front-end
3120  ******************************************************************************/
3121 
3122 bool
emitInstruction(Instruction * i)3123 CodeEmitterGM107::emitInstruction(Instruction *i)
3124 {
3125    const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3126    bool ret = true;
3127 
3128    insn = i;
3129 
3130    if (insn->encSize != 8) {
3131       ERROR("skipping undecodable instruction: "); insn->print();
3132       return false;
3133    } else
3134    if (codeSize + size > codeSizeLimit) {
3135       ERROR("code emitter output buffer too small\n");
3136       return false;
3137    }
3138 
3139    if (writeIssueDelays) {
3140       int n = ((codeSize & 0x1f) / 8) - 1;
3141       if (n < 0) {
3142          data = code;
3143          data[0] = 0x00000000;
3144          data[1] = 0x00000000;
3145          code += 2;
3146          codeSize += 8;
3147          n++;
3148       }
3149 
3150       emitField(data, n * 21, 21, insn->sched);
3151    }
3152 
3153    switch (insn->op) {
3154    case OP_EXIT:
3155       emitEXIT();
3156       break;
3157    case OP_BRA:
3158       emitBRA();
3159       break;
3160    case OP_CALL:
3161       emitCAL();
3162       break;
3163    case OP_PRECONT:
3164       emitPCNT();
3165       break;
3166    case OP_CONT:
3167       emitCONT();
3168       break;
3169    case OP_PREBREAK:
3170       emitPBK();
3171       break;
3172    case OP_BREAK:
3173       emitBRK();
3174       break;
3175    case OP_PRERET:
3176       emitPRET();
3177       break;
3178    case OP_RET:
3179       emitRET();
3180       break;
3181    case OP_JOINAT:
3182       emitSSY();
3183       break;
3184    case OP_JOIN:
3185       emitSYNC();
3186       break;
3187    case OP_QUADON:
3188       emitSAM();
3189       break;
3190    case OP_QUADPOP:
3191       emitRAM();
3192       break;
3193    case OP_MOV:
3194       emitMOV();
3195       break;
3196    case OP_RDSV:
3197       emitS2R();
3198       break;
3199    case OP_ABS:
3200    case OP_NEG:
3201    case OP_SAT:
3202    case OP_FLOOR:
3203    case OP_CEIL:
3204    case OP_TRUNC:
3205    case OP_CVT:
3206       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3207                                  insn->src(0).getFile() == FILE_PREDICATE)) {
3208          emitMOV();
3209       } else if (isFloatType(insn->dType)) {
3210          if (isFloatType(insn->sType))
3211             emitF2F();
3212          else
3213             emitI2F();
3214       } else {
3215          if (isFloatType(insn->sType))
3216             emitF2I();
3217          else
3218             emitI2I();
3219       }
3220       break;
3221    case OP_SHFL:
3222       emitSHFL();
3223       break;
3224    case OP_ADD:
3225    case OP_SUB:
3226       if (isFloatType(insn->dType)) {
3227          if (insn->dType == TYPE_F64)
3228             emitDADD();
3229          else
3230             emitFADD();
3231       } else {
3232          emitIADD();
3233       }
3234       break;
3235    case OP_MUL:
3236       if (isFloatType(insn->dType)) {
3237          if (insn->dType == TYPE_F64)
3238             emitDMUL();
3239          else
3240             emitFMUL();
3241       } else {
3242          emitIMUL();
3243       }
3244       break;
3245    case OP_MAD:
3246    case OP_FMA:
3247       if (isFloatType(insn->dType)) {
3248          if (insn->dType == TYPE_F64)
3249             emitDFMA();
3250          else
3251             emitFFMA();
3252       } else {
3253          emitIMAD();
3254       }
3255       break;
3256    case OP_SHLADD:
3257       emitISCADD();
3258       break;
3259    case OP_MIN:
3260    case OP_MAX:
3261       if (isFloatType(insn->dType)) {
3262          if (insn->dType == TYPE_F64)
3263             emitDMNMX();
3264          else
3265             emitFMNMX();
3266       } else {
3267          emitIMNMX();
3268       }
3269       break;
3270    case OP_SHL:
3271       if (typeSizeof(insn->sType) == 8)
3272          emitSHF();
3273       else
3274          emitSHL();
3275       break;
3276    case OP_SHR:
3277       if (typeSizeof(insn->sType) == 8)
3278          emitSHF();
3279       else
3280          emitSHR();
3281       break;
3282    case OP_POPCNT:
3283       emitPOPC();
3284       break;
3285    case OP_INSBF:
3286       emitBFI();
3287       break;
3288    case OP_EXTBF:
3289       emitBFE();
3290       break;
3291    case OP_BFIND:
3292       emitFLO();
3293       break;
3294    case OP_SLCT:
3295       if (isFloatType(insn->dType))
3296          emitFCMP();
3297       else
3298          emitICMP();
3299       break;
3300    case OP_SET:
3301    case OP_SET_AND:
3302    case OP_SET_OR:
3303    case OP_SET_XOR:
3304       if (insn->def(0).getFile() != FILE_PREDICATE) {
3305          if (isFloatType(insn->sType))
3306             if (insn->sType == TYPE_F64)
3307                emitDSET();
3308             else
3309                emitFSET();
3310          else
3311             emitISET();
3312       } else {
3313          if (isFloatType(insn->sType))
3314             if (insn->sType == TYPE_F64)
3315                emitDSETP();
3316             else
3317                emitFSETP();
3318          else
3319             emitISETP();
3320       }
3321       break;
3322    case OP_SELP:
3323       emitSEL();
3324       break;
3325    case OP_PRESIN:
3326    case OP_PREEX2:
3327       emitRRO();
3328       break;
3329    case OP_COS:
3330    case OP_SIN:
3331    case OP_EX2:
3332    case OP_LG2:
3333    case OP_RCP:
3334    case OP_RSQ:
3335       emitMUFU();
3336       break;
3337    case OP_AND:
3338    case OP_OR:
3339    case OP_XOR:
3340       emitLOP();
3341       break;
3342    case OP_NOT:
3343       emitNOT();
3344       break;
3345    case OP_LOAD:
3346       switch (insn->src(0).getFile()) {
3347       case FILE_MEMORY_CONST : emitLDC(); break;
3348       case FILE_MEMORY_LOCAL : emitLDL(); break;
3349       case FILE_MEMORY_SHARED: emitLDS(); break;
3350       case FILE_MEMORY_GLOBAL: emitLD(); break;
3351       default:
3352          assert(!"invalid load");
3353          emitNOP();
3354          break;
3355       }
3356       break;
3357    case OP_STORE:
3358       switch (insn->src(0).getFile()) {
3359       case FILE_MEMORY_LOCAL : emitSTL(); break;
3360       case FILE_MEMORY_SHARED: emitSTS(); break;
3361       case FILE_MEMORY_GLOBAL: emitST(); break;
3362       default:
3363          assert(!"invalid store");
3364          emitNOP();
3365          break;
3366       }
3367       break;
3368    case OP_ATOM:
3369       if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3370          emitATOMS();
3371       else
3372          if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3373             emitRED();
3374          else
3375             emitATOM();
3376       break;
3377    case OP_CCTL:
3378       emitCCTL();
3379       break;
3380    case OP_VFETCH:
3381       emitALD();
3382       break;
3383    case OP_EXPORT:
3384       emitAST();
3385       break;
3386    case OP_PFETCH:
3387       emitISBERD();
3388       break;
3389    case OP_AFETCH:
3390       emitAL2P();
3391       break;
3392    case OP_LINTERP:
3393    case OP_PINTERP:
3394       emitIPA();
3395       break;
3396    case OP_PIXLD:
3397       emitPIXLD();
3398       break;
3399    case OP_TEX:
3400    case OP_TXB:
3401    case OP_TXL:
3402       emitTEX();
3403       break;
3404    case OP_TXF:
3405       emitTLD();
3406       break;
3407    case OP_TXG:
3408       emitTLD4();
3409       break;
3410    case OP_TXD:
3411       emitTXD();
3412       break;
3413    case OP_TXQ:
3414       emitTXQ();
3415       break;
3416    case OP_TXLQ:
3417       emitTMML();
3418       break;
3419    case OP_TEXBAR:
3420       emitDEPBAR();
3421       break;
3422    case OP_QUADOP:
3423       emitFSWZADD();
3424       break;
3425    case OP_NOP:
3426       emitNOP();
3427       break;
3428    case OP_DISCARD:
3429       emitKIL();
3430       break;
3431    case OP_EMIT:
3432    case OP_RESTART:
3433       emitOUT();
3434       break;
3435    case OP_BAR:
3436       emitBAR();
3437       break;
3438    case OP_MEMBAR:
3439       emitMEMBAR();
3440       break;
3441    case OP_VOTE:
3442       emitVOTE();
3443       break;
3444    case OP_SUSTB:
3445    case OP_SUSTP:
3446       emitSUSTx();
3447       break;
3448    case OP_SULDB:
3449    case OP_SULDP:
3450       emitSULDx();
3451       break;
3452    case OP_SUREDB:
3453    case OP_SUREDP:
3454       emitSUREDx();
3455       break;
3456    default:
3457       assert(!"invalid opcode");
3458       emitNOP();
3459       ret = false;
3460       break;
3461    }
3462 
3463    if (insn->join) {
3464       /*XXX*/
3465    }
3466 
3467    code += 2;
3468    codeSize += 8;
3469    return ret;
3470 }
3471 
3472 uint32_t
getMinEncodingSize(const Instruction * i) const3473 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3474 {
3475    return 8;
3476 }
3477 
3478 /*******************************************************************************
3479  * sched data calculator
3480  ******************************************************************************/
3481 
3482 class SchedDataCalculatorGM107 : public Pass
3483 {
3484 public:
SchedDataCalculatorGM107(const TargetGM107 * targ)3485    SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3486 
3487 private:
3488    struct RegScores
3489    {
3490       struct ScoreData {
3491          int r[256];
3492          int p[8];
3493          int c;
3494       } rd, wr;
3495       int base;
3496 
rebasenv50_ir::SchedDataCalculatorGM107::RegScores3497       void rebase(const int base)
3498       {
3499          const int delta = this->base - base;
3500          if (!delta)
3501             return;
3502          this->base = 0;
3503 
3504          for (int i = 0; i < 256; ++i) {
3505             rd.r[i] += delta;
3506             wr.r[i] += delta;
3507          }
3508          for (int i = 0; i < 8; ++i) {
3509             rd.p[i] += delta;
3510             wr.p[i] += delta;
3511          }
3512          rd.c += delta;
3513          wr.c += delta;
3514       }
wipenv50_ir::SchedDataCalculatorGM107::RegScores3515       void wipe()
3516       {
3517          memset(&rd, 0, sizeof(rd));
3518          memset(&wr, 0, sizeof(wr));
3519       }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3520       int getLatest(const ScoreData& d) const
3521       {
3522          int max = 0;
3523          for (int i = 0; i < 256; ++i)
3524             if (d.r[i] > max)
3525                max = d.r[i];
3526          for (int i = 0; i < 8; ++i)
3527             if (d.p[i] > max)
3528                max = d.p[i];
3529          if (d.c > max)
3530             max = d.c;
3531          return max;
3532       }
getLatestRdnv50_ir::SchedDataCalculatorGM107::RegScores3533       inline int getLatestRd() const
3534       {
3535          return getLatest(rd);
3536       }
getLatestWrnv50_ir::SchedDataCalculatorGM107::RegScores3537       inline int getLatestWr() const
3538       {
3539          return getLatest(wr);
3540       }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3541       inline int getLatest() const
3542       {
3543          return MAX2(getLatestRd(), getLatestWr());
3544       }
setMaxnv50_ir::SchedDataCalculatorGM107::RegScores3545       void setMax(const RegScores *that)
3546       {
3547          for (int i = 0; i < 256; ++i) {
3548             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3549             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3550          }
3551          for (int i = 0; i < 8; ++i) {
3552             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3553             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3554          }
3555          rd.c = MAX2(rd.c, that->rd.c);
3556          wr.c = MAX2(wr.c, that->wr.c);
3557       }
printnv50_ir::SchedDataCalculatorGM107::RegScores3558       void print(int cycle)
3559       {
3560          for (int i = 0; i < 256; ++i) {
3561             if (rd.r[i] > cycle)
3562                INFO("rd $r%i @ %i\n", i, rd.r[i]);
3563             if (wr.r[i] > cycle)
3564                INFO("wr $r%i @ %i\n", i, wr.r[i]);
3565          }
3566          for (int i = 0; i < 8; ++i) {
3567             if (rd.p[i] > cycle)
3568                INFO("rd $p%i @ %i\n", i, rd.p[i]);
3569             if (wr.p[i] > cycle)
3570                INFO("wr $p%i @ %i\n", i, wr.p[i]);
3571          }
3572          if (rd.c > cycle)
3573             INFO("rd $c @ %i\n", rd.c);
3574          if (wr.c > cycle)
3575             INFO("wr $c @ %i\n", wr.c);
3576       }
3577    };
3578 
3579    RegScores *score; // for current BB
3580    std::vector<RegScores> scoreBoards;
3581 
3582    const TargetGM107 *targ;
3583    bool visit(Function *);
3584    bool visit(BasicBlock *);
3585 
3586    void commitInsn(const Instruction *, int);
3587    int calcDelay(const Instruction *, int) const;
3588    void setDelay(Instruction *, int, const Instruction *);
3589    void recordWr(const Value *, int, int);
3590    void checkRd(const Value *, int, int&) const;
3591 
3592    inline void emitYield(Instruction *);
3593    inline void emitStall(Instruction *, uint8_t);
3594    inline void emitReuse(Instruction *, uint8_t);
3595    inline void emitWrDepBar(Instruction *, uint8_t);
3596    inline void emitRdDepBar(Instruction *, uint8_t);
3597    inline void emitWtDepBar(Instruction *, uint8_t);
3598 
3599    inline int getStall(const Instruction *) const;
3600    inline int getWrDepBar(const Instruction *) const;
3601    inline int getRdDepBar(const Instruction *) const;
3602    inline int getWtDepBar(const Instruction *) const;
3603 
3604    void setReuseFlag(Instruction *);
3605 
3606    inline void printSchedInfo(int, const Instruction *) const;
3607 
3608    struct LiveBarUse {
LiveBarUsenv50_ir::SchedDataCalculatorGM107::LiveBarUse3609       LiveBarUse(Instruction *insn, Instruction *usei)
3610          : insn(insn), usei(usei) { }
3611       Instruction *insn;
3612       Instruction *usei;
3613    };
3614 
3615    struct LiveBarDef {
LiveBarDefnv50_ir::SchedDataCalculatorGM107::LiveBarDef3616       LiveBarDef(Instruction *insn, Instruction *defi)
3617          : insn(insn), defi(defi) { }
3618       Instruction *insn;
3619       Instruction *defi;
3620    };
3621 
3622    bool insertBarriers(BasicBlock *);
3623 
3624    Instruction *findFirstUse(const Instruction *) const;
3625    Instruction *findFirstDef(const Instruction *) const;
3626 
3627    bool needRdDepBar(const Instruction *) const;
3628    bool needWrDepBar(const Instruction *) const;
3629 };
3630 
3631 inline void
emitStall(Instruction * insn,uint8_t cnt)3632 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3633 {
3634    assert(cnt < 16);
3635    insn->sched |= cnt;
3636 }
3637 
3638 inline void
emitYield(Instruction * insn)3639 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3640 {
3641    insn->sched |= 1 << 4;
3642 }
3643 
3644 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3645 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3646 {
3647    assert(id < 6);
3648    if ((insn->sched & 0xe0) == 0xe0)
3649       insn->sched ^= 0xe0;
3650    insn->sched |= id << 5;
3651 }
3652 
3653 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3654 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3655 {
3656    assert(id < 6);
3657    if ((insn->sched & 0x700) == 0x700)
3658       insn->sched ^= 0x700;
3659    insn->sched |= id << 8;
3660 }
3661 
3662 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3663 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3664 {
3665    assert(id < 6);
3666    insn->sched |= 1 << (11 + id);
3667 }
3668 
3669 inline void
emitReuse(Instruction * insn,uint8_t id)3670 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3671 {
3672    assert(id < 4);
3673    insn->sched |= 1 << (17 + id);
3674 }
3675 
3676 inline void
printSchedInfo(int cycle,const Instruction * insn) const3677 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3678                                          const Instruction *insn) const
3679 {
3680    uint8_t st, yl, wr, rd, wt, ru;
3681 
3682    st = (insn->sched & 0x00000f) >> 0;
3683    yl = (insn->sched & 0x000010) >> 4;
3684    wr = (insn->sched & 0x0000e0) >> 5;
3685    rd = (insn->sched & 0x000700) >> 8;
3686    wt = (insn->sched & 0x01f800) >> 11;
3687    ru = (insn->sched & 0x1e0000) >> 17;
3688 
3689    INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3690         cycle, st, yl, wr, rd, wt, ru);
3691 }
3692 
3693 inline int
getStall(const Instruction * insn) const3694 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3695 {
3696    return insn->sched & 0xf;
3697 }
3698 
3699 inline int
getWrDepBar(const Instruction * insn) const3700 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3701 {
3702    return (insn->sched & 0x0000e0) >> 5;
3703 }
3704 
3705 inline int
getRdDepBar(const Instruction * insn) const3706 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3707 {
3708    return (insn->sched & 0x000700) >> 8;
3709 }
3710 
3711 inline int
getWtDepBar(const Instruction * insn) const3712 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3713 {
3714    return (insn->sched & 0x01f800) >> 11;
3715 }
3716 
3717 // Emit the reuse flag which allows to make use of the new memory hierarchy
3718 // introduced since Maxwell, the operand reuse cache.
3719 //
3720 // It allows to reduce bank conflicts by caching operands. Each time you issue
3721 // an instruction, that flag can tell the hw which operands are going to be
3722 // re-used by the next instruction. Note that the next instruction has to use
3723 // the same GPR id in the same operand slot.
3724 void
setReuseFlag(Instruction * insn)3725 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3726 {
3727    Instruction *next = insn->next;
3728    BitSet defs(255, 1);
3729 
3730    if (!targ->isReuseSupported(insn))
3731       return;
3732 
3733    for (int d = 0; insn->defExists(d); ++d) {
3734       const Value *def = insn->def(d).rep();
3735       if (insn->def(d).getFile() != FILE_GPR)
3736          continue;
3737       if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3738          continue;
3739       defs.set(def->reg.data.id);
3740    }
3741 
3742    for (int s = 0; insn->srcExists(s); s++) {
3743       const Value *src = insn->src(s).rep();
3744       if (insn->src(s).getFile() != FILE_GPR)
3745          continue;
3746       if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3747          continue;
3748       if (defs.test(src->reg.data.id))
3749          continue;
3750       if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3751          continue;
3752       if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3753          continue;
3754       assert(s < 4);
3755       emitReuse(insn, s);
3756    }
3757 }
3758 
3759 void
recordWr(const Value * v,int cycle,int ready)3760 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3761 {
3762    int a = v->reg.data.id, b;
3763 
3764    switch (v->reg.file) {
3765    case FILE_GPR:
3766       b = a + v->reg.size / 4;
3767       for (int r = a; r < b; ++r)
3768          score->rd.r[r] = ready;
3769       break;
3770    case FILE_PREDICATE:
3771       // To immediately use a predicate set by any instructions, the minimum
3772       // number of stall counts is 13.
3773       score->rd.p[a] = cycle + 13;
3774       break;
3775    case FILE_FLAGS:
3776       score->rd.c = ready;
3777       break;
3778    default:
3779       break;
3780    }
3781 }
3782 
3783 void
checkRd(const Value * v,int cycle,int & delay) const3784 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3785 {
3786    int a = v->reg.data.id, b;
3787    int ready = cycle;
3788 
3789    switch (v->reg.file) {
3790    case FILE_GPR:
3791       b = a + v->reg.size / 4;
3792       for (int r = a; r < b; ++r)
3793          ready = MAX2(ready, score->rd.r[r]);
3794       break;
3795    case FILE_PREDICATE:
3796       ready = MAX2(ready, score->rd.p[a]);
3797       break;
3798    case FILE_FLAGS:
3799       ready = MAX2(ready, score->rd.c);
3800       break;
3801    default:
3802       break;
3803    }
3804    if (cycle < ready)
3805       delay = MAX2(delay, ready - cycle);
3806 }
3807 
3808 void
commitInsn(const Instruction * insn,int cycle)3809 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3810 {
3811    const int ready = cycle + targ->getLatency(insn);
3812 
3813    for (int d = 0; insn->defExists(d); ++d)
3814       recordWr(insn->getDef(d), cycle, ready);
3815 
3816 #ifdef GM107_DEBUG_SCHED_DATA
3817    score->print(cycle);
3818 #endif
3819 }
3820 
3821 #define GM107_MIN_ISSUE_DELAY 0x1
3822 #define GM107_MAX_ISSUE_DELAY 0xf
3823 
3824 int
calcDelay(const Instruction * insn,int cycle) const3825 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3826 {
3827    int delay = 0, ready = cycle;
3828 
3829    for (int s = 0; insn->srcExists(s); ++s)
3830       checkRd(insn->getSrc(s), cycle, delay);
3831 
3832    // TODO: make use of getReadLatency()!
3833 
3834    return MAX2(delay, ready - cycle);
3835 }
3836 
3837 void
setDelay(Instruction * insn,int delay,const Instruction * next)3838 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3839                                    const Instruction *next)
3840 {
3841    const OpClass cl = targ->getOpClass(insn->op);
3842    int wr, rd;
3843 
3844    if (insn->op == OP_EXIT ||
3845        insn->op == OP_BAR ||
3846        insn->op == OP_MEMBAR) {
3847       delay = GM107_MAX_ISSUE_DELAY;
3848    } else
3849    if (insn->op == OP_QUADON ||
3850        insn->op == OP_QUADPOP) {
3851       delay = 0xd;
3852    } else
3853    if (cl == OPCLASS_FLOW || insn->join) {
3854       delay = 0xd;
3855    }
3856 
3857    if (!next || !targ->canDualIssue(insn, next)) {
3858       delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3859    } else {
3860       delay = 0x0; // dual-issue
3861    }
3862 
3863    wr = getWrDepBar(insn);
3864    rd = getRdDepBar(insn);
3865 
3866    if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3867       // Barriers take one additional clock cycle to become active on top of
3868       // the clock consumed by the instruction producing it.
3869       if (!next || insn->bb != next->bb) {
3870          delay = 0x2;
3871       } else {
3872          int wt = getWtDepBar(next);
3873          if ((wt & (1 << wr)) | (wt & (1 << rd)))
3874             delay = 0x2;
3875       }
3876    }
3877 
3878    emitStall(insn, delay);
3879 }
3880 
3881 
3882 // Return true when the given instruction needs to emit a read dependency
3883 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3884 // setting the maximum number of stall counts is not enough.
3885 bool
needRdDepBar(const Instruction * insn) const3886 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3887 {
3888    BitSet srcs(255, 1), defs(255, 1);
3889    int a, b;
3890 
3891    if (!targ->isBarrierRequired(insn))
3892       return false;
3893 
3894    // Do not emit a read dependency barrier when the instruction doesn't use
3895    // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3896    for (int s = 0; insn->srcExists(s); ++s) {
3897       const Value *src = insn->src(s).rep();
3898       if (insn->src(s).getFile() != FILE_GPR)
3899          continue;
3900       if (src->reg.data.id == 255)
3901          continue;
3902 
3903       a = src->reg.data.id;
3904       b = a + src->reg.size / 4;
3905       for (int r = a; r < b; ++r)
3906          srcs.set(r);
3907    }
3908 
3909    if (!srcs.popCount())
3910       return false;
3911 
3912    // Do not emit a read dependency barrier when the output GPRs are equal to
3913    // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3914    // be produced and WaR hazards are prevented.
3915    for (int d = 0; insn->defExists(d); ++d) {
3916       const Value *def = insn->def(d).rep();
3917       if (insn->def(d).getFile() != FILE_GPR)
3918          continue;
3919       if (def->reg.data.id == 255)
3920          continue;
3921 
3922       a = def->reg.data.id;
3923       b = a + def->reg.size / 4;
3924       for (int r = a; r < b; ++r)
3925          defs.set(r);
3926    }
3927 
3928    srcs.andNot(defs);
3929    if (!srcs.popCount())
3930       return false;
3931 
3932    return true;
3933 }
3934 
3935 // Return true when the given instruction needs to emit a write dependency
3936 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3937 // setting the maximum number of stall counts is not enough. This is only legal
3938 // if the instruction output something.
3939 bool
needWrDepBar(const Instruction * insn) const3940 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3941 {
3942    if (!targ->isBarrierRequired(insn))
3943       return false;
3944 
3945    for (int d = 0; insn->defExists(d); ++d) {
3946       if (insn->def(d).getFile() == FILE_GPR ||
3947           insn->def(d).getFile() == FILE_PREDICATE)
3948          return true;
3949    }
3950    return false;
3951 }
3952 
3953 // Find the next instruction inside the same basic block which uses the output
3954 // of the given instruction in order to avoid RaW hazards.
3955 Instruction *
findFirstUse(const Instruction * bari) const3956 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3957 {
3958    Instruction *insn, *next;
3959    int minGPR, maxGPR;
3960 
3961    if (!bari->defExists(0))
3962       return NULL;
3963 
3964    minGPR = bari->def(0).rep()->reg.data.id;
3965    maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3966 
3967    for (insn = bari->next; insn != NULL; insn = next) {
3968       next = insn->next;
3969 
3970       for (int s = 0; insn->srcExists(s); ++s) {
3971          const Value *src = insn->src(s).rep();
3972          if (bari->def(0).getFile() == FILE_GPR) {
3973             if (insn->src(s).getFile() != FILE_GPR ||
3974                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3975                 src->reg.data.id > maxGPR)
3976                continue;
3977             return insn;
3978          } else
3979          if (bari->def(0).getFile() == FILE_PREDICATE) {
3980             if (insn->src(s).getFile() != FILE_PREDICATE ||
3981                 src->reg.data.id != minGPR)
3982                continue;
3983             return insn;
3984          }
3985       }
3986    }
3987    return NULL;
3988 }
3989 
3990 // Find the next instruction inside the same basic block which overwrites, at
3991 // least, one source of the given instruction in order to avoid WaR hazards.
3992 Instruction *
findFirstDef(const Instruction * bari) const3993 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3994 {
3995    Instruction *insn, *next;
3996    int minGPR, maxGPR;
3997 
3998    for (insn = bari->next; insn != NULL; insn = next) {
3999       next = insn->next;
4000 
4001       for (int d = 0; insn->defExists(d); ++d) {
4002          const Value *def = insn->def(d).rep();
4003          if (insn->def(d).getFile() != FILE_GPR)
4004             continue;
4005 
4006          minGPR = def->reg.data.id;
4007          maxGPR = minGPR + def->reg.size / 4 - 1;
4008 
4009          for (int s = 0; bari->srcExists(s); ++s) {
4010             const Value *src = bari->src(s).rep();
4011             if (bari->src(s).getFile() != FILE_GPR ||
4012                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
4013                 src->reg.data.id > maxGPR)
4014                continue;
4015             return insn;
4016          }
4017       }
4018    }
4019    return NULL;
4020 }
4021 
4022 // Dependency barriers:
4023 // This pass is a bit ugly and could probably be improved by performing a
4024 // better allocation.
4025 //
4026 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4027 // dependency barriers using the control codes.
4028 bool
insertBarriers(BasicBlock * bb)4029 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4030 {
4031    std::list<LiveBarUse> live_uses;
4032    std::list<LiveBarDef> live_defs;
4033    Instruction *insn, *next;
4034    BitSet bars(6, 1);
4035    int bar_id;
4036 
4037    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4038       Instruction *usei = NULL, *defi = NULL;
4039       bool need_wr_bar, need_rd_bar;
4040 
4041       next = insn->next;
4042 
4043       // Expire old barrier uses.
4044       for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4045            it != live_uses.end();) {
4046          if (insn->serial >= it->usei->serial) {
4047             int wr = getWrDepBar(it->insn);
4048             emitWtDepBar(insn, wr);
4049             bars.clr(wr); // free barrier
4050             it = live_uses.erase(it);
4051             continue;
4052          }
4053          ++it;
4054       }
4055 
4056       // Expire old barrier defs.
4057       for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4058            it != live_defs.end();) {
4059          if (insn->serial >= it->defi->serial) {
4060             int rd = getRdDepBar(it->insn);
4061             emitWtDepBar(insn, rd);
4062             bars.clr(rd); // free barrier
4063             it = live_defs.erase(it);
4064             continue;
4065          }
4066          ++it;
4067       }
4068 
4069       need_wr_bar = needWrDepBar(insn);
4070       need_rd_bar = needRdDepBar(insn);
4071 
4072       if (need_wr_bar) {
4073          // When the instruction requires to emit a write dependency barrier
4074          // (all which write something at a variable latency), find the next
4075          // instruction which reads the outputs.
4076          usei = findFirstUse(insn);
4077 
4078          // Allocate and emit a new barrier.
4079          bar_id = bars.findFreeRange(1);
4080          if (bar_id == -1)
4081             bar_id = 5;
4082          bars.set(bar_id);
4083          emitWrDepBar(insn, bar_id);
4084          if (usei)
4085             live_uses.push_back(LiveBarUse(insn, usei));
4086       }
4087 
4088       if (need_rd_bar) {
4089          // When the instruction requires to emit a read dependency barrier
4090          // (all which read something at a variable latency), find the next
4091          // instruction which will write the inputs.
4092          defi = findFirstDef(insn);
4093 
4094          if (usei && defi && usei->serial <= defi->serial)
4095             continue;
4096 
4097          // Allocate and emit a new barrier.
4098          bar_id = bars.findFreeRange(1);
4099          if (bar_id == -1)
4100             bar_id = 5;
4101          bars.set(bar_id);
4102          emitRdDepBar(insn, bar_id);
4103          if (defi)
4104             live_defs.push_back(LiveBarDef(insn, defi));
4105       }
4106    }
4107 
4108    // Remove unnecessary barrier waits.
4109    BitSet alive_bars(6, 1);
4110    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4111       int wr, rd, wt;
4112 
4113       next = insn->next;
4114 
4115       wr = getWrDepBar(insn);
4116       rd = getRdDepBar(insn);
4117       wt = getWtDepBar(insn);
4118 
4119       for (int idx = 0; idx < 6; ++idx) {
4120          if (!(wt & (1 << idx)))
4121             continue;
4122          if (!alive_bars.test(idx)) {
4123             insn->sched &= ~(1 << (11  + idx));
4124          } else {
4125             alive_bars.clr(idx);
4126          }
4127       }
4128 
4129       if (wr < 6)
4130          alive_bars.set(wr);
4131       if (rd < 6)
4132          alive_bars.set(rd);
4133    }
4134 
4135    return true;
4136 }
4137 
4138 bool
visit(Function * func)4139 SchedDataCalculatorGM107::visit(Function *func)
4140 {
4141    ArrayList insns;
4142 
4143    func->orderInstructions(insns);
4144 
4145    scoreBoards.resize(func->cfg.getSize());
4146    for (size_t i = 0; i < scoreBoards.size(); ++i)
4147       scoreBoards[i].wipe();
4148    return true;
4149 }
4150 
4151 bool
visit(BasicBlock * bb)4152 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4153 {
4154    Instruction *insn, *next = NULL;
4155    int cycle = 0;
4156 
4157    for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4158       /*XXX*/
4159       insn->sched = 0x7e0;
4160    }
4161 
4162    if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4163       return true;
4164 
4165    // Insert read/write dependency barriers for instructions which don't
4166    // operate at a fixed latency.
4167    insertBarriers(bb);
4168 
4169    score = &scoreBoards.at(bb->getId());
4170 
4171    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4172       // back branches will wait until all target dependencies are satisfied
4173       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4174          continue;
4175       BasicBlock *in = BasicBlock::get(ei.getNode());
4176       score->setMax(&scoreBoards.at(in->getId()));
4177    }
4178 
4179 #ifdef GM107_DEBUG_SCHED_DATA
4180    INFO("=== BB:%i initial scores\n", bb->getId());
4181    score->print(cycle);
4182 #endif
4183 
4184    // Because barriers are allocated locally (intra-BB), we have to make sure
4185    // that all produced barriers have been consumed before entering inside a
4186    // new basic block. The best way is to do a global allocation pre RA but
4187    // it's really more difficult, especially because of the phi nodes. Anyways,
4188    // it seems like that waiting on a barrier which has already been consumed
4189    // doesn't add any additional cost, it's just not elegant!
4190    Instruction *start = bb->getEntry();
4191    if (start && bb->cfg.incidentCount() > 0) {
4192       for (int b = 0; b < 6; b++)
4193          emitWtDepBar(start, b);
4194    }
4195 
4196    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4197       next = insn->next;
4198 
4199       commitInsn(insn, cycle);
4200       int delay = calcDelay(next, cycle);
4201       setDelay(insn, delay, next);
4202       cycle += getStall(insn);
4203 
4204       setReuseFlag(insn);
4205 
4206       // XXX: The yield flag seems to destroy a bunch of things when it is
4207       // set on every instruction, need investigation.
4208       //emitYield(insn);
4209 
4210 #ifdef GM107_DEBUG_SCHED_DATA
4211       printSchedInfo(cycle, insn);
4212       insn->print();
4213       next->print();
4214 #endif
4215    }
4216 
4217    if (!insn)
4218       return true;
4219    commitInsn(insn, cycle);
4220 
4221    int bbDelay = -1;
4222 
4223 #ifdef GM107_DEBUG_SCHED_DATA
4224    fprintf(stderr, "last instruction is : ");
4225    insn->print();
4226    fprintf(stderr, "cycle=%d\n", cycle);
4227 #endif
4228 
4229    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4230       BasicBlock *out = BasicBlock::get(ei.getNode());
4231 
4232       if (ei.getType() != Graph::Edge::BACK) {
4233          // Only test the first instruction of the outgoing block.
4234          next = out->getEntry();
4235          if (next) {
4236             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4237          } else {
4238             // When the outgoing BB is empty, make sure to set the number of
4239             // stall counts needed by the instruction because we don't know the
4240             // next instruction.
4241             bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4242          }
4243       } else {
4244          // Wait until all dependencies are satisfied.
4245          const int regsFree = score->getLatest();
4246          next = out->getFirst();
4247          for (int c = cycle; next && c < regsFree; next = next->next) {
4248             bbDelay = MAX2(bbDelay, calcDelay(next, c));
4249             c += getStall(next);
4250          }
4251          next = NULL;
4252       }
4253    }
4254    if (bb->cfg.outgoingCount() != 1)
4255       next = NULL;
4256    setDelay(insn, bbDelay, next);
4257    cycle += getStall(insn);
4258 
4259    score->rebase(cycle); // common base for initializing out blocks' scores
4260    return true;
4261 }
4262 
4263 /*******************************************************************************
4264  * main
4265  ******************************************************************************/
4266 
4267 void
prepareEmission(Function * func)4268 CodeEmitterGM107::prepareEmission(Function *func)
4269 {
4270    SchedDataCalculatorGM107 sched(targGM107);
4271    CodeEmitter::prepareEmission(func);
4272    sched.run(func, true, true);
4273 }
4274 
// Return the number of 24-byte groups ("bundles") needed to hold \p size
// bytes, rounding up. A size of 0 needs 0 bundles.
//
// The round-up is done with a division plus a remainder check rather than
// (size + 23) / 24, so that sizes close to UINT32_MAX do not wrap around and
// yield a bogus small count.
//
// NOTE(review): 24 presumably corresponds to three 8-byte instruction slots
// per bundle -- confirm against the GM107 encoding.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bytesPerBundle = 24;
   const uint32_t fullBundles = size / bytesPerBundle;
   const bool hasPartialBundle = (size % bytesPerBundle) != 0;

   return fullBundles + (hasPartialBundle ? 1u : 0u);
}
4279 
4280 void
prepareEmission(Program * prog)4281 CodeEmitterGM107::prepareEmission(Program *prog)
4282 {
4283    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4284         !fi.end(); fi.next()) {
4285       Function *func = reinterpret_cast<Function *>(fi.get());
4286       func->binPos = prog->binSize;
4287       prepareEmission(func);
4288 
4289       // adjust sizes & positions for schedulding info:
4290       if (prog->getTarget()->hasSWSched) {
4291          uint32_t adjPos = func->binPos;
4292          BasicBlock *bb = NULL;
4293          for (int i = 0; i < func->bbCount; ++i) {
4294             bb = func->bbArray[i];
4295             int32_t adjSize = bb->binSize;
4296             if (adjPos % 32) {
4297                adjSize -= 32 - adjPos % 32;
4298                if (adjSize < 0)
4299                   adjSize = 0;
4300             }
4301             adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4302             bb->binPos = adjPos;
4303             bb->binSize = adjSize;
4304             adjPos += adjSize;
4305          }
4306          if (bb)
4307             func->binSize = adjPos - func->binPos;
4308       }
4309 
4310       prog->binSize += func->binSize;
4311    }
4312 }
4313 
CodeEmitterGM107(const TargetGM107 * target)4314 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4315    : CodeEmitter(target),
4316      targGM107(target),
4317      writeIssueDelays(target->hasSWSched)
4318 {
4319    code = NULL;
4320    codeSize = codeSizeLimit = 0;
4321    relocInfo = NULL;
4322 }
4323 
4324 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4325 TargetGM107::createCodeEmitterGM107(Program::Type type)
4326 {
4327    CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4328    emit->setProgramType(type);
4329    return emit;
4330 }
4331 
4332 } // namespace nv50_ir
4333