1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
setProgramType(Program::Type pType)38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targ;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setImmediate(const Instruction *, const int s); // needs op already set
56 void setImmediateS8(const ValueRef&);
57
58 void emitCondCode(CondCode cc, int pos);
59 void emitInterpMode(const Instruction *);
60 void emitLoadStoreType(DataType ty);
61 void emitCachingMode(CacheMode c);
62
63 void emitShortSrc2(const ValueRef&);
64
65 inline uint8_t getSRegEncoding(const ValueRef&);
66
67 void roundMode_A(const Instruction *);
68 void roundMode_C(const Instruction *);
69 void roundMode_CS(const Instruction *);
70
71 void emitNegAbs12(const Instruction *);
72
73 void emitNOP(const Instruction *);
74
75 void emitLOAD(const Instruction *);
76 void emitSTORE(const Instruction *);
77 void emitMOV(const Instruction *);
78
79 void emitINTERP(const Instruction *);
80 void emitPFETCH(const Instruction *);
81 void emitVFETCH(const Instruction *);
82 void emitEXPORT(const Instruction *);
83 void emitOUT(const Instruction *);
84
85 void emitUADD(const Instruction *);
86 void emitFADD(const Instruction *);
87 void emitUMUL(const Instruction *);
88 void emitFMUL(const Instruction *);
89 void emitIMAD(const Instruction *);
90 void emitISAD(const Instruction *);
91 void emitFMAD(const Instruction *);
92
93 void emitNOT(Instruction *);
94 void emitLogicOp(const Instruction *, uint8_t subOp);
95 void emitPOPC(const Instruction *);
96 void emitINSBF(const Instruction *);
97 void emitShift(const Instruction *);
98
99 void emitSFnOp(const Instruction *, uint8_t subOp);
100
101 void emitCVT(Instruction *);
102 void emitMINMAX(const Instruction *);
103 void emitPreOp(const Instruction *);
104
105 void emitSET(const CmpInstruction *);
106 void emitSLCT(const CmpInstruction *);
107 void emitSELP(const Instruction *);
108
109 void emitTEXBAR(const Instruction *);
110 void emitTEX(const TexInstruction *);
111 void emitTEXCSAA(const TexInstruction *);
112 void emitTXQ(const TexInstruction *);
113 void emitPIXLD(const TexInstruction *);
114
115 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
116
117 void emitFlow(const Instruction *);
118
119 inline void defId(const ValueDef&, const int pos);
120 inline void srcId(const ValueRef&, const int pos);
121 inline void srcId(const ValueRef *, const int pos);
122 inline void srcId(const Instruction *, int s, const int pos);
123
124 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
125
126 inline bool isLIMM(const ValueRef&, DataType ty);
127 };
128
// For better visibility: glue two 8-digit hex halves into one 64-bit literal,
// e.g. HEX64(20000000, 00000002) is 0x2000000000000002ULL.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// reg.data of the value returned by rep() for a source (SDATA) or a
// definition (DDATA) reference.
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(ref) ((ref).rep()->reg.data)
134
srcId(const ValueRef & src,const int pos)135 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
136 {
137 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
138 }
139
srcId(const ValueRef * src,const int pos)140 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
141 {
142 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
143 }
144
srcId(const Instruction * insn,int s,int pos)145 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
146 {
147 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
148 code[pos / 32] |= r << (pos % 32);
149 }
150
srcAddr32(const ValueRef & src,const int pos)151 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
152 {
153 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
154 }
155
defId(const ValueDef & def,const int pos)156 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
157 {
158 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
159 }
160
isLIMM(const ValueRef & ref,DataType ty)161 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
162 {
163 const ImmediateValue *imm = ref.get()->asImm();
164
165 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
166 }
167
168 void
roundMode_A(const Instruction * insn)169 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
170 {
171 switch (insn->rnd) {
172 case ROUND_M: code[1] |= 1 << 23; break;
173 case ROUND_P: code[1] |= 2 << 23; break;
174 case ROUND_Z: code[1] |= 3 << 23; break;
175 default:
176 assert(insn->rnd == ROUND_N);
177 break;
178 }
179 }
180
181 void
emitNegAbs12(const Instruction * i)182 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
183 {
184 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
185 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
186 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
187 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
188 }
189
emitCondCode(CondCode cc,int pos)190 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
191 {
192 uint8_t val;
193
194 switch (cc) {
195 case CC_LT: val = 0x1; break;
196 case CC_LTU: val = 0x9; break;
197 case CC_EQ: val = 0x2; break;
198 case CC_EQU: val = 0xa; break;
199 case CC_LE: val = 0x3; break;
200 case CC_LEU: val = 0xb; break;
201 case CC_GT: val = 0x4; break;
202 case CC_GTU: val = 0xc; break;
203 case CC_NE: val = 0x5; break;
204 case CC_NEU: val = 0xd; break;
205 case CC_GE: val = 0x6; break;
206 case CC_GEU: val = 0xe; break;
207 case CC_TR: val = 0xf; break;
208 case CC_FL: val = 0x0; break;
209
210 case CC_A: val = 0x14; break;
211 case CC_NA: val = 0x13; break;
212 case CC_S: val = 0x15; break;
213 case CC_NS: val = 0x12; break;
214 case CC_C: val = 0x16; break;
215 case CC_NC: val = 0x11; break;
216 case CC_O: val = 0x17; break;
217 case CC_NO: val = 0x10; break;
218
219 default:
220 val = 0;
221 assert(!"invalid condition code");
222 break;
223 }
224 code[pos / 32] |= val << (pos % 32);
225 }
226
227 void
emitPredicate(const Instruction * i)228 CodeEmitterNVC0::emitPredicate(const Instruction *i)
229 {
230 if (i->predSrc >= 0) {
231 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
232 srcId(i->src(i->predSrc), 10);
233 if (i->cc == CC_NOT_P)
234 code[0] |= 0x2000; // negate
235 } else {
236 code[0] |= 0x1c00;
237 }
238 }
239
240 void
setAddress16(const ValueRef & src)241 CodeEmitterNVC0::setAddress16(const ValueRef& src)
242 {
243 Symbol *sym = src.get()->asSym();
244
245 assert(sym);
246
247 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
248 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
249 }
250
251 void
setImmediate(const Instruction * i,const int s)252 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
253 {
254 const ImmediateValue *imm = i->src(s).get()->asImm();
255 uint32_t u32;
256
257 assert(imm);
258 u32 = imm->reg.data.u32;
259
260 if ((code[0] & 0xf) == 0x2) {
261 // LIMM
262 code[0] |= (u32 & 0x3f) << 26;
263 code[1] |= u32 >> 6;
264 } else
265 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
266 // integer immediate
267 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
268 assert(!(code[1] & 0xc000));
269 u32 &= 0xfffff;
270 code[0] |= (u32 & 0x3f) << 26;
271 code[1] |= 0xc000 | (u32 >> 6);
272 } else {
273 // float immediate
274 assert(!(u32 & 0x00000fff));
275 assert(!(code[1] & 0xc000));
276 code[0] |= ((u32 >> 12) & 0x3f) << 26;
277 code[1] |= 0xc000 | (u32 >> 18);
278 }
279 }
280
setImmediateS8(const ValueRef & ref)281 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
282 {
283 const ImmediateValue *imm = ref.get()->asImm();
284
285 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
286
287 assert(s8 == imm->reg.data.s32);
288
289 code[0] |= (s8 & 0x3f) << 26;
290 code[0] |= (s8 >> 6) << 8;
291 }
292
293 void
emitForm_A(const Instruction * i,uint64_t opc)294 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
295 {
296 code[0] = opc;
297 code[1] = opc >> 32;
298
299 emitPredicate(i);
300
301 defId(i->def(0), 14);
302
303 int s1 = 26;
304 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
305 s1 = 49;
306
307 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
308 switch (i->getSrc(s)->reg.file) {
309 case FILE_MEMORY_CONST:
310 assert(!(code[1] & 0xc000));
311 code[1] |= (s == 2) ? 0x8000 : 0x4000;
312 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
313 setAddress16(i->src(s));
314 break;
315 case FILE_IMMEDIATE:
316 assert(s == 1 ||
317 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
318 assert(!(code[1] & 0xc000));
319 setImmediate(i, s);
320 break;
321 case FILE_GPR:
322 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
323 break;
324 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
325 break;
326 default:
327 // ignore here, can be predicate or flags, but must not be address
328 break;
329 }
330 }
331 }
332
333 void
emitForm_B(const Instruction * i,uint64_t opc)334 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
335 {
336 code[0] = opc;
337 code[1] = opc >> 32;
338
339 emitPredicate(i);
340
341 defId(i->def(0), 14);
342
343 switch (i->src(0).getFile()) {
344 case FILE_MEMORY_CONST:
345 assert(!(code[1] & 0xc000));
346 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
347 setAddress16(i->src(0));
348 break;
349 case FILE_IMMEDIATE:
350 assert(!(code[1] & 0xc000));
351 setImmediate(i, 0);
352 break;
353 case FILE_GPR:
354 srcId(i->src(0), 26);
355 break;
356 default:
357 // ignore here, can be predicate or flags, but must not be address
358 break;
359 }
360 }
361
362 void
emitForm_S(const Instruction * i,uint32_t opc,bool pred)363 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
364 {
365 code[0] = opc;
366
367 int ss2a = 0;
368 if (opc == 0x0d || opc == 0x0e)
369 ss2a = 2;
370
371 defId(i->def(0), 14);
372 srcId(i->src(0), 20);
373
374 assert(pred || (i->predSrc < 0));
375 if (pred)
376 emitPredicate(i);
377
378 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
379 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
380 assert(!(code[0] & (0x300 >> ss2a)));
381 switch (i->src(s).get()->reg.fileIndex) {
382 case 0: code[0] |= 0x100 >> ss2a; break;
383 case 1: code[0] |= 0x200 >> ss2a; break;
384 case 16: code[0] |= 0x300 >> ss2a; break;
385 default:
386 ERROR("invalid c[] space for short form\n");
387 break;
388 }
389 if (s == 1)
390 code[0] |= i->getSrc(s)->reg.data.offset << 24;
391 else
392 code[0] |= i->getSrc(s)->reg.data.offset << 6;
393 } else
394 if (i->src(s).getFile() == FILE_IMMEDIATE) {
395 assert(s == 1);
396 setImmediateS8(i->src(s));
397 } else
398 if (i->src(s).getFile() == FILE_GPR) {
399 srcId(i->src(s), (s == 1) ? 26 : 8);
400 }
401 }
402 }
403
404 void
emitShortSrc2(const ValueRef & src)405 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
406 {
407 if (src.getFile() == FILE_MEMORY_CONST) {
408 switch (src.get()->reg.fileIndex) {
409 case 0: code[0] |= 0x100; break;
410 case 1: code[0] |= 0x200; break;
411 case 16: code[0] |= 0x300; break;
412 default:
413 assert(!"unsupported file index for short op");
414 break;
415 }
416 srcAddr32(src, 20);
417 } else {
418 srcId(src, 20);
419 assert(src.getFile() == FILE_GPR);
420 }
421 }
422
423 void
emitNOP(const Instruction * i)424 CodeEmitterNVC0::emitNOP(const Instruction *i)
425 {
426 code[0] = 0x000001e4;
427 code[1] = 0x40000000;
428 emitPredicate(i);
429 }
430
431 void
emitFMAD(const Instruction * i)432 CodeEmitterNVC0::emitFMAD(const Instruction *i)
433 {
434 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
435
436 if (i->encSize == 8) {
437 if (isLIMM(i->src(1), TYPE_F32)) {
438 emitForm_A(i, HEX64(20000000, 00000002));
439 } else {
440 emitForm_A(i, HEX64(30000000, 00000000));
441
442 if (i->src(2).mod.neg())
443 code[0] |= 1 << 8;
444 }
445 roundMode_A(i);
446
447 if (neg1)
448 code[0] |= 1 << 9;
449
450 if (i->saturate)
451 code[0] |= 1 << 5;
452 if (i->ftz)
453 code[0] |= 1 << 6;
454 } else {
455 assert(!i->saturate && !i->src(2).mod.neg());
456 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
457 false);
458 if (neg1)
459 code[0] |= 1 << 4;
460 }
461 }
462
463 void
emitFMUL(const Instruction * i)464 CodeEmitterNVC0::emitFMUL(const Instruction *i)
465 {
466 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
467
468 assert(i->postFactor >= -3 && i->postFactor <= 3);
469
470 if (i->encSize == 8) {
471 if (isLIMM(i->src(1), TYPE_F32)) {
472 assert(i->postFactor == 0); // constant folded, hopefully
473 emitForm_A(i, HEX64(30000000, 00000002));
474 } else {
475 emitForm_A(i, HEX64(58000000, 00000000));
476 roundMode_A(i);
477 code[1] |= ((i->postFactor > 0) ?
478 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
479 }
480 if (neg)
481 code[1] ^= 1 << 25; // aliases with LIMM sign bit
482
483 if (i->saturate)
484 code[0] |= 1 << 5;
485
486 if (i->dnz)
487 code[0] |= 1 << 7;
488 else
489 if (i->ftz)
490 code[0] |= 1 << 6;
491 } else {
492 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
493 emitForm_S(i, 0xa8, true);
494 }
495 }
496
497 void
emitUMUL(const Instruction * i)498 CodeEmitterNVC0::emitUMUL(const Instruction *i)
499 {
500 if (i->encSize == 8) {
501 if (i->src(1).getFile() == FILE_IMMEDIATE) {
502 emitForm_A(i, HEX64(10000000, 00000002));
503 } else {
504 emitForm_A(i, HEX64(50000000, 00000003));
505 }
506 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
507 code[0] |= 1 << 6;
508 if (i->sType == TYPE_S32)
509 code[0] |= 1 << 5;
510 if (i->dType == TYPE_S32)
511 code[0] |= 1 << 7;
512 } else {
513 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
514
515 if (i->sType == TYPE_S32)
516 code[0] |= 1 << 6;
517 }
518 }
519
520 void
emitFADD(const Instruction * i)521 CodeEmitterNVC0::emitFADD(const Instruction *i)
522 {
523 if (i->encSize == 8) {
524 if (isLIMM(i->src(1), TYPE_F32)) {
525 assert(!i->saturate);
526 emitForm_A(i, HEX64(28000000, 00000002));
527
528 code[0] |= i->src(0).mod.abs() << 7;
529 code[0] |= i->src(0).mod.neg() << 9;
530
531 if (i->src(1).mod.abs())
532 code[1] &= 0xfdffffff;
533 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
534 code[1] ^= 0x02000000;
535 } else {
536 emitForm_A(i, HEX64(50000000, 00000000));
537
538 roundMode_A(i);
539 if (i->saturate)
540 code[1] |= 1 << 17;
541
542 emitNegAbs12(i);
543 if (i->op == OP_SUB) code[0] ^= 1 << 8;
544 }
545 if (i->ftz)
546 code[0] |= 1 << 5;
547 } else {
548 assert(!i->saturate && i->op != OP_SUB &&
549 !i->src(0).mod.abs() &&
550 !i->src(1).mod.neg() && !i->src(1).mod.abs());
551
552 emitForm_S(i, 0x49, true);
553
554 if (i->src(0).mod.neg())
555 code[0] |= 1 << 7;
556 }
557 }
558
559 void
emitUADD(const Instruction * i)560 CodeEmitterNVC0::emitUADD(const Instruction *i)
561 {
562 uint32_t addOp = 0;
563
564 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
565 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
566
567 if (i->src(0).mod.neg())
568 addOp |= 0x200;
569 if (i->src(1).mod.neg())
570 addOp |= 0x100;
571 if (i->op == OP_SUB) {
572 addOp ^= 0x100;
573 assert(addOp != 0x300); // would be add-plus-one
574 }
575
576 if (i->encSize == 8) {
577 if (isLIMM(i->src(1), TYPE_U32)) {
578 emitForm_A(i, HEX64(08000000, 00000002));
579 if (i->defExists(1))
580 code[1] |= 1 << 26; // write carry
581 } else {
582 emitForm_A(i, HEX64(48000000, 00000003));
583 if (i->defExists(1))
584 code[1] |= 1 << 16; // write carry
585 }
586 code[0] |= addOp;
587
588 if (i->saturate)
589 code[0] |= 1 << 5;
590 if (i->flagsSrc >= 0) // add carry
591 code[0] |= 1 << 6;
592 } else {
593 assert(!(addOp & 0x100));
594 emitForm_S(i, (addOp >> 3) |
595 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
596 }
597 }
598
599 // TODO: shl-add
600 void
emitIMAD(const Instruction * i)601 CodeEmitterNVC0::emitIMAD(const Instruction *i)
602 {
603 assert(i->encSize == 8);
604 emitForm_A(i, HEX64(20000000, 00000003));
605
606 if (isSignedType(i->dType))
607 code[0] |= 1 << 7;
608 if (isSignedType(i->sType))
609 code[0] |= 1 << 5;
610
611 code[1] |= i->saturate << 24;
612
613 if (i->flagsDef >= 0) code[1] |= 1 << 16;
614 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
615
616 if (i->src(2).mod.neg()) code[0] |= 0x10;
617 if (i->src(1).mod.neg() ^
618 i->src(0).mod.neg()) code[0] |= 0x20;
619
620 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
621 code[0] |= 1 << 6;
622 }
623
624 void
emitISAD(const Instruction * i)625 CodeEmitterNVC0::emitISAD(const Instruction *i)
626 {
627 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
628 assert(i->encSize == 8);
629
630 emitForm_A(i, HEX64(38000000, 00000003));
631
632 if (i->dType == TYPE_S32)
633 code[0] |= 1 << 5;
634 }
635
636 void
emitNOT(Instruction * i)637 CodeEmitterNVC0::emitNOT(Instruction *i)
638 {
639 assert(i->encSize == 8);
640 i->setSrc(1, i->src(0));
641 emitForm_A(i, HEX64(68000000, 000001c3));
642 }
643
644 void
emitLogicOp(const Instruction * i,uint8_t subOp)645 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
646 {
647 if (i->encSize == 8) {
648 if (isLIMM(i->src(1), TYPE_U32)) {
649 emitForm_A(i, HEX64(38000000, 00000002));
650
651 if (i->srcExists(2))
652 code[1] |= 1 << 26;
653 } else {
654 emitForm_A(i, HEX64(68000000, 00000003));
655
656 if (i->srcExists(2))
657 code[1] |= 1 << 16;
658 }
659 code[0] |= subOp << 6;
660
661 if (i->srcExists(2)) // carry
662 code[0] |= 1 << 5;
663
664 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
665 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
666 } else {
667 emitForm_S(i, (subOp << 5) |
668 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
669 }
670 }
671
672 void
emitPOPC(const Instruction * i)673 CodeEmitterNVC0::emitPOPC(const Instruction *i)
674 {
675 emitForm_A(i, HEX64(54000000, 00000004));
676
677 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
678 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
679 }
680
681 void
emitINSBF(const Instruction * i)682 CodeEmitterNVC0::emitINSBF(const Instruction *i)
683 {
684 emitForm_A(i, HEX64(28000000, 30000000));
685 }
686
687 void
emitShift(const Instruction * i)688 CodeEmitterNVC0::emitShift(const Instruction *i)
689 {
690 if (i->op == OP_SHR) {
691 emitForm_A(i, HEX64(58000000, 00000003)
692 | (isSignedType(i->dType) ? 0x20 : 0x00));
693 } else {
694 emitForm_A(i, HEX64(60000000, 00000003));
695 }
696
697 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
698 code[0] |= 1 << 9;
699 }
700
701 void
emitPreOp(const Instruction * i)702 CodeEmitterNVC0::emitPreOp(const Instruction *i)
703 {
704 if (i->encSize == 8) {
705 emitForm_B(i, HEX64(60000000, 00000000));
706
707 if (i->op == OP_PREEX2)
708 code[0] |= 0x20;
709
710 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
711 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
712 } else {
713 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
714 }
715 }
716
717 void
emitSFnOp(const Instruction * i,uint8_t subOp)718 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
719 {
720 if (i->encSize == 8) {
721 code[0] = 0x00000000 | (subOp << 26);
722 code[1] = 0xc8000000;
723
724 emitPredicate(i);
725
726 defId(i->def(0), 14);
727 srcId(i->src(0), 20);
728
729 assert(i->src(0).getFile() == FILE_GPR);
730
731 if (i->saturate) code[0] |= 1 << 5;
732
733 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
734 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
735 } else {
736 emitForm_S(i, 0x80000008 | (subOp << 26), true);
737
738 assert(!i->src(0).mod.neg());
739 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
740 }
741 }
742
743 void
emitMINMAX(const Instruction * i)744 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
745 {
746 uint64_t op;
747
748 assert(i->encSize == 8);
749
750 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
751
752 if (i->ftz)
753 op |= 1 << 5;
754 else
755 if (!isFloatType(i->dType))
756 op |= isSignedType(i->dType) ? 0x23 : 0x03;
757
758 emitForm_A(i, op);
759 emitNegAbs12(i);
760 }
761
762 void
roundMode_C(const Instruction * i)763 CodeEmitterNVC0::roundMode_C(const Instruction *i)
764 {
765 switch (i->rnd) {
766 case ROUND_M: code[1] |= 1 << 17; break;
767 case ROUND_P: code[1] |= 2 << 17; break;
768 case ROUND_Z: code[1] |= 3 << 17; break;
769 case ROUND_NI: code[0] |= 1 << 7; break;
770 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
771 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
772 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
773 case ROUND_N: break;
774 default:
775 assert(!"invalid round mode");
776 break;
777 }
778 }
779
780 void
roundMode_CS(const Instruction * i)781 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
782 {
783 switch (i->rnd) {
784 case ROUND_M:
785 case ROUND_MI: code[0] |= 1 << 16; break;
786 case ROUND_P:
787 case ROUND_PI: code[0] |= 2 << 16; break;
788 case ROUND_Z:
789 case ROUND_ZI: code[0] |= 3 << 16; break;
790 default:
791 break;
792 }
793 }
794
795 void
emitCVT(Instruction * i)796 CodeEmitterNVC0::emitCVT(Instruction *i)
797 {
798 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
799
800 switch (i->op) {
801 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
802 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
803 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
804 default:
805 break;
806 }
807
808 const bool sat = (i->op == OP_SAT) || i->saturate;
809 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
810 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
811
812 if (i->encSize == 8) {
813 emitForm_B(i, HEX64(10000000, 00000004));
814
815 roundMode_C(i);
816
817 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
818 code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
819 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
820
821 if (sat)
822 code[0] |= 0x20;
823 if (abs)
824 code[0] |= 1 << 6;
825 if (neg && i->op != OP_ABS)
826 code[0] |= 1 << 8;
827
828 if (i->ftz)
829 code[1] |= 1 << 23;
830
831 if (isSignedIntType(i->dType))
832 code[0] |= 0x080;
833 if (isSignedIntType(i->sType))
834 code[0] |= 0x200;
835
836 if (isFloatType(i->dType)) {
837 if (!isFloatType(i->sType))
838 code[1] |= 0x08000000;
839 } else {
840 if (isFloatType(i->sType))
841 code[1] |= 0x04000000;
842 else
843 code[1] |= 0x0c000000;
844 }
845 } else {
846 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
847 code[0] = 0x298;
848 } else
849 if (isFloatType(i->dType)) {
850 if (isFloatType(i->sType))
851 code[0] = 0x098;
852 else
853 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
854 } else {
855 assert(isFloatType(i->sType));
856
857 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
858 }
859
860 if (neg) code[0] |= 1 << 16;
861 if (sat) code[0] |= 1 << 18;
862 if (abs) code[0] |= 1 << 19;
863
864 roundMode_CS(i);
865 }
866 }
867
868 void
emitSET(const CmpInstruction * i)869 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
870 {
871 uint32_t hi;
872 uint32_t lo = 0;
873
874 if (i->sType == TYPE_F64)
875 lo = 0x1;
876 else
877 if (!isFloatType(i->sType))
878 lo = 0x3;
879
880 if (isFloatType(i->dType) || isSignedIntType(i->sType))
881 lo |= 0x20;
882
883 switch (i->op) {
884 case OP_SET_AND: hi = 0x10000000; break;
885 case OP_SET_OR: hi = 0x10200000; break;
886 case OP_SET_XOR: hi = 0x10400000; break;
887 default:
888 hi = 0x100e0000;
889 break;
890 }
891 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
892
893 if (i->op != OP_SET)
894 srcId(i->src(2), 32 + 17);
895
896 if (i->def(0).getFile() == FILE_PREDICATE) {
897 if (i->sType == TYPE_F32)
898 code[1] += 0x10000000;
899 else
900 code[1] += 0x08000000;
901
902 code[0] &= ~0xfc000;
903 defId(i->def(0), 17);
904 if (i->defExists(1))
905 defId(i->def(1), 14);
906 else
907 code[0] |= 0x1c000;
908 }
909
910 if (i->ftz)
911 code[1] |= 1 << 27;
912
913 emitCondCode(i->setCond, 32 + 23);
914 emitNegAbs12(i);
915 }
916
917 void
emitSLCT(const CmpInstruction * i)918 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
919 {
920 uint64_t op;
921
922 switch (i->dType) {
923 case TYPE_S32:
924 op = HEX64(30000000, 00000023);
925 break;
926 case TYPE_U32:
927 op = HEX64(30000000, 00000003);
928 break;
929 case TYPE_F32:
930 op = HEX64(38000000, 00000000);
931 break;
932 default:
933 assert(!"invalid type for SLCT");
934 op = 0;
935 break;
936 }
937 emitForm_A(i, op);
938
939 CondCode cc = i->setCond;
940
941 if (i->src(2).mod.neg())
942 cc = reverseCondCode(cc);
943
944 emitCondCode(cc, 32 + 23);
945
946 if (i->ftz)
947 code[0] |= 1 << 5;
948 }
949
emitSELP(const Instruction * i)950 void CodeEmitterNVC0::emitSELP(const Instruction *i)
951 {
952 emitForm_A(i, HEX64(20000000, 00000004));
953
954 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
955 code[1] |= 1 << 20;
956 }
957
emitTEXBAR(const Instruction * i)958 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
959 {
960 code[0] = 0x00000006 | (i->subOp << 26);
961 code[1] = 0xf0000000;
962 emitPredicate(i);
963 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
964 }
965
emitTEXCSAA(const TexInstruction * i)966 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
967 {
968 code[0] = 0x00000086;
969 code[1] = 0xd0000000;
970
971 code[1] |= i->tex.r;
972 code[1] |= i->tex.s << 8;
973
974 if (i->tex.liveOnly)
975 code[0] |= 1 << 9;
976
977 defId(i->def(0), 14);
978 srcId(i->src(0), 20);
979 }
980
981 static inline bool
isNextIndependentTex(const TexInstruction * i)982 isNextIndependentTex(const TexInstruction *i)
983 {
984 if (!i->next || !isTextureOp(i->next->op))
985 return false;
986 if (i->getDef(0)->interfers(i->next->getSrc(0)))
987 return false;
988 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
989 }
990
991 void
emitTEX(const TexInstruction * i)992 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
993 {
994 code[0] = 0x00000006;
995
996 if (isNextIndependentTex(i))
997 code[0] |= 0x080; // t mode
998 else
999 code[0] |= 0x100; // p mode
1000
1001 if (i->tex.liveOnly)
1002 code[0] |= 1 << 9;
1003
1004 switch (i->op) {
1005 case OP_TEX: code[1] = 0x80000000; break;
1006 case OP_TXB: code[1] = 0x84000000; break;
1007 case OP_TXL: code[1] = 0x86000000; break;
1008 case OP_TXF: code[1] = 0x90000000; break;
1009 case OP_TXG: code[1] = 0xa0000000; break;
1010 case OP_TXD: code[1] = 0xe0000000; break;
1011 default:
1012 assert(!"invalid texture op");
1013 break;
1014 }
1015 if (i->op == OP_TXF) {
1016 if (!i->tex.levelZero)
1017 code[1] |= 0x02000000;
1018 } else
1019 if (i->tex.levelZero) {
1020 code[1] |= 0x02000000;
1021 }
1022
1023 if (i->op != OP_TXD && i->tex.derivAll)
1024 code[1] |= 1 << 13;
1025
1026 defId(i->def(0), 14);
1027 srcId(i->src(0), 20);
1028
1029 emitPredicate(i);
1030
1031 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1032
1033 code[1] |= i->tex.mask << 14;
1034
1035 code[1] |= i->tex.r;
1036 code[1] |= i->tex.s << 8;
1037 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1038 code[1] |= 1 << 18; // in 1st source (with array index)
1039
1040 // texture target:
1041 code[1] |= (i->tex.target.getDim() - 1) << 20;
1042 if (i->tex.target.isCube())
1043 code[1] += 2 << 20;
1044 if (i->tex.target.isArray())
1045 code[1] |= 1 << 19;
1046 if (i->tex.target.isShadow())
1047 code[1] |= 1 << 24;
1048
1049 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1050
1051 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1052 // lzero
1053 if (i->op == OP_TXL)
1054 code[1] &= ~(1 << 26);
1055 else
1056 if (i->op == OP_TXF)
1057 code[1] &= ~(1 << 25);
1058 }
1059 if (i->tex.target == TEX_TARGET_2D_MS ||
1060 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1061 code[1] |= 1 << 23;
1062
1063 if (i->tex.useOffsets) // in vecSrc0.w
1064 code[1] |= 1 << 22;
1065
1066 srcId(i, src1, 26);
1067 }
1068
1069 void
emitTXQ(const TexInstruction * i)1070 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1071 {
1072 code[0] = 0x00000086;
1073 code[1] = 0xc0000000;
1074
1075 switch (i->tex.query) {
1076 case TXQ_DIMS: code[1] |= 0 << 22; break;
1077 case TXQ_TYPE: code[1] |= 1 << 22; break;
1078 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1079 case TXQ_FILTER: code[1] |= 3 << 22; break;
1080 case TXQ_LOD: code[1] |= 4 << 22; break;
1081 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1082 default:
1083 assert(!"invalid texture query");
1084 break;
1085 }
1086
1087 code[1] |= i->tex.mask << 14;
1088
1089 code[1] |= i->tex.r;
1090 code[1] |= i->tex.s << 8;
1091 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1092 code[1] |= 1 << 18;
1093
1094 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1095
1096 defId(i->def(0), 14);
1097 srcId(i->src(0), 20);
1098 srcId(i, src1, 26);
1099
1100 emitPredicate(i);
1101 }
1102
1103 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1104 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1105 {
1106 code[0] = 0x00000000 | (laneMask << 6);
1107 code[1] = 0x48000000 | qOp;
1108
1109 defId(i->def(0), 14);
1110 srcId(i->src(0), 20);
1111 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1112
1113 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1114 code[0] |= 1 << 9; // dall
1115
1116 emitPredicate(i);
1117 }
1118
1119 void
emitFlow(const Instruction * i)1120 CodeEmitterNVC0::emitFlow(const Instruction *i)
1121 {
1122 const FlowInstruction *f = i->asFlow();
1123
1124 unsigned mask; // bit 0: predicate, bit 1: target
1125
1126 code[0] = 0x00000007;
1127
1128 switch (i->op) {
1129 case OP_BRA:
1130 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1131 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1132 code[0] |= 0x4000;
1133 mask = 3;
1134 break;
1135 case OP_CALL:
1136 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1137 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1138 code[0] |= 0x4000;
1139 mask = 2;
1140 break;
1141
1142 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1143 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1144 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1145 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1146 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1147
1148 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1149 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1150 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1151 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1152
1153 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1154 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1155 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1156 default:
1157 assert(!"invalid flow operation");
1158 return;
1159 }
1160
1161 if (mask & 1) {
1162 emitPredicate(i);
1163 if (i->flagsSrc < 0)
1164 code[0] |= 0x1e0;
1165 }
1166
1167 if (!f)
1168 return;
1169
1170 if (f->allWarp)
1171 code[0] |= 1 << 15;
1172 if (f->limit)
1173 code[0] |= 1 << 16;
1174
1175 if (f->op == OP_CALL) {
1176 if (f->builtin) {
1177 assert(f->absolute);
1178 uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
1179 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1180 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1181 } else {
1182 assert(!f->absolute);
1183 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1184 code[0] |= (pcRel & 0x3f) << 26;
1185 code[1] |= (pcRel >> 6) & 0x3ffff;
1186 }
1187 } else
1188 if (mask & 2) {
1189 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1190 // currently we don't want absolute branches
1191 assert(!f->absolute);
1192 code[0] |= (pcRel & 0x3f) << 26;
1193 code[1] |= (pcRel >> 6) & 0x3ffff;
1194 }
1195 }
1196
1197 void
emitPFETCH(const Instruction * i)1198 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1199 {
1200 uint32_t prim = i->src(0).get()->reg.data.u32;
1201
1202 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1203 code[1] = 0x00000000 | (prim >> 6);
1204
1205 emitPredicate(i);
1206
1207 defId(i->def(0), 14);
1208 srcId(i->src(1), 20);
1209 }
1210
1211 void
emitVFETCH(const Instruction * i)1212 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1213 {
1214 code[0] = 0x00000006;
1215 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1216
1217 if (i->perPatch)
1218 code[0] |= 0x100;
1219 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1220 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1221
1222 emitPredicate(i);
1223
1224 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1225
1226 defId(i->def(0), 14);
1227 srcId(i->src(0).getIndirect(0), 20);
1228 srcId(i->src(0).getIndirect(1), 26); // vertex address
1229 }
1230
1231 void
emitEXPORT(const Instruction * i)1232 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1233 {
1234 unsigned int size = typeSizeof(i->dType);
1235
1236 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1237 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1238
1239 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1240
1241 if (i->perPatch)
1242 code[0] |= 0x100;
1243
1244 emitPredicate(i);
1245
1246 assert(i->src(1).getFile() == FILE_GPR);
1247
1248 srcId(i->src(0).getIndirect(0), 20);
1249 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1250 srcId(i->src(1), 26);
1251 }
1252
1253 void
emitOUT(const Instruction * i)1254 CodeEmitterNVC0::emitOUT(const Instruction *i)
1255 {
1256 code[0] = 0x00000006;
1257 code[1] = 0x1c000000;
1258
1259 emitPredicate(i);
1260
1261 defId(i->def(0), 14); // new secret address
1262 srcId(i->src(0), 20); // old secret address, should be 0 initially
1263
1264 assert(i->src(0).getFile() == FILE_GPR);
1265
1266 if (i->op == OP_EMIT)
1267 code[0] |= 1 << 5;
1268 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1269 code[0] |= 1 << 6;
1270
1271 // vertex stream
1272 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1273 code[1] |= 0xc000;
1274 code[0] |= SDATA(i->src(1)).u32 << 26;
1275 } else {
1276 srcId(i->src(1), 26);
1277 }
1278 }
1279
1280 void
emitInterpMode(const Instruction * i)1281 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1282 {
1283 if (i->encSize == 8) {
1284 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1285 } else {
1286 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1287 code[0] |= 0x80;
1288 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1289 }
1290 }
1291
1292 void
emitINTERP(const Instruction * i)1293 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1294 {
1295 const uint32_t base = i->getSrc(0)->reg.data.offset;
1296
1297 if (i->encSize == 8) {
1298 code[0] = 0x00000000;
1299 code[1] = 0xc0000000 | (base & 0xffff);
1300
1301 if (i->saturate)
1302 code[0] |= 1 << 5;
1303
1304 if (i->op == OP_PINTERP)
1305 srcId(i->src(1), 26);
1306 else
1307 code[0] |= 0x3f << 26;
1308
1309 srcId(i->src(0).getIndirect(0), 20);
1310 } else {
1311 assert(i->op == OP_PINTERP);
1312 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1313 srcId(i->src(1), 20);
1314 }
1315 emitInterpMode(i);
1316
1317 emitPredicate(i);
1318 defId(i->def(0), 14);
1319
1320 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1321 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1322 else
1323 code[1] |= 0x3f << 17;
1324 }
1325
1326 void
emitLoadStoreType(DataType ty)1327 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1328 {
1329 uint8_t val;
1330
1331 switch (ty) {
1332 case TYPE_U8:
1333 val = 0x00;
1334 break;
1335 case TYPE_S8:
1336 val = 0x20;
1337 break;
1338 case TYPE_F16:
1339 case TYPE_U16:
1340 val = 0x40;
1341 break;
1342 case TYPE_S16:
1343 val = 0x60;
1344 break;
1345 case TYPE_F32:
1346 case TYPE_U32:
1347 case TYPE_S32:
1348 val = 0x80;
1349 break;
1350 case TYPE_F64:
1351 case TYPE_U64:
1352 case TYPE_S64:
1353 val = 0xa0;
1354 break;
1355 case TYPE_B128:
1356 val = 0xc0;
1357 break;
1358 default:
1359 val = 0x80;
1360 assert(!"invalid type");
1361 break;
1362 }
1363 code[0] |= val;
1364 }
1365
1366 void
emitCachingMode(CacheMode c)1367 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1368 {
1369 uint32_t val;
1370
1371 switch (c) {
1372 case CACHE_CA:
1373 // case CACHE_WB:
1374 val = 0x000;
1375 break;
1376 case CACHE_CG:
1377 val = 0x100;
1378 break;
1379 case CACHE_CS:
1380 val = 0x200;
1381 break;
1382 case CACHE_CV:
1383 // case CACHE_WT:
1384 val = 0x300;
1385 break;
1386 default:
1387 val = 0;
1388 assert(!"invalid caching mode");
1389 break;
1390 }
1391 code[0] |= val;
1392 }
1393
1394 void
emitSTORE(const Instruction * i)1395 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1396 {
1397 uint32_t opc;
1398
1399 switch (i->src(0).getFile()) {
1400 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1401 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1402 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1403 default:
1404 assert(!"invalid memory file");
1405 opc = 0;
1406 break;
1407 }
1408 code[0] = 0x00000005;
1409 code[1] = opc;
1410
1411 setAddress16(i->src(0));
1412 srcId(i->src(1), 14);
1413 srcId(i->src(0).getIndirect(0), 20);
1414
1415 emitPredicate(i);
1416
1417 emitLoadStoreType(i->dType);
1418 emitCachingMode(i->cache);
1419 }
1420
1421 void
emitLOAD(const Instruction * i)1422 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1423 {
1424 uint32_t opc;
1425
1426 code[0] = 0x00000005;
1427
1428 switch (i->src(0).getFile()) {
1429 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1430 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1431 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1432 case FILE_MEMORY_CONST:
1433 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1434 emitMOV(i); // not sure if this is any better
1435 return;
1436 }
1437 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1438 code[0] = 0x00000006 | (i->subOp << 8);
1439 break;
1440 default:
1441 assert(!"invalid memory file");
1442 opc = 0;
1443 break;
1444 }
1445 code[1] = opc;
1446
1447 defId(i->def(0), 14);
1448
1449 setAddress16(i->src(0));
1450 srcId(i->src(0).getIndirect(0), 20);
1451
1452 emitPredicate(i);
1453
1454 emitLoadStoreType(i->dType);
1455 emitCachingMode(i->cache);
1456 }
1457
1458 uint8_t
getSRegEncoding(const ValueRef & ref)1459 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1460 {
1461 switch (SDATA(ref).sv.sv) {
1462 case SV_LANEID: return 0x00;
1463 case SV_PHYSID: return 0x03;
1464 case SV_VERTEX_COUNT: return 0x10;
1465 case SV_INVOCATION_ID: return 0x11;
1466 case SV_YDIR: return 0x12;
1467 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1468 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1469 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1470 case SV_GRIDID: return 0x2c;
1471 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1472 case SV_LBASE: return 0x34;
1473 case SV_SBASE: return 0x30;
1474 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1475 default:
1476 assert(!"no sreg for system value");
1477 return 0;
1478 }
1479 }
1480
1481 void
emitMOV(const Instruction * i)1482 CodeEmitterNVC0::emitMOV(const Instruction *i)
1483 {
1484 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1485 uint8_t sr = getSRegEncoding(i->src(0));
1486
1487 if (i->encSize == 8) {
1488 code[0] = 0x00000004 | (sr << 26);
1489 code[1] = 0x2c000000;
1490 } else {
1491 code[0] = 0x40000008 | (sr << 20);
1492 }
1493 defId(i->def(0), 14);
1494
1495 emitPredicate(i);
1496 } else
1497 if (i->encSize == 8) {
1498 uint64_t opc;
1499
1500 if (i->src(0).getFile() == FILE_IMMEDIATE)
1501 opc = HEX64(18000000, 000001e2);
1502 else
1503 if (i->src(0).getFile() == FILE_PREDICATE)
1504 opc = HEX64(080e0000, 1c000004);
1505 else
1506 opc = HEX64(28000000, 00000004);
1507
1508 opc |= i->lanes << 5;
1509
1510 emitForm_B(i, opc);
1511 } else {
1512 uint32_t imm;
1513
1514 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1515 imm = SDATA(i->src(0)).u32;
1516 if (imm & 0xfff00000) {
1517 assert(!(imm & 0x000fffff));
1518 code[0] = 0x00000318 | imm;
1519 } else {
1520 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1521 code[0] = 0x00000118 | (imm << 20);
1522 }
1523 } else {
1524 code[0] = 0x0028;
1525 emitShortSrc2(i->src(0));
1526 }
1527 defId(i->def(0), 14);
1528
1529 emitPredicate(i);
1530 }
1531 }
1532
1533 bool
emitInstruction(Instruction * insn)1534 CodeEmitterNVC0::emitInstruction(Instruction *insn)
1535 {
1536 unsigned int size = insn->encSize;
1537
1538 if (writeIssueDelays && !(codeSize & 0x3f))
1539 size += 8;
1540
1541 if (!insn->encSize) {
1542 ERROR("skipping unencodable instruction: "); insn->print();
1543 return false;
1544 } else
1545 if (codeSize + size > codeSizeLimit) {
1546 ERROR("code emitter output buffer too small\n");
1547 return false;
1548 }
1549
1550 if (writeIssueDelays) {
1551 if (!(codeSize & 0x3f)) {
1552 code[0] = 0x00000007; // cf issue delay "instruction"
1553 code[1] = 0x20000000;
1554 code += 2;
1555 codeSize += 8;
1556 }
1557 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
1558 uint32_t *data = code - (id * 2 + 2);
1559 if (id <= 2) {
1560 data[0] |= insn->sched << (id * 8 + 4);
1561 } else
1562 if (id == 3) {
1563 data[0] |= insn->sched << 28;
1564 data[1] |= insn->sched >> 4;
1565 } else {
1566 data[1] |= insn->sched << ((id - 4) * 8 + 4);
1567 }
1568 }
1569
1570 // assert that instructions with multiple defs don't corrupt registers
1571 for (int d = 0; insn->defExists(d); ++d)
1572 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1573
1574 switch (insn->op) {
1575 case OP_MOV:
1576 case OP_RDSV:
1577 emitMOV(insn);
1578 break;
1579 case OP_NOP:
1580 break;
1581 case OP_LOAD:
1582 emitLOAD(insn);
1583 break;
1584 case OP_STORE:
1585 emitSTORE(insn);
1586 break;
1587 case OP_LINTERP:
1588 case OP_PINTERP:
1589 emitINTERP(insn);
1590 break;
1591 case OP_VFETCH:
1592 emitVFETCH(insn);
1593 break;
1594 case OP_EXPORT:
1595 emitEXPORT(insn);
1596 break;
1597 case OP_PFETCH:
1598 emitPFETCH(insn);
1599 break;
1600 case OP_EMIT:
1601 case OP_RESTART:
1602 emitOUT(insn);
1603 break;
1604 case OP_ADD:
1605 case OP_SUB:
1606 if (isFloatType(insn->dType))
1607 emitFADD(insn);
1608 else
1609 emitUADD(insn);
1610 break;
1611 case OP_MUL:
1612 if (isFloatType(insn->dType))
1613 emitFMUL(insn);
1614 else
1615 emitUMUL(insn);
1616 break;
1617 case OP_MAD:
1618 case OP_FMA:
1619 if (isFloatType(insn->dType))
1620 emitFMAD(insn);
1621 else
1622 emitIMAD(insn);
1623 break;
1624 case OP_SAD:
1625 emitISAD(insn);
1626 break;
1627 case OP_NOT:
1628 emitNOT(insn);
1629 break;
1630 case OP_AND:
1631 emitLogicOp(insn, 0);
1632 break;
1633 case OP_OR:
1634 emitLogicOp(insn, 1);
1635 break;
1636 case OP_XOR:
1637 emitLogicOp(insn, 2);
1638 break;
1639 case OP_SHL:
1640 case OP_SHR:
1641 emitShift(insn);
1642 break;
1643 case OP_SET:
1644 case OP_SET_AND:
1645 case OP_SET_OR:
1646 case OP_SET_XOR:
1647 emitSET(insn->asCmp());
1648 break;
1649 case OP_SELP:
1650 emitSELP(insn);
1651 break;
1652 case OP_SLCT:
1653 emitSLCT(insn->asCmp());
1654 break;
1655 case OP_MIN:
1656 case OP_MAX:
1657 emitMINMAX(insn);
1658 break;
1659 case OP_ABS:
1660 case OP_NEG:
1661 case OP_CEIL:
1662 case OP_FLOOR:
1663 case OP_TRUNC:
1664 case OP_CVT:
1665 case OP_SAT:
1666 emitCVT(insn);
1667 break;
1668 case OP_RSQ:
1669 emitSFnOp(insn, 5);
1670 break;
1671 case OP_RCP:
1672 emitSFnOp(insn, 4);
1673 break;
1674 case OP_LG2:
1675 emitSFnOp(insn, 3);
1676 break;
1677 case OP_EX2:
1678 emitSFnOp(insn, 2);
1679 break;
1680 case OP_SIN:
1681 emitSFnOp(insn, 1);
1682 break;
1683 case OP_COS:
1684 emitSFnOp(insn, 0);
1685 break;
1686 case OP_PRESIN:
1687 case OP_PREEX2:
1688 emitPreOp(insn);
1689 break;
1690 case OP_TEX:
1691 case OP_TXB:
1692 case OP_TXL:
1693 case OP_TXD:
1694 case OP_TXF:
1695 emitTEX(insn->asTex());
1696 break;
1697 case OP_TXQ:
1698 emitTXQ(insn->asTex());
1699 break;
1700 case OP_TEXBAR:
1701 emitTEXBAR(insn);
1702 break;
1703 case OP_BRA:
1704 case OP_CALL:
1705 case OP_PRERET:
1706 case OP_RET:
1707 case OP_DISCARD:
1708 case OP_EXIT:
1709 case OP_PRECONT:
1710 case OP_CONT:
1711 case OP_PREBREAK:
1712 case OP_BREAK:
1713 case OP_JOINAT:
1714 case OP_BRKPT:
1715 case OP_QUADON:
1716 case OP_QUADPOP:
1717 emitFlow(insn);
1718 break;
1719 case OP_QUADOP:
1720 emitQUADOP(insn, insn->subOp, insn->lanes);
1721 break;
1722 case OP_DFDX:
1723 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
1724 break;
1725 case OP_DFDY:
1726 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
1727 break;
1728 case OP_POPCNT:
1729 emitPOPC(insn);
1730 break;
1731 case OP_JOIN:
1732 emitNOP(insn);
1733 insn->join = 1;
1734 break;
1735 case OP_PHI:
1736 case OP_UNION:
1737 case OP_CONSTRAINT:
1738 ERROR("operation should have been eliminated");
1739 return false;
1740 case OP_EXP:
1741 case OP_LOG:
1742 case OP_SQRT:
1743 case OP_POW:
1744 ERROR("operation should have been lowered\n");
1745 return false;
1746 default:
1747 ERROR("unknow op\n");
1748 return false;
1749 }
1750
1751 if (insn->join) {
1752 code[0] |= 0x10;
1753 assert(insn->encSize == 8);
1754 }
1755
1756 code += insn->encSize / 4;
1757 codeSize += insn->encSize;
1758 return true;
1759 }
1760
1761 uint32_t
getMinEncodingSize(const Instruction * i) const1762 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
1763 {
1764 const Target::OpInfo &info = targ->getOpInfo(i);
1765
1766 if (writeIssueDelays || info.minEncSize == 8 || 1)
1767 return 8;
1768
1769 if (i->ftz || i->saturate || i->join)
1770 return 8;
1771 if (i->rnd != ROUND_N)
1772 return 8;
1773 if (i->predSrc >= 0 && i->op == OP_MAD)
1774 return 8;
1775
1776 if (i->op == OP_PINTERP) {
1777 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
1778 return 8;
1779 } else
1780 if (i->op == OP_MOV && i->lanes != 0xf) {
1781 return 8;
1782 }
1783
1784 for (int s = 0; i->srcExists(s); ++s) {
1785 if (i->src(s).isIndirect(0))
1786 return 8;
1787
1788 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
1789 if (SDATA(i->src(s)).offset >= 0x100)
1790 return 8;
1791 if (i->getSrc(s)->reg.fileIndex > 1 &&
1792 i->getSrc(s)->reg.fileIndex != 16)
1793 return 8;
1794 } else
1795 if (i->src(s).getFile() == FILE_IMMEDIATE) {
1796 if (i->dType == TYPE_F32) {
1797 if (SDATA(i->src(s)).u32 >= 0x100)
1798 return 8;
1799 } else {
1800 if (SDATA(i->src(s)).u32 > 0xff)
1801 return 8;
1802 }
1803 }
1804
1805 if (i->op == OP_CVT)
1806 continue;
1807 if (i->src(s).mod != Modifier(0)) {
1808 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
1809 if (i->op != OP_RSQ)
1810 return 8;
1811 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
1812 if (i->op != OP_ADD || s != 0)
1813 return 8;
1814 }
1815 }
1816
1817 return 4;
1818 }
1819
1820 // Simplified, erring on safe side.
1821 class SchedDataCalculator : public Pass
1822 {
1823 public:
SchedDataCalculator(const Target * targ)1824 SchedDataCalculator(const Target *targ) : targ(targ) { }
1825
1826 private:
1827 struct RegScores
1828 {
1829 struct Resource {
1830 int st[DATA_FILE_COUNT]; // LD to LD delay 3
1831 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
1832 int tex; // TEX to non-TEX delay 17 (0x11)
1833 int sfu; // SFU to SFU delay 3 (except PRE-ops)
1834 int imul; // integer MUL to MUL delay 3
1835 } res;
1836 struct ScoreData {
1837 int r[64];
1838 int p[8];
1839 int c;
1840 } rd, wr;
1841 int base;
1842
rebasenv50_ir::SchedDataCalculator::RegScores1843 void rebase(const int base)
1844 {
1845 const int delta = this->base - base;
1846 if (!delta)
1847 return;
1848 this->base = 0;
1849
1850 for (int i = 0; i < 64; ++i) {
1851 rd.r[i] += delta;
1852 wr.r[i] += delta;
1853 }
1854 for (int i = 0; i < 8; ++i) {
1855 rd.p[i] += delta;
1856 wr.p[i] += delta;
1857 }
1858 rd.c += delta;
1859 wr.c += delta;
1860
1861 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1862 res.ld[f] += delta;
1863 res.st[f] += delta;
1864 }
1865 res.sfu += delta;
1866 res.imul += delta;
1867 res.tex += delta;
1868 }
wipenv50_ir::SchedDataCalculator::RegScores1869 void wipe()
1870 {
1871 memset(&rd, 0, sizeof(rd));
1872 memset(&wr, 0, sizeof(wr));
1873 memset(&res, 0, sizeof(res));
1874 }
getLatestnv50_ir::SchedDataCalculator::RegScores1875 int getLatest(const ScoreData& d) const
1876 {
1877 int max = 0;
1878 for (int i = 0; i < 64; ++i)
1879 if (d.r[i] > max)
1880 max = d.r[i];
1881 for (int i = 0; i < 8; ++i)
1882 if (d.p[i] > max)
1883 max = d.p[i];
1884 if (d.c > max)
1885 max = d.c;
1886 return max;
1887 }
getLatestRdnv50_ir::SchedDataCalculator::RegScores1888 inline int getLatestRd() const
1889 {
1890 return getLatest(rd);
1891 }
getLatestWrnv50_ir::SchedDataCalculator::RegScores1892 inline int getLatestWr() const
1893 {
1894 return getLatest(wr);
1895 }
getLatestnv50_ir::SchedDataCalculator::RegScores1896 inline int getLatest() const
1897 {
1898 const int a = getLatestRd();
1899 const int b = getLatestWr();
1900
1901 int max = MAX2(a, b);
1902 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1903 max = MAX2(res.ld[f], max);
1904 max = MAX2(res.st[f], max);
1905 }
1906 max = MAX2(res.sfu, max);
1907 max = MAX2(res.imul, max);
1908 max = MAX2(res.tex, max);
1909 return max;
1910 }
setMaxnv50_ir::SchedDataCalculator::RegScores1911 void setMax(const RegScores *that)
1912 {
1913 for (int i = 0; i < 64; ++i) {
1914 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
1915 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
1916 }
1917 for (int i = 0; i < 8; ++i) {
1918 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
1919 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
1920 }
1921 rd.c = MAX2(rd.c, that->rd.c);
1922 wr.c = MAX2(wr.c, that->wr.c);
1923
1924 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1925 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
1926 res.st[f] = MAX2(res.st[f], that->res.st[f]);
1927 }
1928 res.sfu = MAX2(res.sfu, that->res.sfu);
1929 res.imul = MAX2(res.imul, that->res.imul);
1930 res.tex = MAX2(res.tex, that->res.tex);
1931 }
printnv50_ir::SchedDataCalculator::RegScores1932 void print(int cycle)
1933 {
1934 for (int i = 0; i < 64; ++i) {
1935 if (rd.r[i] > cycle)
1936 INFO("rd $r%i @ %i\n", i, rd.r[i]);
1937 if (wr.r[i] > cycle)
1938 INFO("wr $r%i @ %i\n", i, wr.r[i]);
1939 }
1940 for (int i = 0; i < 8; ++i) {
1941 if (rd.p[i] > cycle)
1942 INFO("rd $p%i @ %i\n", i, rd.p[i]);
1943 if (wr.p[i] > cycle)
1944 INFO("wr $p%i @ %i\n", i, wr.p[i]);
1945 }
1946 if (rd.c > cycle)
1947 INFO("rd $c @ %i\n", rd.c);
1948 if (wr.c > cycle)
1949 INFO("wr $c @ %i\n", wr.c);
1950 if (res.sfu > cycle)
1951 INFO("sfu @ %i\n", res.sfu);
1952 if (res.imul > cycle)
1953 INFO("imul @ %i\n", res.imul);
1954 if (res.tex > cycle)
1955 INFO("tex @ %i\n", res.tex);
1956 }
1957 };
1958
1959 RegScores *score; // for current BB
1960 std::vector<RegScores> scoreBoards;
1961 int cycle;
1962 int prevData;
1963 operation prevOp;
1964
1965 const Target *targ;
1966
1967 bool visit(Function *);
1968 bool visit(BasicBlock *);
1969
1970 void commitInsn(const Instruction *, int cycle);
1971 int calcDelay(const Instruction *, int cycle) const;
1972 void setDelay(Instruction *, int delay, Instruction *next);
1973
1974 void recordRd(const Value *, const int ready);
1975 void recordWr(const Value *, const int ready);
1976 void checkRd(const Value *, int cycle, int& delay) const;
1977 void checkWr(const Value *, int cycle, int& delay) const;
1978
1979 int getCycles(const Instruction *, int origDelay) const;
1980 };
1981
1982 void
setDelay(Instruction * insn,int delay,Instruction * next)1983 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
1984 {
1985 if (insn->op == OP_EXIT)
1986 delay = MAX2(delay, 14);
1987
1988 if (insn->op == OP_TEXBAR) {
1989 // TODO: except if results not used before EXIT
1990 insn->sched = 0xc2;
1991 } else
1992 if (insn->op == OP_JOIN || insn->join) {
1993 insn->sched = 0x00;
1994 } else
1995 if (delay >= 0 || prevData == 0x04 ||
1996 !next || !targ->canDualIssue(insn, next)) {
1997 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
1998 if (prevOp == OP_EXPORT)
1999 insn->sched |= 0x40;
2000 else
2001 insn->sched |= 0x20;
2002 } else {
2003 insn->sched = 0x04; // dual-issue
2004 }
2005
2006 if (prevData != 0x04 || prevOp != OP_EXPORT)
2007 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2008 prevOp = insn->op;
2009
2010 prevData = insn->sched;
2011 }
2012
2013 int
getCycles(const Instruction * insn,int origDelay) const2014 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2015 {
2016 if (insn->sched & 0x80) {
2017 int c = (insn->sched & 0x0f) * 2 + 1;
2018 if (insn->op == OP_TEXBAR && origDelay > 0)
2019 c += origDelay;
2020 return c;
2021 }
2022 if (insn->sched & 0x60)
2023 return (insn->sched & 0x1f) + 1;
2024 return (insn->sched == 0x04) ? 0 : 32;
2025 }
2026
2027 bool
visit(Function * func)2028 SchedDataCalculator::visit(Function *func)
2029 {
2030 scoreBoards.resize(func->cfg.getSize());
2031 for (size_t i = 0; i < scoreBoards.size(); ++i)
2032 scoreBoards[i].wipe();
2033 return true;
2034 }
2035
2036 bool
visit(BasicBlock * bb)2037 SchedDataCalculator::visit(BasicBlock *bb)
2038 {
2039 Instruction *insn;
2040 Instruction *next = NULL;
2041
2042 int cycle = 0;
2043
2044 prevData = 0x00;
2045 prevOp = OP_NOP;
2046 score = &scoreBoards.at(bb->getId());
2047
2048 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2049 BasicBlock *in = BasicBlock::get(ei.getNode());
2050 if (in->getExit()) {
2051 if (prevData != 0x04)
2052 prevData = in->getExit()->sched;
2053 prevOp = in->getExit()->op;
2054 }
2055 if (ei.getType() != Graph::Edge::BACK)
2056 score->setMax(&scoreBoards.at(in->getId()));
2057 // back branches will wait until all target dependencies are satisfied
2058 }
2059 if (bb->cfg.incidentCount() > 1)
2060 prevOp = OP_NOP;
2061
2062 #ifdef NVC0_DEBUG_SCHED_DATA
2063 INFO("=== BB:%i initial scores\n", bb->getId());
2064 score->print(cycle);
2065 #endif
2066
2067 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2068 next = insn->next;
2069
2070 commitInsn(insn, cycle);
2071 int delay = calcDelay(next, cycle);
2072 setDelay(insn, delay, next);
2073 cycle += getCycles(insn, delay);
2074
2075 #ifdef NVC0_DEBUG_SCHED_DATA
2076 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2077 insn->print();
2078 next->print();
2079 #endif
2080 }
2081 if (!insn)
2082 return true;
2083 commitInsn(insn, cycle);
2084
2085 int bbDelay = -1;
2086
2087 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2088 BasicBlock *out = BasicBlock::get(ei.getNode());
2089
2090 if (ei.getType() != Graph::Edge::BACK) {
2091 // only test the first instruction of the outgoing block
2092 next = out->getEntry();
2093 if (next)
2094 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2095 } else {
2096 // wait until all dependencies are satisfied
2097 const int regsFree = score->getLatest();
2098 next = out->getFirst();
2099 for (int c = cycle; next && c < regsFree; next = next->next) {
2100 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2101 c += getCycles(next, bbDelay);
2102 }
2103 next = NULL;
2104 }
2105 }
2106 if (bb->cfg.outgoingCount() != 1)
2107 next = NULL;
2108 setDelay(insn, bbDelay, next);
2109 cycle += getCycles(insn, bbDelay);
2110
2111 score->rebase(cycle); // common base for initializing out blocks' scores
2112 return true;
2113 }
2114
2115 #define NVE4_MAX_ISSUE_DELAY 0x1f
2116 int
calcDelay(const Instruction * insn,int cycle) const2117 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2118 {
2119 int delay = 0, ready = cycle;
2120
2121 for (int s = 0; insn->srcExists(s); ++s)
2122 checkRd(insn->getSrc(s), cycle, delay);
2123 // WAR & WAW don't seem to matter
2124 // for (int s = 0; insn->srcExists(s); ++s)
2125 // recordRd(insn->getSrc(s), cycle);
2126
2127 switch (Target::getOpClass(insn->op)) {
2128 case OPCLASS_SFU:
2129 ready = score->res.sfu;
2130 break;
2131 case OPCLASS_ARITH:
2132 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2133 ready = score->res.imul;
2134 break;
2135 case OPCLASS_TEXTURE:
2136 ready = score->res.tex;
2137 break;
2138 case OPCLASS_LOAD:
2139 ready = score->res.ld[insn->src(0).getFile()];
2140 break;
2141 case OPCLASS_STORE:
2142 ready = score->res.st[insn->src(0).getFile()];
2143 break;
2144 default:
2145 break;
2146 }
2147 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2148 ready = MAX2(ready, score->res.tex);
2149
2150 delay = MAX2(delay, ready - cycle);
2151
2152 // if can issue next cycle, delay is 0, not 1
2153 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2154 }
2155
2156 void
commitInsn(const Instruction * insn,int cycle)2157 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2158 {
2159 const int ready = cycle + targ->getLatency(insn);
2160
2161 for (int d = 0; insn->defExists(d); ++d)
2162 recordWr(insn->getDef(d), ready);
2163 // WAR & WAW don't seem to matter
2164 // for (int s = 0; insn->srcExists(s); ++s)
2165 // recordRd(insn->getSrc(s), cycle);
2166
2167 switch (Target::getOpClass(insn->op)) {
2168 case OPCLASS_SFU:
2169 score->res.sfu = cycle + 4;
2170 break;
2171 case OPCLASS_ARITH:
2172 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2173 score->res.imul = cycle + 4;
2174 break;
2175 case OPCLASS_TEXTURE:
2176 score->res.tex = cycle + 18;
2177 break;
2178 case OPCLASS_LOAD:
2179 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2180 break;
2181 score->res.ld[insn->src(0).getFile()] = cycle + 4;
2182 score->res.st[insn->src(0).getFile()] = ready;
2183 break;
2184 case OPCLASS_STORE:
2185 score->res.st[insn->src(0).getFile()] = cycle + 4;
2186 score->res.ld[insn->src(0).getFile()] = ready;
2187 break;
2188 case OPCLASS_OTHER:
2189 if (insn->op == OP_TEXBAR)
2190 score->res.tex = cycle;
2191 break;
2192 default:
2193 break;
2194 }
2195
2196 #ifdef NVC0_DEBUG_SCHED_DATA
2197 score->print(cycle);
2198 #endif
2199 }
2200
2201 void
checkRd(const Value * v,int cycle,int & delay) const2202 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2203 {
2204 int ready = cycle;
2205 int a, b;
2206
2207 switch (v->reg.file) {
2208 case FILE_GPR:
2209 a = v->reg.data.id;
2210 b = a + v->reg.size / 4;
2211 for (int r = a; r < b; ++r)
2212 ready = MAX2(ready, score->rd.r[r]);
2213 break;
2214 case FILE_PREDICATE:
2215 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2216 break;
2217 case FILE_FLAGS:
2218 ready = MAX2(ready, score->rd.c);
2219 break;
2220 case FILE_SHADER_INPUT:
2221 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2222 case FILE_MEMORY_LOCAL:
2223 case FILE_MEMORY_CONST:
2224 case FILE_MEMORY_SHARED:
2225 case FILE_MEMORY_GLOBAL:
2226 case FILE_SYSTEM_VALUE:
2227 // TODO: any restrictions here ?
2228 break;
2229 case FILE_IMMEDIATE:
2230 break;
2231 default:
2232 assert(0);
2233 break;
2234 }
2235 if (cycle < ready)
2236 delay = MAX2(delay, ready - cycle);
2237 }
2238
2239 void
checkWr(const Value * v,int cycle,int & delay) const2240 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2241 {
2242 int ready = cycle;
2243 int a, b;
2244
2245 switch (v->reg.file) {
2246 case FILE_GPR:
2247 a = v->reg.data.id;
2248 b = a + v->reg.size / 4;
2249 for (int r = a; r < b; ++r)
2250 ready = MAX2(ready, score->wr.r[r]);
2251 break;
2252 case FILE_PREDICATE:
2253 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2254 break;
2255 default:
2256 assert(v->reg.file == FILE_FLAGS);
2257 ready = MAX2(ready, score->wr.c);
2258 break;
2259 }
2260 if (cycle < ready)
2261 delay = MAX2(delay, ready - cycle);
2262 }
2263
2264 void
recordWr(const Value * v,const int ready)2265 SchedDataCalculator::recordWr(const Value *v, const int ready)
2266 {
2267 int a = v->reg.data.id;
2268
2269 if (v->reg.file == FILE_GPR) {
2270 int b = a + v->reg.size / 4;
2271 for (int r = a; r < b; ++r)
2272 score->rd.r[r] = ready;
2273 } else
2274 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2275 if (v->reg.file == FILE_PREDICATE) {
2276 score->rd.p[a] = ready + 4;
2277 } else {
2278 assert(v->reg.file == FILE_FLAGS);
2279 score->rd.c = ready + 4;
2280 }
2281 }
2282
2283 void
recordRd(const Value * v,const int ready)2284 SchedDataCalculator::recordRd(const Value *v, const int ready)
2285 {
2286 int a = v->reg.data.id;
2287
2288 if (v->reg.file == FILE_GPR) {
2289 int b = a + v->reg.size / 4;
2290 for (int r = a; r < b; ++r)
2291 score->wr.r[r] = ready;
2292 } else
2293 if (v->reg.file == FILE_PREDICATE) {
2294 score->wr.p[a] = ready;
2295 } else
2296 if (v->reg.file == FILE_FLAGS) {
2297 score->wr.c = ready;
2298 }
2299 }
2300
2301 void
prepareEmission(Function * func)2302 CodeEmitterNVC0::prepareEmission(Function *func)
2303 {
2304 const Target *targ = func->getProgram()->getTarget();
2305
2306 CodeEmitter::prepareEmission(func);
2307
2308 if (targ->hasSWSched) {
2309 SchedDataCalculator sched(targ);
2310 sched.run(func, true, true);
2311 }
2312 }
2313
CodeEmitterNVC0(const TargetNVC0 * target)2314 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
2315 : CodeEmitter(target),
2316 writeIssueDelays(target->hasSWSched)
2317 {
2318 code = NULL;
2319 codeSize = codeSizeLimit = 0;
2320 relocInfo = NULL;
2321 }
2322
2323 CodeEmitter *
getCodeEmitter(Program::Type type)2324 TargetNVC0::getCodeEmitter(Program::Type type)
2325 {
2326 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
2327 emit->setProgramType(type);
2328 return emit;
2329 }
2330
2331 } // namespace nv50_ir
2332