1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 
26 namespace nv50_ir {
27 
// Per-operation source operand count (as the name suggests), one entry per
// opcode in the order spelled out by the trailing comments on each row.
// Target::create() statically asserts that this table has exactly
// OP_LAST + 1 entries, so every new opcode needs a matching entry here.
// The final lone 0 is presumably the slot for OP_LAST itself.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET,CONT,BREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   1,                      // VOTE
   1,                      // BUFQ
   0
};
62 
// Operation class for each opcode, one entry per opcode in the order listed
// by the comment preceding each group. Target::create() statically asserts
// that this table has exactly OP_LAST + 1 entries, so it must be kept in
// step with the opcode list; the last entry is labelled LAST.
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET(AND,OR,XOR); SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, AFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART
   OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): the third entry (SUSTB per the list above) is
   // OPCLASS_ATOMIC while its siblings are OPCLASS_SURFACE -- confirm that
   // this is intended.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, BFIND; PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   // SHFL
   OPCLASS_OTHER,
   // VOTE
   OPCLASS_OTHER,
   // BUFQ
   OPCLASS_OTHER,
   OPCLASS_PSEUDO // LAST
};
140 
141 
142 extern Target *getTargetGM107(unsigned int chipset);
143 extern Target *getTargetNVC0(unsigned int chipset);
144 extern Target *getTargetNV50(unsigned int chipset);
145 
create(unsigned int chipset)146 Target *Target::create(unsigned int chipset)
147 {
148    STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
149    STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
150    switch (chipset & ~0xf) {
151    case 0x110:
152    case 0x120:
153    case 0x130:
154       return getTargetGM107(chipset);
155    case 0xc0:
156    case 0xd0:
157    case 0xe0:
158    case 0xf0:
159    case 0x100:
160       return getTargetNVC0(chipset);
161    case 0x50:
162    case 0x80:
163    case 0x90:
164    case 0xa0:
165       return getTargetNV50(chipset);
166    default:
167       ERROR("unsupported target: NV%x\n", chipset);
168       return 0;
169    }
170 }
171 
// Release a target object (such as one obtained from Target::create()).
// Deleting a null pointer is a no-op, so passing NULL is harmless.
void Target::destroy(Target *targ)
{
   delete targ;
}
176 
CodeEmitter(const Target * target)177 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
178 {
179 }
180 
181 void
setCodeLocation(void * ptr,uint32_t size)182 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
183 {
184    code = reinterpret_cast<uint32_t *>(ptr);
185    codeSize = 0;
186    codeSizeLimit = size;
187 }
188 
189 void
printBinary() const190 CodeEmitter::printBinary() const
191 {
192    uint32_t *bin = code - codeSize / 4;
193    INFO("program binary (%u bytes)", codeSize);
194    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
195       if ((pos % 8) == 0)
196          INFO("\n");
197       INFO("%08x ", bin[pos]);
198    }
199    INFO("\n");
200 }
201 
// Number of 56-byte units needed to hold @size bytes, rounded up.
// prepareEmission(Program *) reserves 8 extra bytes for each such unit.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t unitBytes = 56;
   const uint32_t roundUp = unitBytes - 1;

   return (size + roundUp) / unitBytes;
}
206 
207 void
prepareEmission(Program * prog)208 CodeEmitter::prepareEmission(Program *prog)
209 {
210    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
211         !fi.end(); fi.next()) {
212       Function *func = reinterpret_cast<Function *>(fi.get());
213       func->binPos = prog->binSize;
214       prepareEmission(func);
215 
216       // adjust sizes & positions for schedulding info:
217       if (prog->getTarget()->hasSWSched) {
218          uint32_t adjPos = func->binPos;
219          BasicBlock *bb = NULL;
220          for (int i = 0; i < func->bbCount; ++i) {
221             bb = func->bbArray[i];
222             int32_t adjSize = bb->binSize;
223             if (adjPos % 64) {
224                adjSize -= 64 - adjPos % 64;
225                if (adjSize < 0)
226                   adjSize = 0;
227             }
228             adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
229             bb->binPos = adjPos;
230             bb->binSize = adjSize;
231             adjPos += adjSize;
232          }
233          if (bb)
234             func->binSize = adjPos - func->binPos;
235       }
236 
237       prog->binSize += func->binSize;
238    }
239 }
240 
241 void
prepareEmission(Function * func)242 CodeEmitter::prepareEmission(Function *func)
243 {
244    func->bbCount = 0;
245    func->bbArray = new BasicBlock * [func->cfg.getSize()];
246 
247    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
248 
249    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
250       prepareEmission(BasicBlock::get(*it));
251 }
252 
// Place @bb after the blocks already recorded in its function's bbArray,
// drop branches from preceding blocks that merely jump to @bb, and choose
// an encoding size (encSize) for every instruction in @bb, accumulating the
// block's binSize and adding it to the function's binSize.
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // Walk back over already-placed blocks that have a binSize of zero.
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      // `in` ends with a branch straight to bb: remove it and take its
      // 8 bytes out of the block and function sizes.
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         in->binSize -= 8;
         func->binSize -= 8;

         // Shift every block recorded after `in` back by the removed 8 bytes.
         // Note that this loop leaves j == func->bbCount.
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      // bb starts where `in` ends.
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   // Record bb as the next block in emission order.
   func->bbArray[func->bbCount++] = bb;

   // A block without an exit instruction gets no sizes assigned.
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // nShort counts the instructions kept in short (< 8 byte) form in the
   // current run; an odd value appears to mean one of them is still unpaired.
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      // Drop memory barriers on targets that do not support the operation.
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
         bb->remove(i);
         continue;
      }

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // i cannot stay short, the short count is odd and the following
         // instruction could be encoded in 4 bytes: try reordering.
         if (i->isCommutationLegal(i->next)) {
            // Swap i with its successor and revisit from i's new predecessor.
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // Swap i with its predecessor instead and skip past `next`,
            // accounting for its 4 bytes here.
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // No legal reordering: use 8 bytes for i and for its predecessor;
            // the extra 4 presumably covers widening the predecessor, whose
            // size was already added to binSize.
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         // i takes the 8-byte form; with an odd short count its predecessor
         // is widened to 8 bytes as well.
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // The exit instruction always ends up with an 8-byte encoding.
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      // With an even short count and a 4-byte predecessor, the predecessor
      // is set to 8 bytes too (binSize grows by 8 in this case).
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
344 
345 void
emitSymbolTable(struct nv50_ir_prog_info * info)346 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
347 {
348    unsigned int n = 0, nMax = allFuncs.getSize();
349 
350    info->bin.syms =
351       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
352 
353    for (ArrayList::Iterator fi = allFuncs.iterator();
354         !fi.end();
355         fi.next(), ++n) {
356       Function *f = (Function *)fi.get();
357       assert(n < nMax);
358 
359       info->bin.syms[n].label = f->getLabel();
360       info->bin.syms[n].offset = f->binPos;
361    }
362 
363    info->bin.numSyms = n;
364 }
365 
366 bool
emitBinary(struct nv50_ir_prog_info * info)367 Program::emitBinary(struct nv50_ir_prog_info *info)
368 {
369    CodeEmitter *emit = target->getCodeEmitter(progType);
370 
371    emit->prepareEmission(this);
372 
373    if (dbgFlags & NV50_IR_DEBUG_BASIC)
374       this->print();
375 
376    if (!binSize) {
377       code = NULL;
378       return false;
379    }
380    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
381    if (!code)
382       return false;
383    emit->setCodeLocation(code, binSize);
384    info->bin.instructions = 0;
385 
386    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
387       Function *fn = reinterpret_cast<Function *>(fi.get());
388 
389       assert(emit->getCodeSize() == fn->binPos);
390 
391       for (int b = 0; b < fn->bbCount; ++b) {
392          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
393             emit->emitInstruction(i);
394             info->bin.instructions++;
395             if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
396                 (isFloatType(i->sType) || isFloatType(i->dType)))
397                info->io.fp64 = true;
398          }
399       }
400    }
401    info->bin.relocData = emit->getRelocInfo();
402    info->bin.fixupData = emit->getFixupInfo();
403 
404    emitSymbolTable(info);
405 
406    // the nvc0 driver will print the binary iself together with the header
407    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
408       emit->printBinary();
409 
410    delete emit;
411    return true;
412 }
413 
414 #define RELOC_ALLOC_INCREMENT 8
415 
416 bool
addReloc(RelocEntry::Type ty,int w,uint32_t data,uint32_t m,int s)417 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
418                       int s)
419 {
420    unsigned int n = relocInfo ? relocInfo->count : 0;
421 
422    if (!(n % RELOC_ALLOC_INCREMENT)) {
423       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
424       relocInfo = reinterpret_cast<RelocInfo *>(
425          REALLOC(relocInfo, n ? size : 0,
426                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
427       if (!relocInfo)
428          return false;
429       if (n == 0)
430          memset(relocInfo, 0, sizeof(RelocInfo));
431    }
432    ++relocInfo->count;
433 
434    relocInfo->entry[n].data = data;
435    relocInfo->entry[n].mask = m;
436    relocInfo->entry[n].offset = codeSize + w * 4;
437    relocInfo->entry[n].bitPos = s;
438    relocInfo->entry[n].type = ty;
439 
440    return true;
441 }
442 
443 bool
addInterp(int ipa,int reg,FixupApply apply)444 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
445 {
446    unsigned int n = fixupInfo ? fixupInfo->count : 0;
447 
448    if (!(n % RELOC_ALLOC_INCREMENT)) {
449       size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
450       fixupInfo = reinterpret_cast<FixupInfo *>(
451          REALLOC(fixupInfo, n ? size : 0,
452                  size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
453       if (!fixupInfo)
454          return false;
455       if (n == 0)
456          memset(fixupInfo, 0, sizeof(FixupInfo));
457    }
458    ++fixupInfo->count;
459 
460    fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
461 
462    return true;
463 }
464 
465 void
apply(uint32_t * binary,const RelocInfo * info) const466 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
467 {
468    uint32_t value = 0;
469 
470    switch (type) {
471    case TYPE_CODE: value = info->codePos; break;
472    case TYPE_BUILTIN: value = info->libPos; break;
473    case TYPE_DATA: value = info->dataPos; break;
474    default:
475       assert(0);
476       break;
477    }
478    value += data;
479    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
480 
481    binary[offset / 4] &= ~mask;
482    binary[offset / 4] |= value & mask;
483 }
484 
485 } // namespace nv50_ir
486 
487 
488 #include "codegen/nv50_ir_driver.h"
489 
490 extern "C" {
491 
492 void
nv50_ir_relocate_code(void * relocData,uint32_t * code,uint32_t codePos,uint32_t libPos,uint32_t dataPos)493 nv50_ir_relocate_code(void *relocData, uint32_t *code,
494                       uint32_t codePos,
495                       uint32_t libPos,
496                       uint32_t dataPos)
497 {
498    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
499 
500    info->codePos = codePos;
501    info->libPos = libPos;
502    info->dataPos = dataPos;
503 
504    for (unsigned int i = 0; i < info->count; ++i)
505       info->entry[i].apply(code, info);
506 }
507 
508 void
nv50_ir_apply_fixups(void * fixupData,uint32_t * code,bool force_persample_interp,bool flatshade,uint8_t alphatest)509 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
510                      bool force_persample_interp, bool flatshade,
511                      uint8_t alphatest)
512 {
513    nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
514       fixupData);
515 
516    // force_persample_interp: all non-flat -> per-sample
517    // flatshade: all color -> flat
518    // alphatest: PIPE_FUNC_* to use with alphatest
519    nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
520    for (unsigned i = 0; i < info->count; ++i)
521       info->entry[i].apply(&info->entry[i], code, data);
522 }
523 
524 void
nv50_ir_get_target_library(uint32_t chipset,const uint32_t ** code,uint32_t * size)525 nv50_ir_get_target_library(uint32_t chipset,
526                            const uint32_t **code, uint32_t *size)
527 {
528    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
529    targ->getBuiltinCode(code, size);
530    nv50_ir::Target::destroy(targ);
531 }
532 
533 }
534