1 /*
2  * Copyright 2011 Christoph Bumiller
3  *           2014 Red Hat Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
26 
27 namespace nv50_ir {
28 
getTargetGM107(unsigned int chipset)29 Target *getTargetGM107(unsigned int chipset)
30 {
31    return new TargetGM107(chipset);
32 }
33 
// BUILTINS / LIBRARY FUNCTIONS:
35 
// laziness -> will just hardcode everything for the time being
37 
38 #include "lib/gm107.asm.h"
39 
40 void
getBuiltinCode(const uint32_t ** code,uint32_t * size) const41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42 {
43    *code = (const uint32_t *)&gm107_builtin_code[0];
44    *size = sizeof(gm107_builtin_code);
45 }
46 
47 uint32_t
getBuiltinOffset(int builtin) const48 TargetGM107::getBuiltinOffset(int builtin) const
49 {
50    assert(builtin < NVC0_BUILTIN_COUNT);
51    return gm107_builtin_offsets[builtin];
52 }
53 
54 bool
isOpSupported(operation op,DataType ty) const55 TargetGM107::isOpSupported(operation op, DataType ty) const
56 {
57    switch (op) {
58    case OP_SAD:
59    case OP_POW:
60    case OP_SQRT:
61    case OP_DIV:
62    case OP_MOD:
63       return false;
64    default:
65       break;
66    }
67 
68    return true;
69 }
70 
71 // Return true when an instruction supports the reuse flag. When supported, the
72 // hardware will use the operand reuse cache introduced since Maxwell, which
73 // should try to reduce bank conflicts by caching values for the subsequent
74 // instructions. Note that the next instructions have to use the same GPR id in
75 // the same operand slot.
76 bool
isReuseSupported(const Instruction * insn) const77 TargetGM107::isReuseSupported(const Instruction *insn) const
78 {
79    const OpClass cl = getOpClass(insn->op);
80 
81    // TODO: double-check!
82    switch (cl) {
83    case OPCLASS_ARITH:
84    case OPCLASS_COMPARE:
85    case OPCLASS_LOGIC:
86    case OPCLASS_MOVE:
87    case OPCLASS_SHIFT:
88       return true;
89    case OPCLASS_BITFIELD:
90       if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
91          return true;
92       break;
93    default:
94       break;
95    }
96    return false;
97 }
98 
99 // Return true when an instruction requires to set up a barrier because it
100 // doesn't operate at a fixed latency. Variable latency instructions are memory
101 // operations, double precision operations, special function unit operations
102 // and other low throughput instructions.
103 bool
isBarrierRequired(const Instruction * insn) const104 TargetGM107::isBarrierRequired(const Instruction *insn) const
105 {
106    const OpClass cl = getOpClass(insn->op);
107 
108    if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
109       return true;
110 
111    switch (cl) {
112    case OPCLASS_ATOMIC:
113    case OPCLASS_LOAD:
114    case OPCLASS_STORE:
115    case OPCLASS_SURFACE:
116    case OPCLASS_TEXTURE:
117       return true;
118    case OPCLASS_SFU:
119       switch (insn->op) {
120       case OP_COS:
121       case OP_EX2:
122       case OP_LG2:
123       case OP_LINTERP:
124       case OP_PINTERP:
125       case OP_RCP:
126       case OP_RSQ:
127       case OP_SIN:
128          return true;
129       default:
130          break;
131       }
132       break;
133    case OPCLASS_BITFIELD:
134       switch (insn->op) {
135       case OP_BFIND:
136       case OP_POPCNT:
137          return true;
138       default:
139          break;
140       }
141       break;
142    case OPCLASS_CONTROL:
143       switch (insn->op) {
144       case OP_EMIT:
145       case OP_RESTART:
146          return true;
147       default:
148          break;
149       }
150       break;
151    case OPCLASS_OTHER:
152       switch (insn->op) {
153       case OP_AFETCH:
154       case OP_PFETCH:
155       case OP_PIXLD:
156       case OP_RDSV:
157       case OP_SHFL:
158          return true;
159       default:
160          break;
161       }
162       break;
163    case OPCLASS_ARITH:
164       // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
165       if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
166           !isFloatType(insn->dType))
167          return true;
168       break;
169    case OPCLASS_CONVERT:
170       if (insn->def(0).getFile() != FILE_PREDICATE &&
171           insn->src(0).getFile() != FILE_PREDICATE)
172          return true;
173       break;
174    default:
175       break;
176    }
177    return false;
178 }
179 
180 bool
canDualIssue(const Instruction * a,const Instruction * b) const181 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
182 {
183    // TODO
184    return false;
185 }
186 
187 // Return the number of stall counts needed to complete a single instruction.
188 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
189 // different number of stall counts like memory operations.
190 int
getLatency(const Instruction * insn) const191 TargetGM107::getLatency(const Instruction *insn) const
192 {
193    // TODO: better values! This should be good enough for now though.
194    switch (insn->op) {
195    case OP_EMIT:
196    case OP_EXPORT:
197    case OP_PIXLD:
198    case OP_RESTART:
199    case OP_STORE:
200    case OP_SUSTB:
201    case OP_SUSTP:
202       return 1;
203    case OP_SHFL:
204       return 2;
205    case OP_ADD:
206    case OP_AND:
207    case OP_EXTBF:
208    case OP_FMA:
209    case OP_INSBF:
210    case OP_MAD:
211    case OP_MAX:
212    case OP_MIN:
213    case OP_MOV:
214    case OP_MUL:
215    case OP_NOT:
216    case OP_OR:
217    case OP_PREEX2:
218    case OP_PRESIN:
219    case OP_QUADOP:
220    case OP_SELP:
221    case OP_SET:
222    case OP_SET_AND:
223    case OP_SET_OR:
224    case OP_SET_XOR:
225    case OP_SHL:
226    case OP_SHLADD:
227    case OP_SHR:
228    case OP_SLCT:
229    case OP_SUB:
230    case OP_VOTE:
231    case OP_XOR:
232       if (insn->dType != TYPE_F64)
233          return 6;
234       break;
235    case OP_ABS:
236    case OP_CEIL:
237    case OP_CVT:
238    case OP_FLOOR:
239    case OP_NEG:
240    case OP_SAT:
241    case OP_TRUNC:
242       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
243                                  insn->src(0).getFile() == FILE_PREDICATE))
244          return 6;
245       break;
246    case OP_BFIND:
247    case OP_COS:
248    case OP_EX2:
249    case OP_LG2:
250    case OP_POPCNT:
251    case OP_QUADON:
252    case OP_QUADPOP:
253    case OP_RCP:
254    case OP_RSQ:
255    case OP_SIN:
256       return 13;
257    default:
258       break;
259    }
260    // Use the maximum number of stall counts for other instructions.
261    return 15;
262 }
263 
264 // Return the operand read latency which is the number of stall counts before
265 // an instruction can read its sources. For memory operations like ATOM, LOAD
266 // and STORE, the memory access has to be indirect.
267 int
getReadLatency(const Instruction * insn) const268 TargetGM107::getReadLatency(const Instruction *insn) const
269 {
270    switch (insn->op) {
271    case OP_ABS:
272    case OP_BFIND:
273    case OP_CEIL:
274    case OP_COS:
275    case OP_EX2:
276    case OP_FLOOR:
277    case OP_LG2:
278    case OP_NEG:
279    case OP_POPCNT:
280    case OP_RCP:
281    case OP_RSQ:
282    case OP_SAT:
283    case OP_SIN:
284    case OP_SULDB:
285    case OP_SULDP:
286    case OP_SUREDB:
287    case OP_SUREDP:
288    case OP_SUSTB:
289    case OP_SUSTP:
290    case OP_TRUNC:
291       return 4;
292    case OP_CVT:
293       if (insn->def(0).getFile() != FILE_PREDICATE &&
294           insn->src(0).getFile() != FILE_PREDICATE)
295          return 4;
296       break;
297    case OP_ATOM:
298    case OP_LOAD:
299    case OP_STORE:
300       if (insn->src(0).isIndirect(0)) {
301          switch (insn->src(0).getFile()) {
302          case FILE_MEMORY_SHARED:
303          case FILE_MEMORY_CONST:
304             return 2;
305          case FILE_MEMORY_GLOBAL:
306          case FILE_MEMORY_LOCAL:
307             return 4;
308          default:
309             break;
310          }
311       }
312       break;
313    case OP_EXPORT:
314    case OP_PFETCH:
315    case OP_SHFL:
316    case OP_VFETCH:
317       return 2;
318    default:
319       break;
320    }
321    return 0;
322 }
323 
324 bool
runLegalizePass(Program * prog,CGStage stage) const325 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
326 {
327    if (stage == CG_STAGE_PRE_SSA) {
328       GM107LoweringPass pass(prog);
329       return pass.run(prog, false, true);
330    } else
331    if (stage == CG_STAGE_POST_RA) {
332       NVC0LegalizePostRA pass(prog);
333       return pass.run(prog, false, true);
334    } else
335    if (stage == CG_STAGE_SSA) {
336       GM107LegalizeSSA pass;
337       return pass.run(prog, false, true);
338    }
339    return false;
340 }
341 
342 CodeEmitter *
getCodeEmitter(Program::Type type)343 TargetGM107::getCodeEmitter(Program::Type type)
344 {
345    return createCodeEmitterGM107(type);
346 }
347 
348 } // namespace nv50_ir
349