1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
27 
28 static inline struct qpu_reg
qpu_reg(int index)29 qpu_reg(int index)
30 {
31         struct qpu_reg reg = {
32                 .magic = false,
33                 .index = index,
34         };
35         return reg;
36 }
37 
38 static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41         struct qpu_reg reg = {
42                 .magic = true,
43                 .index = waddr,
44         };
45         return reg;
46 }
47 
48 static inline struct qpu_reg
qpu_acc(int acc)49 qpu_acc(int acc)
50 {
51         return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52 }
53 
54 struct v3d_qpu_instr
v3d_qpu_nop(void)55 v3d_qpu_nop(void)
56 {
57         struct v3d_qpu_instr instr = {
58                 .type = V3D_QPU_INSTR_TYPE_ALU,
59                 .alu = {
60                         .add = {
61                                 .op = V3D_QPU_A_NOP,
62                                 .waddr = V3D_QPU_WADDR_NOP,
63                                 .magic_write = true,
64                         },
65                         .mul = {
66                                 .op = V3D_QPU_M_NOP,
67                                 .waddr = V3D_QPU_WADDR_NOP,
68                                 .magic_write = true,
69                         },
70                 }
71         };
72 
73         return instr;
74 }
75 
76 static struct qinst *
vir_nop(void)77 vir_nop(void)
78 {
79         struct qreg undef = { QFILE_NULL, 0 };
80         struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81 
82         return qinst;
83 }
84 
85 static struct qinst *
new_qpu_nop_before(struct qinst * inst)86 new_qpu_nop_before(struct qinst *inst)
87 {
88         struct qinst *q = vir_nop();
89 
90         list_addtail(&q->link, &inst->link);
91 
92         return q;
93 }
94 
95 static void
new_ldunif_instr(struct qinst * inst,int i)96 new_ldunif_instr(struct qinst *inst, int i)
97 {
98         struct qinst *ldunif = new_qpu_nop_before(inst);
99 
100         ldunif->qpu.sig.ldunif = true;
101         assert(inst->src[i].file == QFILE_UNIF);
102         ldunif->uniform = inst->src[i].index;
103 }
104 
105 /**
106  * Allocates the src register (accumulator or register file) into the RADDR
107  * fields of the instruction.
108  */
109 static void
set_src(struct v3d_qpu_instr * instr,enum v3d_qpu_mux * mux,struct qpu_reg src)110 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
111 {
112         if (src.magic) {
113                 assert(src.index >= V3D_QPU_WADDR_R0 &&
114                        src.index <= V3D_QPU_WADDR_R5);
115                 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
116                 return;
117         }
118 
119         if (instr->alu.add.a != V3D_QPU_MUX_A &&
120             instr->alu.add.b != V3D_QPU_MUX_A &&
121             instr->alu.mul.a != V3D_QPU_MUX_A &&
122             instr->alu.mul.b != V3D_QPU_MUX_A) {
123                 instr->raddr_a = src.index;
124                 *mux = V3D_QPU_MUX_A;
125         } else {
126                 if (instr->raddr_a == src.index) {
127                         *mux = V3D_QPU_MUX_A;
128                 } else {
129                         assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
130                                  instr->alu.add.b == V3D_QPU_MUX_B &&
131                                  instr->alu.mul.a == V3D_QPU_MUX_B &&
132                                  instr->alu.mul.b == V3D_QPU_MUX_B) ||
133                                src.index == instr->raddr_b);
134 
135                         instr->raddr_b = src.index;
136                         *mux = V3D_QPU_MUX_B;
137                 }
138         }
139 }
140 
141 static void
v3d_generate_code_block(struct v3d_compile * c,struct qblock * block,struct qpu_reg * temp_registers)142 v3d_generate_code_block(struct v3d_compile *c,
143                         struct qblock *block,
144                         struct qpu_reg *temp_registers)
145 {
146         int last_vpm_read_index = -1;
147 
148         vir_for_each_inst(qinst, block) {
149 #if 0
150                 fprintf(stderr, "translating qinst to qpu: ");
151                 vir_dump_inst(c, qinst);
152                 fprintf(stderr, "\n");
153 #endif
154 
155                 struct qinst *temp;
156 
157                 if (vir_has_implicit_uniform(qinst)) {
158                         int src = vir_get_implicit_uniform_src(qinst);
159                         assert(qinst->src[src].file == QFILE_UNIF);
160                         qinst->uniform = qinst->src[src].index;
161                         c->num_uniforms++;
162                 }
163 
164                 int nsrc = vir_get_non_sideband_nsrc(qinst);
165                 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
166                 bool emitted_ldunif = false;
167                 for (int i = 0; i < nsrc; i++) {
168                         int index = qinst->src[i].index;
169                         switch (qinst->src[i].file) {
170                         case QFILE_REG:
171                                 src[i] = qpu_reg(qinst->src[i].index);
172                                 break;
173                         case QFILE_MAGIC:
174                                 src[i] = qpu_magic(qinst->src[i].index);
175                                 break;
176                         case QFILE_NULL:
177                         case QFILE_LOAD_IMM:
178                                 src[i] = qpu_acc(0);
179                                 break;
180                         case QFILE_TEMP:
181                                 src[i] = temp_registers[index];
182                                 break;
183                         case QFILE_UNIF:
184                                 if (!emitted_ldunif) {
185                                         new_ldunif_instr(qinst, i);
186                                         c->num_uniforms++;
187                                         emitted_ldunif = true;
188                                 }
189 
190                                 src[i] = qpu_acc(5);
191                                 break;
192                         case QFILE_SMALL_IMM:
193                                 abort(); /* XXX */
194 #if 0
195                                 src[i].mux = QPU_MUX_SMALL_IMM;
196                                 src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
197                                 /* This should only have returned a valid
198                                  * small immediate field, not ~0 for failure.
199                                  */
200                                 assert(src[i].addr <= 47);
201 #endif
202                                 break;
203 
204                         case QFILE_VPM:
205                                 assert((int)qinst->src[i].index >=
206                                        last_vpm_read_index);
207                                 (void)last_vpm_read_index;
208                                 last_vpm_read_index = qinst->src[i].index;
209 
210                                 temp = new_qpu_nop_before(qinst);
211                                 temp->qpu.sig.ldvpm = true;
212 
213                                 src[i] = qpu_acc(3);
214                                 break;
215 
216                         case QFILE_TLB:
217                         case QFILE_TLBU:
218                                 unreachable("bad vir src file");
219                         }
220                 }
221 
222                 struct qpu_reg dst;
223                 switch (qinst->dst.file) {
224                 case QFILE_NULL:
225                         dst = qpu_magic(V3D_QPU_WADDR_NOP);
226                         break;
227 
228                 case QFILE_REG:
229                         dst = qpu_reg(qinst->dst.index);
230                         break;
231 
232                 case QFILE_MAGIC:
233                         dst = qpu_magic(qinst->dst.index);
234                         break;
235 
236                 case QFILE_TEMP:
237                         dst = temp_registers[qinst->dst.index];
238                         break;
239 
240                 case QFILE_VPM:
241                         dst = qpu_magic(V3D_QPU_WADDR_VPM);
242                         break;
243 
244                 case QFILE_TLB:
245                         dst = qpu_magic(V3D_QPU_WADDR_TLB);
246                         break;
247 
248                 case QFILE_TLBU:
249                         dst = qpu_magic(V3D_QPU_WADDR_TLBU);
250                         break;
251 
252                 case QFILE_UNIF:
253                 case QFILE_SMALL_IMM:
254                 case QFILE_LOAD_IMM:
255                         assert(!"not reached");
256                         break;
257                 }
258 
259                 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
260                         if (v3d_qpu_sig_writes_address(c->devinfo,
261                                                        &qinst->qpu.sig)) {
262                                 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
263                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
264 
265                                 qinst->qpu.sig_addr = dst.index;
266                                 qinst->qpu.sig_magic = dst.magic;
267                         } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
268                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
269                                 if (nsrc >= 1) {
270                                         set_src(&qinst->qpu,
271                                                 &qinst->qpu.alu.add.a, src[0]);
272                                 }
273                                 if (nsrc >= 2) {
274                                         set_src(&qinst->qpu,
275                                                 &qinst->qpu.alu.add.b, src[1]);
276                                 }
277 
278                                 qinst->qpu.alu.add.waddr = dst.index;
279                                 qinst->qpu.alu.add.magic_write = dst.magic;
280                         } else {
281                                 if (nsrc >= 1) {
282                                         set_src(&qinst->qpu,
283                                                 &qinst->qpu.alu.mul.a, src[0]);
284                                 }
285                                 if (nsrc >= 2) {
286                                         set_src(&qinst->qpu,
287                                                 &qinst->qpu.alu.mul.b, src[1]);
288                                 }
289 
290                                 qinst->qpu.alu.mul.waddr = dst.index;
291                                 qinst->qpu.alu.mul.magic_write = dst.magic;
292                         }
293                 } else {
294                         assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
295                 }
296         }
297 }
298 
299 
300 static void
v3d_dump_qpu(struct v3d_compile * c)301 v3d_dump_qpu(struct v3d_compile *c)
302 {
303         fprintf(stderr, "%s prog %d/%d QPU:\n",
304                 vir_get_stage_name(c),
305                 c->program_id, c->variant_id);
306 
307         for (int i = 0; i < c->qpu_inst_count; i++) {
308                 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
309                 fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
310         }
311         fprintf(stderr, "\n");
312 }
313 
314 void
v3d_vir_to_qpu(struct v3d_compile * c,struct qpu_reg * temp_registers)315 v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
316 {
317         /* Reset the uniform count to how many will be actually loaded by the
318          * generated QPU code.
319          */
320         c->num_uniforms = 0;
321 
322         vir_for_each_block(block, c)
323                 v3d_generate_code_block(c, block, temp_registers);
324 
325         uint32_t cycles = v3d_qpu_schedule_instructions(c);
326 
327         c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
328         int i = 0;
329         vir_for_each_inst_inorder(inst, c) {
330                 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
331                                              &c->qpu_insts[i++]);
332                 assert(ok); (void) ok;
333         }
334         assert(i == c->qpu_inst_count);
335 
336         if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
337                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
338                         vir_get_stage_name(c),
339                         c->program_id, c->variant_id,
340                         cycles);
341         }
342 
343         if (V3D_DEBUG & (V3D_DEBUG_QPU |
344                          v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
345                 v3d_dump_qpu(c);
346         }
347 
348         qpu_validate(c);
349 
350         free(temp_registers);
351 }
352