1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdbool.h>
25 #include "util/ralloc.h"
26 #include "vc4_qir.h"
27 #include "vc4_qpu.h"
28 
/*
 * Encodes a source mux selector into the instruction field `muxfield`.
 *
 * QPU_MUX_SMALL_IMM is replaced by QPU_MUX_B before being placed in the
 * field; every other value is encoded as-is.
 *
 * The `mux` argument is parenthesized so that compound expressions expand
 * with the intended precedence.  Note that it is evaluated twice, so it must
 * not have side effects.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,   \
                      muxfield)
31 
32 static uint64_t
set_src_raddr(uint64_t inst,struct qpu_reg src)33 set_src_raddr(uint64_t inst, struct qpu_reg src)
34 {
35         if (src.mux == QPU_MUX_A) {
36                 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
37                        QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
38                 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
39         }
40 
41         if (src.mux == QPU_MUX_B) {
42                 assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
43                         QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
44                        QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
45                 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
46         }
47 
48         if (src.mux == QPU_MUX_SMALL_IMM) {
49                 if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
50                         assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
51                 } else {
52                         inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
53                         assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
54                 }
55                 return ((inst & ~QPU_RADDR_B_MASK) |
56                         QPU_SET_FIELD(src.addr, QPU_RADDR_B));
57         }
58 
59         return inst;
60 }
61 
62 uint64_t
qpu_NOP()63 qpu_NOP()
64 {
65         uint64_t inst = 0;
66 
67         inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
68         inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
69 
70         /* Note: These field values are actually non-zero */
71         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
72         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
73         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
74         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
75         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
76 
77         return inst;
78 }
79 
80 static uint64_t
qpu_a_dst(struct qpu_reg dst)81 qpu_a_dst(struct qpu_reg dst)
82 {
83         uint64_t inst = 0;
84 
85         if (dst.mux <= QPU_MUX_R5) {
86                 /* Translate the mux to the ACCn values. */
87                 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
88         } else {
89                 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
90                 if (dst.mux == QPU_MUX_B)
91                         inst |= QPU_WS;
92         }
93 
94         return inst;
95 }
96 
97 static uint64_t
qpu_m_dst(struct qpu_reg dst)98 qpu_m_dst(struct qpu_reg dst)
99 {
100         uint64_t inst = 0;
101 
102         if (dst.mux <= QPU_MUX_R5) {
103                 /* Translate the mux to the ACCn values. */
104                 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
105         } else {
106                 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
107                 if (dst.mux == QPU_MUX_A)
108                         inst |= QPU_WS;
109         }
110 
111         return inst;
112 }
113 
114 uint64_t
qpu_a_MOV(struct qpu_reg dst,struct qpu_reg src)115 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
116 {
117         uint64_t inst = 0;
118 
119         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
120         inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
121         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
122         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
123         inst |= qpu_a_dst(dst);
124         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
125         inst |= QPU_MUX(src.mux, QPU_ADD_A);
126         inst |= QPU_MUX(src.mux, QPU_ADD_B);
127         inst = set_src_raddr(inst, src);
128         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
129 
130         return inst;
131 }
132 
133 uint64_t
qpu_m_MOV(struct qpu_reg dst,struct qpu_reg src)134 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
135 {
136         uint64_t inst = 0;
137 
138         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
139         inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
140         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
141         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
142         inst |= qpu_m_dst(dst);
143         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
144         inst |= QPU_MUX(src.mux, QPU_MUL_A);
145         inst |= QPU_MUX(src.mux, QPU_MUL_B);
146         inst = set_src_raddr(inst, src);
147         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
148 
149         return inst;
150 }
151 
152 uint64_t
qpu_load_imm_ui(struct qpu_reg dst,uint32_t val)153 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
154 {
155         uint64_t inst = 0;
156 
157         inst |= qpu_a_dst(dst);
158         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
159         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
160         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
161         inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
162         inst |= val;
163 
164         return inst;
165 }
166 
167 uint64_t
qpu_load_imm_u2(struct qpu_reg dst,uint32_t val)168 qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
169 {
170         return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
171                                                          QPU_LOAD_IMM_MODE);
172 }
173 
174 uint64_t
qpu_load_imm_i2(struct qpu_reg dst,uint32_t val)175 qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
176 {
177         return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
178                                                          QPU_LOAD_IMM_MODE);
179 }
180 
181 uint64_t
qpu_branch(uint32_t cond,uint32_t target)182 qpu_branch(uint32_t cond, uint32_t target)
183 {
184         uint64_t inst = 0;
185 
186         inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
187         inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
188         inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
189         inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
190         inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
191 
192         return inst;
193 }
194 
195 uint64_t
qpu_a_alu2(enum qpu_op_add op,struct qpu_reg dst,struct qpu_reg src0,struct qpu_reg src1)196 qpu_a_alu2(enum qpu_op_add op,
197            struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
198 {
199         uint64_t inst = 0;
200 
201         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
202         inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
203         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
204         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
205         inst |= qpu_a_dst(dst);
206         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
207         inst |= QPU_MUX(src0.mux, QPU_ADD_A);
208         inst = set_src_raddr(inst, src0);
209         inst |= QPU_MUX(src1.mux, QPU_ADD_B);
210         inst = set_src_raddr(inst, src1);
211         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
212 
213         return inst;
214 }
215 
216 uint64_t
qpu_m_alu2(enum qpu_op_mul op,struct qpu_reg dst,struct qpu_reg src0,struct qpu_reg src1)217 qpu_m_alu2(enum qpu_op_mul op,
218            struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
219 {
220         uint64_t inst = 0;
221 
222         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
223         inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
224         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
225         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
226         inst |= qpu_m_dst(dst);
227         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
228         inst |= QPU_MUX(src0.mux, QPU_MUL_A);
229         inst = set_src_raddr(inst, src0);
230         inst |= QPU_MUX(src1.mux, QPU_MUL_B);
231         inst = set_src_raddr(inst, src1);
232         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
233 
234         return inst;
235 }
236 
237 uint64_t
qpu_m_rot(struct qpu_reg dst,struct qpu_reg src0,int rot)238 qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
239 {
240 	uint64_t inst = 0;
241 	inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
242 
243 	inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
244 	inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
245                                 QPU_SMALL_IMM);
246 
247 	return inst;
248 }
249 
/**
 * Merges one bitfield (selected by mask) of instructions a and b into
 * *merge.
 *
 * If a's field equals `ignore`, b's field is stored in *merge; otherwise, if
 * b's field equals `ignore`, a's field is stored.  When neither field is
 * ignorable, *merge is left alone and the result says whether the two fields
 * agree.
 *
 * Returns false only when both fields are non-ignorable and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;

        if (field_a == ignore) {
                *merge = (*merge & ~mask) | field_b;
                return true;
        }

        if (field_b == ignore) {
                *merge = (*merge & ~mask) | field_a;
                return true;
        }

        return field_a == field_b;
}
266 
267 int
qpu_num_sf_accesses(uint64_t inst)268 qpu_num_sf_accesses(uint64_t inst)
269 {
270         int accesses = 0;
271         static const uint32_t specials[] = {
272                 QPU_W_TLB_COLOR_MS,
273                 QPU_W_TLB_COLOR_ALL,
274                 QPU_W_TLB_Z,
275                 QPU_W_TMU0_S,
276                 QPU_W_TMU0_T,
277                 QPU_W_TMU0_R,
278                 QPU_W_TMU0_B,
279                 QPU_W_TMU1_S,
280                 QPU_W_TMU1_T,
281                 QPU_W_TMU1_R,
282                 QPU_W_TMU1_B,
283                 QPU_W_SFU_RECIP,
284                 QPU_W_SFU_RECIPSQRT,
285                 QPU_W_SFU_EXP,
286                 QPU_W_SFU_LOG,
287         };
288         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
289         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
290         uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
291         uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
292 
293         for (int j = 0; j < ARRAY_SIZE(specials); j++) {
294                 if (waddr_add == specials[j])
295                         accesses++;
296                 if (waddr_mul == specials[j])
297                         accesses++;
298         }
299 
300         if (raddr_a == QPU_R_MUTEX_ACQUIRE)
301                 accesses++;
302         if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
303             QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
304                 accesses++;
305 
306         /* XXX: semaphore, combined color read/write? */
307         switch (QPU_GET_FIELD(inst, QPU_SIG)) {
308         case QPU_SIG_COLOR_LOAD:
309         case QPU_SIG_COLOR_LOAD_END:
310         case QPU_SIG_LOAD_TMU0:
311         case QPU_SIG_LOAD_TMU1:
312                 accesses++;
313         }
314 
315         return accesses;
316 }
317 
318 static bool
qpu_waddr_ignores_ws(uint32_t waddr)319 qpu_waddr_ignores_ws(uint32_t waddr)
320 {
321         switch(waddr) {
322         case QPU_W_ACC0:
323         case QPU_W_ACC1:
324         case QPU_W_ACC2:
325         case QPU_W_ACC3:
326         case QPU_W_NOP:
327         case QPU_W_TLB_Z:
328         case QPU_W_TLB_COLOR_MS:
329         case QPU_W_TLB_COLOR_ALL:
330         case QPU_W_TLB_ALPHA_MASK:
331         case QPU_W_VPM:
332         case QPU_W_SFU_RECIP:
333         case QPU_W_SFU_RECIPSQRT:
334         case QPU_W_SFU_EXP:
335         case QPU_W_SFU_LOG:
336         case QPU_W_TMU0_S:
337         case QPU_W_TMU0_T:
338         case QPU_W_TMU0_R:
339         case QPU_W_TMU0_B:
340         case QPU_W_TMU1_S:
341         case QPU_W_TMU1_T:
342         case QPU_W_TMU1_R:
343         case QPU_W_TMU1_B:
344                 return true;
345         }
346 
347         return false;
348 }
349 
350 static void
swap_ra_file_mux_helper(uint64_t * merge,uint64_t * a,uint32_t mux_shift)351 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
352 {
353         uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
354         uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
355         uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
356 
357         if ((*a & mux_mask) == mux_a_val) {
358                 *a = (*a & ~mux_mask) | mux_b_val;
359                 *merge = (*merge & ~mux_mask) | mux_b_val;
360         }
361 }
362 
363 static bool
try_swap_ra_file(uint64_t * merge,uint64_t * a,uint64_t * b)364 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
365 {
366         uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
367         uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
368         uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
369         uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
370 
371         if (raddr_a_b != QPU_R_NOP)
372                 return false;
373 
374         switch (raddr_a_a) {
375         case QPU_R_UNIF:
376         case QPU_R_VARY:
377                 break;
378         default:
379                 return false;
380         }
381 
382         if (!(*merge & QPU_PM) &&
383             QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
384                 return false;
385         }
386 
387         if (raddr_b_b != QPU_R_NOP &&
388             raddr_b_b != raddr_a_a)
389                 return false;
390 
391         /* Move raddr A to B in instruction a. */
392         *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
393         *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
394         *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
395         *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
396         swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
397         swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
398         swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
399         swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
400 
401         return true;
402 }
403 
404 static bool
convert_mov(uint64_t * inst)405 convert_mov(uint64_t *inst)
406 {
407         uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
408         uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
409         uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
410 
411         /* Is it a MOV? */
412         if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
413             (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
414                 return false;
415         }
416 
417         if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
418                 return false;
419 
420         /* We could maybe support this in the .8888 and .8a-.8d cases. */
421         if (*inst & QPU_PM)
422                 return false;
423 
424         *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
425         *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
426 
427         *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
428         *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
429         *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
430         *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
431 
432         *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
433         *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
434 
435         *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
436         *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
437 
438         if (!qpu_waddr_ignores_ws(waddr_add))
439                 *inst ^= QPU_WS;
440 
441         return true;
442 }
443 
444 static bool
writes_a_file(uint64_t inst)445 writes_a_file(uint64_t inst)
446 {
447         if (!(inst & QPU_WS))
448                 return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
449         else
450                 return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
451 }
452 
453 static bool
reads_r4(uint64_t inst)454 reads_r4(uint64_t inst)
455 {
456         return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
457                 QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
458                 QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
459                 QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
460 }
461 
/**
 * Tries to combine two QPU instructions into a single instruction word.
 *
 * Returns the merged instruction on success, or 0 when the two instructions
 * conflict and cannot be merged.
 *
 * Both arguments are passed by value; the function may rewrite its local
 * copies (converting an ADD-ALU MOV to the MUL ALU, or moving a regfile A
 * read to regfile B) in order to make the two instructions fit together.
 */
uint64_t
qpu_merge_inst(uint64_t a, uint64_t b)
{
        /* Start from the bitwise OR of both instructions; fields that can't
         * simply be ORed are fixed up one by one below.
         */
        uint64_t merge = a | b;
        bool ok = true;
        uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
        uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);

        /* Both instructions use the ADD ALU: this only works if neither
         * uses the MUL ALU and one of them is a MOV that convert_mov() can
         * move over to the MUL ALU.
         */
        if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
            QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
                if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
                    !(convert_mov(&a) || convert_mov(&b))) {
                        return 0;
                } else {
                        /* One of a/b was rewritten, so recompute. */
                        merge = a | b;
                }
        }

        /* Two MUL operations can't share one instruction. */
        if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                return 0;

        /* At most one side may do an access counted by
         * qpu_num_sf_accesses().
         */
        if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
                return 0;

        /* Immediate loads, small immediates and branches are never
         * merged.
         */
        if (a_sig == QPU_SIG_LOAD_IMM ||
            b_sig == QPU_SIG_LOAD_IMM ||
            a_sig == QPU_SIG_SMALL_IMM ||
            b_sig == QPU_SIG_SMALL_IMM ||
            a_sig == QPU_SIG_BRANCH ||
            b_sig == QPU_SIG_BRANCH) {
                return 0;
        }

        /* Signal: take whichever side is not QPU_SIG_NONE; two different
         * signals conflict.
         */
        ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
                                QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));

        /* Misc fields that have to match exactly. */
        ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);

        if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
                          QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
                /* Since we tend to use regfile A by default both for register
                 * allocation and for our special values (uniforms and
                 * varyings), try swapping uniforms and varyings to regfile B
                 * to resolve raddr A conflicts.
                 */
                if (!try_swap_ra_file(&merge, &a, &b) &&
                    !try_swap_ra_file(&merge, &b, &a)) {
                        return 0;
                }
        }

        ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
                                QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));

        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));

        /* Allow disagreement on WS (swapping A vs B physical reg file as the
         * destination for ADD/MUL) if one of the original instructions
         * ignores it (probably because it's just writing to accumulators).
         */
        if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
            qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (b & QPU_WS);
        } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
                   qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (a & QPU_WS);
        } else {
                if ((a & QPU_WS) != (b & QPU_WS))
                        return 0;
        }

        if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
                /* If one instruction has PM bit set and the other not, the
                 * one without PM shouldn't do packing/unpacking, and we
                 * have to make sure non-NOP packing/unpacking from PM
                 * instruction aren't added to it.
                 */
                uint64_t temp;

                /* Let a be the one with PM bit */
                if (!(a & QPU_PM)) {
                        temp = a;
                        a = b;
                        b = temp;
                }

                /* The instruction without PM must not pack or unpack. */
                if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                        return 0;

                /* a's pack must not end up applying to b's MUL op. */
                if ((a & QPU_PACK_MASK) != 0 &&
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                        return 0;

                /* a's unpack must not end up applying to b's r4 reads. */
                if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
                        return 0;
        } else {
                /* packing: Make sure that non-NOP packs agree, then deal with
                 * special-case failing of adding a non-NOP pack to something
                 * with a NOP pack.
                 */
                if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
                        return 0;
                /* True when the merged pack differs from what that
                 * instruction originally had.
                 */
                bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file packing on either half.
                         */
                        if (new_a_pack && writes_a_file(a))
                                return 0;

                        if (new_b_pack && writes_a_file(b))
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new MUL
                         * packing on either half.
                         */
                        if (new_a_pack &&
                            QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;

                        if (new_b_pack &&
                            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;
                }

                /* unpacking: Make sure that non-NOP unpacks agree, then deal
                 * with special-case failing of adding a non-NOP unpack to
                 * something with a NOP unpack.
                 */
                if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
                        return 0;
                /* True when the merged unpack differs from what that
                 * instruction originally had.
                 */
                bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file unpacking on either half.
                         */
                        if (new_a_unpack &&
                            QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;

                        if (new_b_unpack &&
                            QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new r4
                         * unpack on either half.
                         */
                        if (new_a_unpack && reads_r4(a))
                                return 0;

                        if (new_b_unpack && reads_r4(b))
                                return 0;
                }
        }

        /* ok collects the results of the merge_fields() calls that didn't
         * return early.
         */
        if (ok)
                return merge;
        else
                return 0;
}
634 
635 uint64_t
qpu_set_sig(uint64_t inst,uint32_t sig)636 qpu_set_sig(uint64_t inst, uint32_t sig)
637 {
638         assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
639         return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
640 }
641 
642 uint64_t
qpu_set_cond_add(uint64_t inst,uint32_t cond)643 qpu_set_cond_add(uint64_t inst, uint32_t cond)
644 {
645         assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
646         return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
647 }
648 
649 uint64_t
qpu_set_cond_mul(uint64_t inst,uint32_t cond)650 qpu_set_cond_mul(uint64_t inst, uint32_t cond)
651 {
652         assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
653         return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
654 }
655 
656 bool
qpu_waddr_is_tlb(uint32_t waddr)657 qpu_waddr_is_tlb(uint32_t waddr)
658 {
659         switch (waddr) {
660         case QPU_W_TLB_COLOR_ALL:
661         case QPU_W_TLB_COLOR_MS:
662         case QPU_W_TLB_Z:
663                 return true;
664         default:
665                 return false;
666         }
667 }
668 
669 bool
qpu_inst_is_tlb(uint64_t inst)670 qpu_inst_is_tlb(uint64_t inst)
671 {
672         uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
673 
674         return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
675                 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
676                 sig == QPU_SIG_COLOR_LOAD ||
677                 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
678 }
679 
/**
 * Returns the small immediate value to be encoded into the raddr b field if
 * the argument can be represented as one, or ~0 otherwise.
 *
 * Encodings produced:
 *  -  0..15: the integers 0 to 15,
 *  - 16..31: the integers -16 to -1,
 *  - 32..47: the 32-bit patterns listed in the table below.
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* Indexed by (encoding - 32).  These are the IEEE-754
         * single-precision bit patterns of 1.0, 2.0, ... 128.0 followed by
         * 1/256, 1/128, ... 1/2.
         */
        static const uint32_t bit_patterns[] = {
                0x3f800000, 0x40000000, 0x40800000, 0x41000000,
                0x41800000, 0x42000000, 0x42800000, 0x43000000,
                0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
                0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,
        };
        const uint32_t num_patterns =
                sizeof(bit_patterns) / sizeof(bit_patterns[0]);

        /* Small non-negative integers encode as themselves. */
        if (i <= 15)
                return i;

        /* -16..-1, viewed as signed, wrap around to 16..31. */
        if ((int)i >= -16 && (int)i < 0)
                return i + 32;

        for (uint32_t idx = 0; idx < num_patterns; idx++) {
                if (bit_patterns[idx] == i)
                        return 32 + idx;
        }

        /* Not representable as a small immediate. */
        return (uint32_t)~0;
}
729 
730 void
qpu_serialize_one_inst(struct vc4_compile * c,uint64_t inst)731 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
732 {
733         if (c->qpu_inst_count >= c->qpu_inst_size) {
734                 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
735                 c->qpu_insts = reralloc(c, c->qpu_insts,
736                                         uint64_t, c->qpu_inst_size);
737         }
738         c->qpu_insts[c->qpu_inst_count++] = inst;
739 }
740