1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdlib.h>
25 #include "util/macros.h"
26 #include "broadcom/common/v3d_device_info.h"
27 #include "qpu_instr.h"
28 
29 const char *
v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)30 v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)
31 {
32         static const char *waddr_magic[] = {
33                 [V3D_QPU_WADDR_R0] = "r0",
34                 [V3D_QPU_WADDR_R1] = "r1",
35                 [V3D_QPU_WADDR_R2] = "r2",
36                 [V3D_QPU_WADDR_R3] = "r3",
37                 [V3D_QPU_WADDR_R4] = "r4",
38                 [V3D_QPU_WADDR_R5] = "r5",
39                 [V3D_QPU_WADDR_NOP] = "-",
40                 [V3D_QPU_WADDR_TLB] = "tlb",
41                 [V3D_QPU_WADDR_TLBU] = "tlbu",
42                 [V3D_QPU_WADDR_TMU] = "tmu",
43                 [V3D_QPU_WADDR_TMUL] = "tmul",
44                 [V3D_QPU_WADDR_TMUD] = "tmud",
45                 [V3D_QPU_WADDR_TMUA] = "tmua",
46                 [V3D_QPU_WADDR_TMUAU] = "tmuau",
47                 [V3D_QPU_WADDR_VPM] = "vpm",
48                 [V3D_QPU_WADDR_VPMU] = "vpmu",
49                 [V3D_QPU_WADDR_SYNC] = "sync",
50                 [V3D_QPU_WADDR_SYNCU] = "syncu",
51                 [V3D_QPU_WADDR_SYNCB] = "syncb",
52                 [V3D_QPU_WADDR_RECIP] = "recip",
53                 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
54                 [V3D_QPU_WADDR_EXP] = "exp",
55                 [V3D_QPU_WADDR_LOG] = "log",
56                 [V3D_QPU_WADDR_SIN] = "sin",
57                 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
58                 [V3D_QPU_WADDR_TMUC] = "tmuc",
59                 [V3D_QPU_WADDR_TMUS] = "tmus",
60                 [V3D_QPU_WADDR_TMUT] = "tmut",
61                 [V3D_QPU_WADDR_TMUR] = "tmur",
62                 [V3D_QPU_WADDR_TMUI] = "tmui",
63                 [V3D_QPU_WADDR_TMUB] = "tmub",
64                 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
65                 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
66                 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
67                 [V3D_QPU_WADDR_TMUSF] = "tmusf",
68                 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
69                 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
70                 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
71                 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
72                 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
73                 [V3D_QPU_WADDR_R5REP] = "r5rep",
74         };
75 
76         return waddr_magic[waddr];
77 }
78 
79 const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)80 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
81 {
82         static const char *op_names[] = {
83                 [V3D_QPU_A_FADD] = "fadd",
84                 [V3D_QPU_A_FADDNF] = "faddnf",
85                 [V3D_QPU_A_VFPACK] = "vfpack",
86                 [V3D_QPU_A_ADD] = "add",
87                 [V3D_QPU_A_SUB] = "sub",
88                 [V3D_QPU_A_FSUB] = "fsub",
89                 [V3D_QPU_A_MIN] = "min",
90                 [V3D_QPU_A_MAX] = "max",
91                 [V3D_QPU_A_UMIN] = "umin",
92                 [V3D_QPU_A_UMAX] = "umax",
93                 [V3D_QPU_A_SHL] = "shl",
94                 [V3D_QPU_A_SHR] = "shr",
95                 [V3D_QPU_A_ASR] = "asr",
96                 [V3D_QPU_A_ROR] = "ror",
97                 [V3D_QPU_A_FMIN] = "fmin",
98                 [V3D_QPU_A_FMAX] = "fmax",
99                 [V3D_QPU_A_VFMIN] = "vfmin",
100                 [V3D_QPU_A_AND] = "and",
101                 [V3D_QPU_A_OR] = "or",
102                 [V3D_QPU_A_XOR] = "xor",
103                 [V3D_QPU_A_VADD] = "vadd",
104                 [V3D_QPU_A_VSUB] = "vsub",
105                 [V3D_QPU_A_NOT] = "not",
106                 [V3D_QPU_A_NEG] = "neg",
107                 [V3D_QPU_A_FLAPUSH] = "flapush",
108                 [V3D_QPU_A_FLBPUSH] = "flbpush",
109                 [V3D_QPU_A_FLPOP] = "flpop",
110                 [V3D_QPU_A_RECIP] = "recip",
111                 [V3D_QPU_A_SETMSF] = "setmsf",
112                 [V3D_QPU_A_SETREVF] = "setrevf",
113                 [V3D_QPU_A_NOP] = "nop",
114                 [V3D_QPU_A_TIDX] = "tidx",
115                 [V3D_QPU_A_EIDX] = "eidx",
116                 [V3D_QPU_A_LR] = "lr",
117                 [V3D_QPU_A_VFLA] = "vfla",
118                 [V3D_QPU_A_VFLNA] = "vflna",
119                 [V3D_QPU_A_VFLB] = "vflb",
120                 [V3D_QPU_A_VFLNB] = "vflnb",
121                 [V3D_QPU_A_FXCD] = "fxcd",
122                 [V3D_QPU_A_XCD] = "xcd",
123                 [V3D_QPU_A_FYCD] = "fycd",
124                 [V3D_QPU_A_YCD] = "ycd",
125                 [V3D_QPU_A_MSF] = "msf",
126                 [V3D_QPU_A_REVF] = "revf",
127                 [V3D_QPU_A_VDWWT] = "vdwwt",
128                 [V3D_QPU_A_IID] = "iid",
129                 [V3D_QPU_A_SAMPID] = "sampid",
130                 [V3D_QPU_A_BARRIERID] = "barrierid",
131                 [V3D_QPU_A_TMUWT] = "tmuwt",
132                 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
133                 [V3D_QPU_A_VPMWT] = "vpmwt",
134                 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
135                 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
136                 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
137                 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
138                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
139                 [V3D_QPU_A_RSQRT] = "rsqrt",
140                 [V3D_QPU_A_EXP] = "exp",
141                 [V3D_QPU_A_LOG] = "log",
142                 [V3D_QPU_A_SIN] = "sin",
143                 [V3D_QPU_A_RSQRT2] = "rsqrt2",
144                 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
145                 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
146                 [V3D_QPU_A_FCMP] = "fcmp",
147                 [V3D_QPU_A_VFMAX] = "vfmax",
148                 [V3D_QPU_A_FROUND] = "fround",
149                 [V3D_QPU_A_FTOIN] = "ftoin",
150                 [V3D_QPU_A_FTRUNC] = "ftrunc",
151                 [V3D_QPU_A_FTOIZ] = "ftoiz",
152                 [V3D_QPU_A_FFLOOR] = "ffloor",
153                 [V3D_QPU_A_FTOUZ] = "ftouz",
154                 [V3D_QPU_A_FCEIL] = "fceil",
155                 [V3D_QPU_A_FTOC] = "ftoc",
156                 [V3D_QPU_A_FDX] = "fdx",
157                 [V3D_QPU_A_FDY] = "fdy",
158                 [V3D_QPU_A_STVPMV] = "stvpmv",
159                 [V3D_QPU_A_STVPMD] = "stvpmd",
160                 [V3D_QPU_A_STVPMP] = "stvpmp",
161                 [V3D_QPU_A_ITOF] = "itof",
162                 [V3D_QPU_A_CLZ] = "clz",
163                 [V3D_QPU_A_UTOF] = "utof",
164         };
165 
166         if (op >= ARRAY_SIZE(op_names))
167                 return NULL;
168 
169         return op_names[op];
170 }
171 
172 const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)173 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
174 {
175         static const char *op_names[] = {
176                 [V3D_QPU_M_ADD] = "add",
177                 [V3D_QPU_M_SUB] = "sub",
178                 [V3D_QPU_M_UMUL24] = "umul24",
179                 [V3D_QPU_M_VFMUL] = "vfmul",
180                 [V3D_QPU_M_SMUL24] = "smul24",
181                 [V3D_QPU_M_MULTOP] = "multop",
182                 [V3D_QPU_M_FMOV] = "fmov",
183                 [V3D_QPU_M_MOV] = "mov",
184                 [V3D_QPU_M_NOP] = "nop",
185                 [V3D_QPU_M_FMUL] = "fmul",
186         };
187 
188         if (op >= ARRAY_SIZE(op_names))
189                 return NULL;
190 
191         return op_names[op];
192 }
193 
194 const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)195 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
196 {
197         switch (cond) {
198         case V3D_QPU_COND_NONE:
199                 return "";
200         case V3D_QPU_COND_IFA:
201                 return ".ifa";
202         case V3D_QPU_COND_IFB:
203                 return ".ifb";
204         case V3D_QPU_COND_IFNA:
205                 return ".ifna";
206         case V3D_QPU_COND_IFNB:
207                 return ".ifnb";
208         default:
209                 unreachable("bad cond value");
210         }
211 }
212 
213 const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)214 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
215 {
216         switch (cond) {
217         case V3D_QPU_BRANCH_COND_ALWAYS:
218                 return "";
219         case V3D_QPU_BRANCH_COND_A0:
220                 return ".a0";
221         case V3D_QPU_BRANCH_COND_NA0:
222                 return ".na0";
223         case V3D_QPU_BRANCH_COND_ALLA:
224                 return ".alla";
225         case V3D_QPU_BRANCH_COND_ANYNA:
226                 return ".anyna";
227         case V3D_QPU_BRANCH_COND_ANYA:
228                 return ".anya";
229         case V3D_QPU_BRANCH_COND_ALLNA:
230                 return ".allna";
231         default:
232                 unreachable("bad branch cond value");
233         }
234 }
235 
236 const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)237 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
238 {
239         switch (msfign) {
240         case V3D_QPU_MSFIGN_NONE:
241                 return "";
242         case V3D_QPU_MSFIGN_P:
243                 return "p";
244         case V3D_QPU_MSFIGN_Q:
245                 return "q";
246         default:
247                 unreachable("bad branch cond value");
248         }
249 }
250 
251 const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)252 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
253 {
254         switch (pf) {
255         case V3D_QPU_PF_NONE:
256                 return "";
257         case V3D_QPU_PF_PUSHZ:
258                 return ".pushz";
259         case V3D_QPU_PF_PUSHN:
260                 return ".pushn";
261         case V3D_QPU_PF_PUSHC:
262                 return ".pushc";
263         default:
264                 unreachable("bad pf value");
265         }
266 }
267 
268 const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)269 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
270 {
271         switch (uf) {
272         case V3D_QPU_UF_NONE:
273                 return "";
274         case V3D_QPU_UF_ANDZ:
275                 return ".andz";
276         case V3D_QPU_UF_ANDNZ:
277                 return ".andnz";
278         case V3D_QPU_UF_NORZ:
279                 return ".norz";
280         case V3D_QPU_UF_NORNZ:
281                 return ".nornz";
282         case V3D_QPU_UF_ANDN:
283                 return ".andn";
284         case V3D_QPU_UF_ANDNN:
285                 return ".andnn";
286         case V3D_QPU_UF_NORN:
287                 return ".norn";
288         case V3D_QPU_UF_NORNN:
289                 return ".nornn";
290         case V3D_QPU_UF_ANDC:
291                 return ".andc";
292         case V3D_QPU_UF_ANDNC:
293                 return ".andnc";
294         case V3D_QPU_UF_NORC:
295                 return ".norc";
296         case V3D_QPU_UF_NORNC:
297                 return ".nornc";
298         default:
299                 unreachable("bad pf value");
300         }
301 }
302 
303 const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)304 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
305 {
306         switch (pack) {
307         case V3D_QPU_PACK_NONE:
308                 return "";
309         case V3D_QPU_PACK_L:
310                 return ".l";
311         case V3D_QPU_PACK_H:
312                 return ".h";
313         default:
314                 unreachable("bad pack value");
315         }
316 }
317 
318 const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)319 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
320 {
321         switch (unpack) {
322         case V3D_QPU_UNPACK_NONE:
323                 return "";
324         case V3D_QPU_UNPACK_L:
325                 return ".l";
326         case V3D_QPU_UNPACK_H:
327                 return ".h";
328         case V3D_QPU_UNPACK_ABS:
329                 return ".abs";
330         case V3D_QPU_UNPACK_REPLICATE_32F_16:
331                 return ".ff";
332         case V3D_QPU_UNPACK_REPLICATE_L_16:
333                 return ".ll";
334         case V3D_QPU_UNPACK_REPLICATE_H_16:
335                 return ".hh";
336         case V3D_QPU_UNPACK_SWAP_16:
337                 return ".swp";
338         default:
339                 unreachable("bad unpack value");
340         }
341 }
342 
343 #define D	1
344 #define A	2
345 #define B	4
346 static const uint8_t add_op_args[] = {
347         [V3D_QPU_A_FADD] = D | A | B,
348         [V3D_QPU_A_FADDNF] = D | A | B,
349         [V3D_QPU_A_VFPACK] = D | A | B,
350         [V3D_QPU_A_ADD] = D | A | B,
351         [V3D_QPU_A_VFPACK] = D | A | B,
352         [V3D_QPU_A_SUB] = D | A | B,
353         [V3D_QPU_A_VFPACK] = D | A | B,
354         [V3D_QPU_A_FSUB] = D | A | B,
355         [V3D_QPU_A_MIN] = D | A | B,
356         [V3D_QPU_A_MAX] = D | A | B,
357         [V3D_QPU_A_UMIN] = D | A | B,
358         [V3D_QPU_A_UMAX] = D | A | B,
359         [V3D_QPU_A_SHL] = D | A | B,
360         [V3D_QPU_A_SHR] = D | A | B,
361         [V3D_QPU_A_ASR] = D | A | B,
362         [V3D_QPU_A_ROR] = D | A | B,
363         [V3D_QPU_A_FMIN] = D | A | B,
364         [V3D_QPU_A_FMAX] = D | A | B,
365         [V3D_QPU_A_VFMIN] = D | A | B,
366 
367         [V3D_QPU_A_AND] = D | A | B,
368         [V3D_QPU_A_OR] = D | A | B,
369         [V3D_QPU_A_XOR] = D | A | B,
370 
371         [V3D_QPU_A_VADD] = D | A | B,
372         [V3D_QPU_A_VSUB] = D | A | B,
373         [V3D_QPU_A_NOT] = D | A,
374         [V3D_QPU_A_NEG] = D | A,
375         [V3D_QPU_A_FLAPUSH] = D | A,
376         [V3D_QPU_A_FLBPUSH] = D | A,
377         [V3D_QPU_A_FLPOP] = D | A,
378         [V3D_QPU_A_RECIP] = D | A,
379         [V3D_QPU_A_SETMSF] = D | A,
380         [V3D_QPU_A_SETREVF] = D | A,
381         [V3D_QPU_A_NOP] = 0,
382         [V3D_QPU_A_TIDX] = D,
383         [V3D_QPU_A_EIDX] = D,
384         [V3D_QPU_A_LR] = D,
385         [V3D_QPU_A_VFLA] = D,
386         [V3D_QPU_A_VFLNA] = D,
387         [V3D_QPU_A_VFLB] = D,
388         [V3D_QPU_A_VFLNB] = D,
389 
390         [V3D_QPU_A_FXCD] = D,
391         [V3D_QPU_A_XCD] = D,
392         [V3D_QPU_A_FYCD] = D,
393         [V3D_QPU_A_YCD] = D,
394 
395         [V3D_QPU_A_MSF] = D,
396         [V3D_QPU_A_REVF] = D,
397         [V3D_QPU_A_VDWWT] = D,
398         [V3D_QPU_A_IID] = D,
399         [V3D_QPU_A_SAMPID] = D,
400         [V3D_QPU_A_BARRIERID] = D,
401         [V3D_QPU_A_TMUWT] = D,
402         [V3D_QPU_A_VPMWT] = D,
403 
404         [V3D_QPU_A_VPMSETUP] = D | A,
405 
406         [V3D_QPU_A_LDVPMV_IN] = D | A,
407         [V3D_QPU_A_LDVPMV_OUT] = D | A,
408         [V3D_QPU_A_LDVPMD_IN] = D | A,
409         [V3D_QPU_A_LDVPMD_OUT] = D | A,
410         [V3D_QPU_A_LDVPMP] = D | A,
411         [V3D_QPU_A_RSQRT] = D | A,
412         [V3D_QPU_A_EXP] = D | A,
413         [V3D_QPU_A_LOG] = D | A,
414         [V3D_QPU_A_SIN] = D | A,
415         [V3D_QPU_A_RSQRT2] = D | A,
416         [V3D_QPU_A_LDVPMG_IN] = D | A | B,
417         [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
418 
419         /* FIXME: MOVABSNEG */
420 
421         [V3D_QPU_A_FCMP] = D | A | B,
422         [V3D_QPU_A_VFMAX] = D | A | B,
423 
424         [V3D_QPU_A_FROUND] = D | A,
425         [V3D_QPU_A_FTOIN] = D | A,
426         [V3D_QPU_A_FTRUNC] = D | A,
427         [V3D_QPU_A_FTOIZ] = D | A,
428         [V3D_QPU_A_FFLOOR] = D | A,
429         [V3D_QPU_A_FTOUZ] = D | A,
430         [V3D_QPU_A_FCEIL] = D | A,
431         [V3D_QPU_A_FTOC] = D | A,
432 
433         [V3D_QPU_A_FDX] = D | A,
434         [V3D_QPU_A_FDY] = D | A,
435 
436         [V3D_QPU_A_STVPMV] = A | B,
437         [V3D_QPU_A_STVPMD] = A | B,
438         [V3D_QPU_A_STVPMP] = A | B,
439 
440         [V3D_QPU_A_ITOF] = D | A,
441         [V3D_QPU_A_CLZ] = D | A,
442         [V3D_QPU_A_UTOF] = D | A,
443 };
444 
445 static const uint8_t mul_op_args[] = {
446         [V3D_QPU_M_ADD] = D | A | B,
447         [V3D_QPU_M_SUB] = D | A | B,
448         [V3D_QPU_M_UMUL24] = D | A | B,
449         [V3D_QPU_M_VFMUL] = D | A | B,
450         [V3D_QPU_M_SMUL24] = D | A | B,
451         [V3D_QPU_M_MULTOP] = D | A | B,
452         [V3D_QPU_M_FMOV] = D | A,
453         [V3D_QPU_M_NOP] = 0,
454         [V3D_QPU_M_MOV] = D | A,
455         [V3D_QPU_M_FMUL] = D | A | B,
456 };
457 
458 bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)459 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
460 {
461         assert(op < ARRAY_SIZE(add_op_args));
462 
463         return add_op_args[op] & D;
464 }
465 
466 bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)467 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
468 {
469         assert(op < ARRAY_SIZE(mul_op_args));
470 
471         return mul_op_args[op] & D;
472 }
473 
474 int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)475 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
476 {
477         assert(op < ARRAY_SIZE(add_op_args));
478 
479         uint8_t args = add_op_args[op];
480         if (args & B)
481                 return 2;
482         else if (args & A)
483                 return 1;
484         else
485                 return 0;
486 }
487 
488 int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)489 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
490 {
491         assert(op < ARRAY_SIZE(mul_op_args));
492 
493         uint8_t args = mul_op_args[op];
494         if (args & B)
495                 return 2;
496         else if (args & A)
497                 return 1;
498         else
499                 return 0;
500 }
501 
502 enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)503 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
504 {
505         switch (cond) {
506         case V3D_QPU_COND_IFA:
507                 return V3D_QPU_COND_IFNA;
508         case V3D_QPU_COND_IFNA:
509                 return V3D_QPU_COND_IFA;
510         case V3D_QPU_COND_IFB:
511                 return V3D_QPU_COND_IFNB;
512         case V3D_QPU_COND_IFNB:
513                 return V3D_QPU_COND_IFB;
514         default:
515                 unreachable("Non-invertible cond");
516         }
517 }
518 
519 bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)520 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
521 {
522         switch (waddr) {
523         case V3D_QPU_WADDR_RECIP:
524         case V3D_QPU_WADDR_RSQRT:
525         case V3D_QPU_WADDR_EXP:
526         case V3D_QPU_WADDR_LOG:
527         case V3D_QPU_WADDR_SIN:
528         case V3D_QPU_WADDR_RSQRT2:
529                 return true;
530         default:
531                 return false;
532         }
533 }
534 
535 bool
v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)536 v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
537 {
538         /* XXX: WADDR_TMU changed to UNIFA on 4.x */
539         return ((waddr >= V3D_QPU_WADDR_TMU &&
540                  waddr <= V3D_QPU_WADDR_TMUAU) ||
541                 (waddr >= V3D_QPU_WADDR_TMUC &&
542                  waddr <= V3D_QPU_WADDR_TMUHSLOD));
543 }
544 
545 bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr * inst)546 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
547 {
548         return (inst->sig.ldtmu ||
549                 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
550                  inst->alu.add.op == V3D_QPU_A_TMUWT));
551 }
552 
553 bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)554 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
555 {
556         return (waddr == V3D_QPU_WADDR_TLB ||
557                 waddr == V3D_QPU_WADDR_TLBU);
558 }
559 
560 bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)561 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
562 {
563         return (waddr == V3D_QPU_WADDR_VPM ||
564                 waddr == V3D_QPU_WADDR_VPMU);
565 }
566 
567 bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)568 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
569 {
570         return (waddr == V3D_QPU_WADDR_SYNC ||
571                 waddr == V3D_QPU_WADDR_SYNCB ||
572                 waddr == V3D_QPU_WADDR_SYNCU);
573 }
574 
575 bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)576 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
577 {
578         switch (waddr) {
579         case V3D_QPU_WADDR_VPMU:
580         case V3D_QPU_WADDR_TLBU:
581         case V3D_QPU_WADDR_TMUAU:
582         case V3D_QPU_WADDR_SYNCU:
583                 return true;
584         default:
585                 return false;
586         }
587 }
588 
589 static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)590 v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
591 {
592         switch (op) {
593         case V3D_QPU_A_VPMSETUP:
594         case V3D_QPU_A_LDVPMV_IN:
595         case V3D_QPU_A_LDVPMV_OUT:
596         case V3D_QPU_A_LDVPMD_IN:
597         case V3D_QPU_A_LDVPMD_OUT:
598         case V3D_QPU_A_LDVPMP:
599         case V3D_QPU_A_LDVPMG_IN:
600         case V3D_QPU_A_LDVPMG_OUT:
601                 return true;
602         default:
603                 return false;
604         }
605 }
606 
607 static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)608 v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
609 {
610         switch (op) {
611         case V3D_QPU_A_VPMSETUP:
612         case V3D_QPU_A_STVPMV:
613         case V3D_QPU_A_STVPMD:
614         case V3D_QPU_A_STVPMP:
615                 return true;
616         default:
617                 return false;
618         }
619 }
620 
621 bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr * inst)622 v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
623 {
624         if (inst->sig.ldtlb ||
625             inst->sig.ldtlbu)
626                 return true;
627 
628         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
629                 if (inst->alu.add.magic_write &&
630                     v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
631                         return true;
632                 }
633 
634                 if (inst->alu.mul.magic_write &&
635                     v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
636                         return true;
637                 }
638         }
639 
640         return false;
641 }
642 
643 bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr * inst)644 v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
645 {
646         if (v3d_qpu_instr_is_sfu(inst))
647                 return true;
648 
649         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
650                 if (inst->alu.add.magic_write &&
651                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
652                         return true;
653                 }
654 
655                 if (inst->alu.mul.magic_write &&
656                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
657                         return true;
658                 }
659         }
660 
661         return false;
662 }
663 
664 bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr * inst)665 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
666 {
667         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
668                 switch (inst->alu.add.op) {
669                 case V3D_QPU_A_RECIP:
670                 case V3D_QPU_A_RSQRT:
671                 case V3D_QPU_A_EXP:
672                 case V3D_QPU_A_LOG:
673                 case V3D_QPU_A_SIN:
674                 case V3D_QPU_A_RSQRT2:
675                         return true;
676                 default:
677                         return false;
678                 }
679         }
680         return false;
681 }
682 
683 bool
v3d_qpu_writes_tmu(const struct v3d_qpu_instr * inst)684 v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
685 {
686         return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
687                 ((inst->alu.add.magic_write &&
688                   v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) ||
689                  (inst->alu.mul.magic_write &&
690                   v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))));
691 }
692 
693 bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr * inst)694 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst)
695 {
696         return v3d_qpu_writes_tmu(inst) &&
697                (!inst->alu.add.magic_write ||
698                 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
699                (!inst->alu.mul.magic_write ||
700                 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
701 }
702 
703 bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr * inst)704 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
705 {
706         if (inst->sig.ldvpm)
707                 return true;
708 
709         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
710                 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
711                         return true;
712         }
713 
714         return false;
715 }
716 
717 bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr * inst)718 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
719 {
720         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
721                 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
722                         return true;
723 
724                 if (inst->alu.add.magic_write &&
725                     v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
726                         return true;
727                 }
728 
729                 if (inst->alu.mul.magic_write &&
730                     v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
731                         return true;
732                 }
733         }
734 
735         return false;
736 }
737 
738 static bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr * inst)739 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
740 {
741         return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
742                inst->alu.add.op == V3D_QPU_A_VPMWT;
743 }
744 
/**
 * Returns true when the instruction reads the VPM (v3d_qpu_reads_vpm()) or
 * writes it (v3d_qpu_writes_vpm()).
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
750 
/**
 * Returns true when the instruction reads the VPM, writes the VPM, or is a
 * VPM wait (v3d_qpu_waits_vpm()).
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        if (v3d_qpu_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
758 
759 bool
v3d_qpu_writes_r3(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)760 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
761                   const struct v3d_qpu_instr *inst)
762 {
763         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
764                 if (inst->alu.add.magic_write &&
765                     inst->alu.add.waddr == V3D_QPU_WADDR_R3) {
766                         return true;
767                 }
768 
769                 if (inst->alu.mul.magic_write &&
770                     inst->alu.mul.waddr == V3D_QPU_WADDR_R3) {
771                         return true;
772                 }
773         }
774 
775         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
776             inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
777                 return true;
778         }
779 
780         return inst->sig.ldvary || inst->sig.ldvpm;
781 }
782 
783 bool
v3d_qpu_writes_r4(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)784 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
785                   const struct v3d_qpu_instr *inst)
786 {
787         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
788                 if (inst->alu.add.magic_write &&
789                     (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
790                      v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
791                         return true;
792                 }
793 
794                 if (inst->alu.mul.magic_write &&
795                     (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
796                      v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
797                         return true;
798                 }
799         }
800 
801         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
802                 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
803                         return true;
804         } else if (inst->sig.ldtmu) {
805                 return true;
806         }
807 
808         return false;
809 }
810 
811 bool
v3d_qpu_writes_r5(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)812 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
813                   const struct v3d_qpu_instr *inst)
814 {
815         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
816                 if (inst->alu.add.magic_write &&
817                     inst->alu.add.waddr == V3D_QPU_WADDR_R5) {
818                         return true;
819                 }
820 
821                 if (inst->alu.mul.magic_write &&
822                     inst->alu.mul.waddr == V3D_QPU_WADDR_R5) {
823                         return true;
824                 }
825         }
826 
827         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
828             inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
829                 return true;
830         }
831 
832         return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
833 }
834 
835 bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)836 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
837 {
838         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
839         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
840 
841         return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
842                 (add_nsrc > 1 && inst->alu.add.b == mux) ||
843                 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
844                 (mul_nsrc > 1 && inst->alu.mul.b == mux));
845 }
846 
847 bool
v3d_qpu_sig_writes_address(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig)848 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
849                            const struct v3d_qpu_sig *sig)
850 {
851         if (devinfo->ver < 41)
852                 return false;
853 
854         return (sig->ldunifrf ||
855                 sig->ldunifarf ||
856                 sig->ldvary ||
857                 sig->ldtmu ||
858                 sig->ldtlb ||
859                 sig->ldtlbu);
860 }
861 
862 bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr * inst)863 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
864 {
865         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
866                 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
867         } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
868                 if (inst->flags.ac != V3D_QPU_COND_NONE ||
869                     inst->flags.mc != V3D_QPU_COND_NONE ||
870                     inst->flags.auf != V3D_QPU_UF_NONE ||
871                     inst->flags.muf != V3D_QPU_UF_NONE)
872                         return true;
873 
874                 switch (inst->alu.add.op) {
875                 case V3D_QPU_A_VFLA:
876                 case V3D_QPU_A_VFLNA:
877                 case V3D_QPU_A_VFLB:
878                 case V3D_QPU_A_VFLNB:
879                 case V3D_QPU_A_FLAPUSH:
880                 case V3D_QPU_A_FLBPUSH:
881                         return true;
882                 default:
883                         break;
884                 }
885         }
886 
887         return false;
888 }
889 
890 bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr * inst)891 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
892 {
893         if (inst->flags.apf != V3D_QPU_PF_NONE ||
894             inst->flags.mpf != V3D_QPU_PF_NONE ||
895             inst->flags.auf != V3D_QPU_UF_NONE ||
896             inst->flags.muf != V3D_QPU_UF_NONE) {
897                 return true;
898         }
899 
900         return false;
901 }
902 
903 bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr * inst)904 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
905 {
906         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
907                 return false;
908 
909         switch (inst->alu.add.op) {
910         case V3D_QPU_A_FADD:
911         case V3D_QPU_A_FADDNF:
912         case V3D_QPU_A_FSUB:
913         case V3D_QPU_A_FMIN:
914         case V3D_QPU_A_FMAX:
915         case V3D_QPU_A_FCMP:
916         case V3D_QPU_A_FROUND:
917         case V3D_QPU_A_FTRUNC:
918         case V3D_QPU_A_FFLOOR:
919         case V3D_QPU_A_FCEIL:
920         case V3D_QPU_A_FDX:
921         case V3D_QPU_A_FDY:
922         case V3D_QPU_A_FTOIN:
923         case V3D_QPU_A_FTOIZ:
924         case V3D_QPU_A_FTOUZ:
925         case V3D_QPU_A_FTOC:
926         case V3D_QPU_A_VFPACK:
927                 return true;
928                 break;
929         default:
930                 break;
931         }
932 
933         switch (inst->alu.mul.op) {
934         case V3D_QPU_M_FMOV:
935         case V3D_QPU_M_FMUL:
936                 return true;
937                 break;
938         default:
939                 break;
940         }
941 
942         return false;
943 }
944 bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr * inst)945 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
946 {
947         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
948                 return false;
949 
950         switch (inst->alu.add.op) {
951         case V3D_QPU_A_VFMIN:
952         case V3D_QPU_A_VFMAX:
953                 return true;
954                 break;
955         default:
956                 break;
957         }
958 
959         switch (inst->alu.mul.op) {
960         case V3D_QPU_M_VFMUL:
961                 return true;
962                 break;
963         default:
964                 break;
965         }
966 
967         return false;
968 }
969